ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
class.ilBiblTexFileReader.php
Go to the documentation of this file.
1 <?php
2 
9 {
10 
    /**
     * Keywords that parseContent() compares against the first element obtained from
     * splitting the file content at "@"; on a match that element and the one following
     * it are discarded instead of being parsed as an entry.
     *
     * @var array
     */
14  protected static $ignored_keywords = array( 'Preamble' );
15 
16 
20  public function parseContent()
21  {
22  $this->convertBibSpecialChars();
23  $this->normalizeContent();
24 
25  // get entries
26  $subject = $this->getFileContent();
27  $objects = preg_split("/\\@([\\w]*)/uix", $subject, null, PREG_SPLIT_DELIM_CAPTURE
28  | PREG_SPLIT_NO_EMPTY);
29 
30  if (in_array($objects[0], self::$ignored_keywords)) {
31  $objects = array_splice($objects, 2);
32  }
33  // some files lead to a empty first entry in the array with the fist bib-entry, we have to trow them away...
34  if (strlen($objects[0]) <= 3) {
35  $objects = array_splice($objects, 1);
36  }
37 
38  $entries = array();
39  foreach ($objects as $key => $object) {
40  if ((int) $key % 2 == 0 || (int) $key == 0) {
41  $entry = array();
42  $entry['entryType'] = strtolower($object);
43  } else {
44  // Citation
45  preg_match("/^{(?<cite>.*),\\n/um", $object, $cite_matches);
46  if ($cite_matches['cite']) {
47  $entry['cite'] = $cite_matches['cite'];
48  }
49 
50  // Edit at regex101.com: (?<attr>[\w]*)\s*=\s*[{"]*(?<content>(.*?))\s*[}"]*?\s*[,]*?\s*\n
51  $re = "/(?<attr>[\\w]*)\\s*=\\s*[{\"]*(?<content>(.*?))\\s*[}\"]*?\\s*[,]*?\\s*\\n/";
52 
53  preg_match_all($re, $object, $matches, PREG_SET_ORDER);
54 
55  foreach ($matches as $match) {
56  $clean = $match['content'];
57  $clean = preg_replace("/[\", \\t\\s]*\\n/u", "\n", $clean);
58 
59  $entry[strtolower($match['attr'])] = $clean;
60  }
61 
62  $entries[] = $entry;
63  }
64  }
65 
66  return $entries;
67  }
68 
69 
73  protected function normalizeContent()
74  {
75  $result = $this->removeBomUtf8($this->getFileContent());
76  // remove emty newlines
77  $result = preg_replace("/^\n/um", "", $result);
78  // Remove lines with only whitespaces
79  $result = preg_replace("/^[\\s]*$/um", "\n", $result);
80  $result = preg_replace("/\\n\\n\\n/um", "\n\n", $result);
81 
82  // remove comments
83  $result = preg_replace("/^%.*\\n/um", "", $result);
84 
85  // Intend attributes with a tab
86  $result = preg_replace("/^[ ]+/um", "\t", $result);
87  $result = preg_replace("/^([\\w])/um", "\t$1", $result);
88 
89  // replace newline-braktes with brakets
90  $result = preg_replace('/\\n}/uimx', '}', $result);
91 
92  // move last bracket on newline
93  $result = preg_replace("/}[\\s]*$/um", "\n}", $result);
94 
95  // Support long lines (not working at the moment)
96  // $re = "/(\"[^\"\\n]*)\\r?\\n(?!(([^\"]*\"){2})*[^\"]*$)/";
97  // $subst = "$1";
98  // $result = preg_replace($re, $subst, $result);
99 
100  $this->setFileContent($result);
101  }
102 
103 
104  protected function convertBibSpecialChars()
105  {
106  $bibtex_special_chars['ä'] = '{\"a}';
107  $bibtex_special_chars['ë'] = '{\"e}';
108  $bibtex_special_chars['ï'] = '{\"i}';
109  $bibtex_special_chars['ö'] = '{\"o}';
110  $bibtex_special_chars['ü'] = '{\"u}';
111  $bibtex_special_chars['Ä'] = '{\"A}';
112  $bibtex_special_chars['Ë'] = '{\"E}';
113  $bibtex_special_chars['Ï'] = '{\"I}';
114  $bibtex_special_chars['Ö'] = '{\"O}';
115  $bibtex_special_chars['Ü'] = '{\"U}';
116  $bibtex_special_chars['â'] = '{\^a}';
117  $bibtex_special_chars['ê'] = '{\^e}';
118  $bibtex_special_chars['î'] = '{\^i}';
119  $bibtex_special_chars['ô'] = '{\^o}';
120  $bibtex_special_chars['û'] = '{\^u}';
121  $bibtex_special_chars['Â'] = '{\^A}';
122  $bibtex_special_chars['Ê'] = '{\^E}';
123  $bibtex_special_chars['Î'] = '{\^I}';
124  $bibtex_special_chars['Ô'] = '{\^O}';
125  $bibtex_special_chars['Û'] = '{\^U}';
126  $bibtex_special_chars['à'] = '{\`a}';
127  $bibtex_special_chars['è'] = '{\`e}';
128  $bibtex_special_chars['ì'] = '{\`i}';
129  $bibtex_special_chars['ò'] = '{\`o}';
130  $bibtex_special_chars['ù'] = '{\`u}';
131  $bibtex_special_chars['À'] = '{\`A}';
132  $bibtex_special_chars['È'] = '{\`E}';
133  $bibtex_special_chars['Ì'] = '{\`I}';
134  $bibtex_special_chars['Ò'] = '{\`O}';
135  $bibtex_special_chars['Ù'] = '{\`U}';
136  $bibtex_special_chars['á'] = '{\\\'a}';
137  $bibtex_special_chars['é'] = '{\\\'e}';
138  $bibtex_special_chars['í'] = '{\\\'i}';
139  $bibtex_special_chars['ó'] = '{\\\'o}';
140  $bibtex_special_chars['ú'] = '{\\\'u}';
141  $bibtex_special_chars['Á'] = '{\\\'A}';
142  $bibtex_special_chars['É'] = '{\\\'E}';
143  $bibtex_special_chars['Í'] = '{\\\'I}';
144  $bibtex_special_chars['Ó'] = '{\\\'O}';
145  $bibtex_special_chars['Ú'] = '{\\\'U}';
146  $bibtex_special_chars['à'] = '{\`a}';
147  $bibtex_special_chars['è'] = '{\`e}';
148  $bibtex_special_chars['ì'] = '{\`i}';
149  $bibtex_special_chars['ò'] = '{\`o}';
150  $bibtex_special_chars['ù'] = '{\`u}';
151  $bibtex_special_chars['À'] = '{\`A}';
152  $bibtex_special_chars['È'] = '{\`E}';
153  $bibtex_special_chars['Ì'] = '{\`I}';
154  $bibtex_special_chars['Ò'] = '{\`O}';
155  $bibtex_special_chars['Ù'] = '{\`U}';
156  $bibtex_special_chars['ç'] = '{\c c}';
157  $bibtex_special_chars['ß'] = '{\ss}';
158  $bibtex_special_chars['ñ'] = '{\~n}';
159  $bibtex_special_chars['Ñ'] = '{\~N}';
160  $bibtex_special_chars['ń'] = "{\\'n}";
161  $bibtex_special_chars['l'] = "{\\'n}";
162  $bibtex_special_chars['&'] = "{\&}";
163  $bibtex_special_chars['@'] = "{\@}";
164 
165  $this->setFileContent(str_replace(array_values($bibtex_special_chars), array_keys($bibtex_special_chars), $this->getFileContent()));
166  }
167 
168 
174  protected function removeBomUtf8($s)
175  {
176  if (substr($s, 0, 3) == chr(hexdec('EF')) . chr(hexdec('BB')) . chr(hexdec('BF'))) {
177  return substr($s, 3);
178  } else {
179  return $s;
180  }
181  }
182 }
Interface ilBiblFileReaderInterface.
$result
$s
Definition: pwgen.php:45
Class ilBiblRisFileReader.
Class ilBiblFileReaderBase.
parseContent()
REFACTOR: implementations should use objects instead of arrays. Returns array
$key
Definition: croninfo.php:18