ILIAS  trunk Revision v11.0_alpha-1702-gfd3ecb7f852
All Data Structures Namespaces Files Functions Variables Enumerations Enumerator Modules Pages
class.ilBiblTexFileReader.php
Go to the documentation of this file.
1 <?php
2 
24 {
25  protected static array $ignored_keywords = ['Preamble'];
26 
30  public function parseContent(): array
31  {
32  $this->convertBibSpecialChars();
33  $this->normalizeContent();
34 
35  // get entries
36  $subject = $this->getFileContent();
37  $objects = preg_split("/\\@([\\w]*)/uix", $subject, -1, PREG_SPLIT_DELIM_CAPTURE
38  | PREG_SPLIT_NO_EMPTY);
39 
40  if (in_array($objects[0], self::$ignored_keywords)) {
41  $objects = array_splice($objects, 2);
42  }
43  // some files lead to a empty first entry in the array with the fist bib-entry, we have to trow them away...
44  if (strlen((string) $objects[0]) <= 3) {
45  $objects = array_splice($objects, 1);
46  }
47 
48  $entries = [];
49  foreach ($objects as $key => $object) {
50  if ((int) $key % 2 == 0 || (int) $key == 0) {
51  $entry = [];
52  $entry['entryType'] = strtolower((string) $object);
53  } else {
54  // Citation
55  preg_match("/^{(?<cite>.*),\\n/um", (string) $object, $cite_matches);
56  if ($cite_matches['cite'] ?? false) {
57  $entry['cite'] = $cite_matches['cite'];
58  }
59 
60  // Edit at regex101.com: (?<attr>[\w]*)\s*=\s*[{"]*(?<content>(.*?))\s*[}"]*?\s*[,]*?\s*\n
61  $re = "/(?<attr>[\\w]*)\\s*=\\s*[{\"]*(?<content>(.*?))\\s*[}\"]*?\\s*[,]*?\\s*\\n/";
62 
63  preg_match_all($re, (string) $object, $matches, PREG_SET_ORDER);
64 
65  foreach ($matches as $match) {
66  $clean = $match['content'];
67  $clean = preg_replace("/[\", \\t\\s]*\\n/u", "\n", $clean);
68 
69  $entry[strtolower($match['attr'])] = $clean;
70  }
71  // this looks strange, since $entry is only declared every second loop. this is because BibTex first delivers a line for type, in the next line the content (see lines 34.36)
72  $entries[] = $entry ?? [];
73  }
74  }
75 
76  return $entries;
77  }
78 
82  protected function normalizeContent(): void
83  {
84  $result = $this->removeBomUtf8($this->getFileContent());
85  // remove emty newlines
86  $result = preg_replace("/^\n/um", "", $result);
87  // Remove lines with only whitespaces
88  $result = preg_replace("/^[\\s]*$/um", "\n", (string) $result);
89  $result = preg_replace("/\\n\\n\\n/um", "\n\n", (string) $result);
90 
91  // remove comments
92  $result = preg_replace("/^%.*\\n/um", "", (string) $result);
93 
94  // Intend attributes with a tab
95  $result = preg_replace("/^[ ]+/um", "\t", (string) $result);
96  $result = preg_replace("/^([\\w])/um", "\t$1", (string) $result);
97 
98  // replace newline-braktes with brakets
99  $result = preg_replace('/\\n}/uimx', '}', (string) $result);
100 
101  // move last bracket on newline
102  $result = preg_replace("/}[\\s]*$/um", "\n}", (string) $result);
103 
104  // Support long lines (not working at the moment)
105  // $re = "/(\"[^\"\\n]*)\\r?\\n(?!(([^\"]*\"){2})*[^\"]*$)/";
106  // $subst = "$1";
107  // $result = preg_replace($re, $subst, $result);
108 
109  $this->setFileContent($result);
110  }
111 
113  protected function convertBibSpecialChars(): void
114  {
115  $bibtex_special_chars['ä'] = '{\"a}';
116  $bibtex_special_chars['ë'] = '{\"e}';
117  $bibtex_special_chars['ï'] = '{\"i}';
118  $bibtex_special_chars['ö'] = '{\"o}';
119  $bibtex_special_chars['ü'] = '{\"u}';
120  $bibtex_special_chars['Ä'] = '{\"A}';
121  $bibtex_special_chars['Ë'] = '{\"E}';
122  $bibtex_special_chars['Ï'] = '{\"I}';
123  $bibtex_special_chars['Ö'] = '{\"O}';
124  $bibtex_special_chars['Ü'] = '{\"U}';
125  $bibtex_special_chars['â'] = '{\^a}';
126  $bibtex_special_chars['ê'] = '{\^e}';
127  $bibtex_special_chars['î'] = '{\^i}';
128  $bibtex_special_chars['ô'] = '{\^o}';
129  $bibtex_special_chars['û'] = '{\^u}';
130  $bibtex_special_chars['Â'] = '{\^A}';
131  $bibtex_special_chars['Ê'] = '{\^E}';
132  $bibtex_special_chars['Î'] = '{\^I}';
133  $bibtex_special_chars['Ô'] = '{\^O}';
134  $bibtex_special_chars['Û'] = '{\^U}';
135  $bibtex_special_chars['à'] = '{\`a}';
136  $bibtex_special_chars['è'] = '{\`e}';
137  $bibtex_special_chars['ì'] = '{\`i}';
138  $bibtex_special_chars['ò'] = '{\`o}';
139  $bibtex_special_chars['ù'] = '{\`u}';
140  $bibtex_special_chars['À'] = '{\`A}';
141  $bibtex_special_chars['È'] = '{\`E}';
142  $bibtex_special_chars['Ì'] = '{\`I}';
143  $bibtex_special_chars['Ò'] = '{\`O}';
144  $bibtex_special_chars['Ù'] = '{\`U}';
145  $bibtex_special_chars['á'] = '{\\\'a}';
146  $bibtex_special_chars['é'] = '{\\\'e}';
147  $bibtex_special_chars['í'] = '{\\\'i}';
148  $bibtex_special_chars['ó'] = '{\\\'o}';
149  $bibtex_special_chars['ú'] = '{\\\'u}';
150  $bibtex_special_chars['Á'] = '{\\\'A}';
151  $bibtex_special_chars['É'] = '{\\\'E}';
152  $bibtex_special_chars['Í'] = '{\\\'I}';
153  $bibtex_special_chars['Ó'] = '{\\\'O}';
154  $bibtex_special_chars['Ú'] = '{\\\'U}';
155  $bibtex_special_chars['à'] = '{\`a}';
156  $bibtex_special_chars['è'] = '{\`e}';
157  $bibtex_special_chars['ì'] = '{\`i}';
158  $bibtex_special_chars['ò'] = '{\`o}';
159  $bibtex_special_chars['ù'] = '{\`u}';
160  $bibtex_special_chars['À'] = '{\`A}';
161  $bibtex_special_chars['È'] = '{\`E}';
162  $bibtex_special_chars['Ì'] = '{\`I}';
163  $bibtex_special_chars['Ò'] = '{\`O}';
164  $bibtex_special_chars['Ù'] = '{\`U}';
165  $bibtex_special_chars['ç'] = '{\c c}';
166  $bibtex_special_chars['ß'] = '{\ss}';
167  $bibtex_special_chars['ñ'] = '{\~n}';
168  $bibtex_special_chars['Ñ'] = '{\~N}';
169  $bibtex_special_chars['ń'] = "{\\'n}";
170  $bibtex_special_chars['l'] = "{\\'n}";
171  $bibtex_special_chars['&'] = "{\&}";
172  $bibtex_special_chars['@'] = "{\@}";
173 
174  $this->setFileContent(str_replace(array_values($bibtex_special_chars), array_keys($bibtex_special_chars), $this->getFileContent()));
175  }
176 
177  protected function removeBomUtf8(string $s): string
178  {
179  if (substr($s, 0, 3) === chr(hexdec('EF')) . chr(hexdec('BB')) . chr(hexdec('BF'))) {
180  return substr($s, 3);
181  }
182  return $s;
183  }
184 }
Interface ilBiblFileReaderInterface.
This file is part of ILIAS, a powerful learning management system published by ILIAS open source e-Le...
Class ilBiblFileReaderBase.
setFileContent(string $file_content)
convertBibSpecialChars()
PhpArrayIndexImmediatelyRewrittenInspection