ILIAS  trunk Revision v11.0_alpha-3011-gc6b235a2e85
class.ilBiblTexFileReader.php
Go to the documentation of this file.
1<?php
2
24{
/**
 * Keywords that parseContent() compares the first split token against; on a
 * match the first two tokens (keyword and its body) are discarded.
 */
25 protected static array $ignored_keywords = ['Preamble'];
26
/**
 * Parses the BibTeX file content into a list of entries.
 *
 * The content is normalised and then split at every "@type" marker. The
 * resulting token list alternates between an entry type and the raw body
 * belonging to it; each such pair is merged into one associative array.
 *
 * @return array<int, array<string, string>> one array per entry, holding
 *         'entryType', 'cite' (when a cite key was found) and one element
 *         per attribute, keyed by the lower-cased attribute name
 */
public function parseContent(): array
{
    // NOTE(review): convertBibSpecialChars() has no other caller in this class,
    // so without this call LaTeX escapes are never decoded. The call is assumed
    // to belong here, ahead of normalisation — confirm against the upstream file.
    $this->convertBibSpecialChars();
    $this->normalizeContent();

    // Split into alternating [type, body, type, body, ...] tokens.
    $tokens = preg_split(
        "/\\@([\\w]*)/uix",
        $this->getFileContent(),
        -1,
        PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
    );
    // preg_split() yields false on a PCRE error (e.g. invalid UTF-8 with /u)
    // and an empty list for empty content; neither contains any entry.
    if ($tokens === false || $tokens === []) {
        return [];
    }

    // BibTeX keywords are case-insensitive, so "@preamble" must be skipped
    // just like "@Preamble".
    $ignored = array_map('strtolower', self::$ignored_keywords);
    if (in_array(strtolower((string) $tokens[0]), $ignored, true)) {
        $tokens = array_slice($tokens, 2);
    }
    // Some files produce a short junk token in front of the first entry type;
    // it has to be thrown away to keep the type/body alternation intact.
    if (strlen((string) ($tokens[0] ?? '')) <= 3) {
        $tokens = array_slice($tokens, 1);
    }

    $entries = [];
    $entry = [];
    foreach ($tokens as $index => $token) {
        $token = (string) $token;

        // Even positions carry the entry type and start a new entry.
        if ($index % 2 === 0) {
            $entry = ['entryType' => strtolower($token)];
            continue;
        }

        // Odd positions carry the body of the entry started one token earlier.
        // Citation key: compared against '' so that a key of "0" is kept.
        if (
            preg_match("/^{(?<cite>.*),\\n/um", $token, $cite_matches) === 1
            && ($cite_matches['cite'] ?? '') !== ''
        ) {
            $entry['cite'] = $cite_matches['cite'];
        }

        // Edit at regex101.com: (?<attr>[\w]*)\s*=\s*[{"]*(?<content>(.*?))\s*[}"]*?\s*[,]*?\s*\n
        $re = "/(?<attr>[\\w]*)\\s*=\\s*[{\"]*(?<content>(.*?))\\s*[}\"]*?\\s*[,]*?\\s*\\n/";

        if (preg_match_all($re, $token, $matches, PREG_SET_ORDER)) {
            foreach ($matches as $match) {
                // Strip trailing quotes, commas and whitespace in front of a
                // newline; keep the raw value if the replacement fails.
                $clean = preg_replace("/[\", \\t\\s]*\\n/u", "\n", $match['content']);
                $entry[strtolower($match['attr'])] = $clean ?? $match['content'];
            }
        }

        $entries[] = $entry;
    }

    return $entries;
}
78
/**
 * Brings the raw file content into the line layout parseContent() works on
 * and stores the result via setFileContent().
 *
 * A leading UTF-8 BOM is removed first, then a fixed sequence of regular
 * expression replacements is applied in order. If a replacement fails
 * (preg_replace() returns null), that step is skipped and the text produced
 * so far is kept instead of being replaced by an empty string.
 */
protected function normalizeContent(): void
{
    $result = $this->removeBomUtf8($this->getFileContent());

    // [pattern, replacement] pairs, applied strictly in this order.
    $steps = [
        // remove empty lines
        ["/^\n/um", ""],
        // replace lines consisting only of whitespace with a bare newline
        ["/^[\\s]*$/um", "\n"],
        // reduce three consecutive newlines to two
        ["/\\n\\n\\n/um", "\n\n"],
        // remove comment lines (starting with %)
        ["/^%.*\\n/um", ""],
        // indent attributes with a tab: leading spaces become one tab ...
        ["/^[ ]+/um", "\t"],
        // ... and lines starting with a word character get a tab prefix
        ["/^([\\w])/um", "\t$1"],
        // pull a closing bracket that starts a line up onto the previous line
        ['/\\n}/uimx', '}'],
        // move a closing bracket that ends a line onto a line of its own
        ["/}[\\s]*$/um", "\n}"],
    ];

    foreach ($steps as [$pattern, $replacement]) {
        // preg_replace() returns null on a PCRE error; keep the current text then.
        $result = preg_replace($pattern, $replacement, $result) ?? $result;
    }

    // Support long lines (not working at the moment)
    // $re = "/(\"[^\"\\n]*)\\r?\\n(?!(([^\"]*\"){2})*[^\"]*$)/";
    // $subst = "$1";
    // $result = preg_replace($re, $subst, $result);

    $this->setFileContent($result);
}
111
/**
 * Replaces BibTeX/LaTeX escape sequences in the file content by the
 * characters they stand for and stores the result via setFileContent().
 *
 * The map is keyed by the target character; its values are the escape
 * sequences that are searched for in the content.
 */
protected function convertBibSpecialChars(): void
{
    $bibtex_special_chars = [
        // diaeresis
        'ä' => '{\"a}',
        'ë' => '{\"e}',
        'ï' => '{\"i}',
        'ö' => '{\"o}',
        'ü' => '{\"u}',
        'Ä' => '{\"A}',
        'Ë' => '{\"E}',
        'Ï' => '{\"I}',
        'Ö' => '{\"O}',
        'Ü' => '{\"U}',
        // circumflex
        'â' => '{\^a}',
        'ê' => '{\^e}',
        'î' => '{\^i}',
        'ô' => '{\^o}',
        'û' => '{\^u}',
        'Â' => '{\^A}',
        'Ê' => '{\^E}',
        'Î' => '{\^I}',
        'Ô' => '{\^O}',
        'Û' => '{\^U}',
        // grave accent
        'à' => '{\`a}',
        'è' => '{\`e}',
        'ì' => '{\`i}',
        'ò' => '{\`o}',
        'ù' => '{\`u}',
        'À' => '{\`A}',
        'È' => '{\`E}',
        'Ì' => '{\`I}',
        'Ò' => '{\`O}',
        'Ù' => '{\`U}',
        // acute accent
        'á' => '{\\\'a}',
        'é' => '{\\\'e}',
        'í' => '{\\\'i}',
        'ó' => '{\\\'o}',
        'ú' => '{\\\'u}',
        'Á' => '{\\\'A}',
        'É' => '{\\\'E}',
        'Í' => '{\\\'I}',
        'Ó' => '{\\\'O}',
        'Ú' => '{\\\'U}',
        // others
        'ç' => '{\c c}',
        'ß' => '{\ss}',
        'ñ' => '{\~n}',
        'Ñ' => '{\~N}',
        'ń' => '{\\\'n}',
        // LaTeX \l is the stroked l; this entry was previously keyed by a plain
        // 'l' with the same sequence as 'ń' and therefore could never apply.
        'ł' => '{\l}',
        '&' => '{\&}',
        '@' => '{\@}',
    ];

    $this->setFileContent(
        str_replace(
            array_values($bibtex_special_chars),
            array_keys($bibtex_special_chars),
            $this->getFileContent()
        )
    );
}
176
/**
 * Strips a leading UTF-8 byte order mark (bytes EF BB BF) from a string.
 *
 * @param string $s text that may start with a BOM
 * @return string the text without the three BOM bytes, or $s unchanged when
 *                it does not start with them
 */
protected function removeBomUtf8(string $s): string
{
    $bom = "\xEF\xBB\xBF";

    // Only the first three bytes are compared, so shorter strings never match.
    return strncmp($s, $bom, 3) === 0 ? substr($s, 3) : $s;
}
184}
Class ilBiblFileReaderBase.
setFileContent(string $file_content)
This file is part of ILIAS, a powerful learning management system published by ILIAS open source e-Le...
convertBibSpecialChars()
@noinspection PhpArrayIndexImmediatelyRewrittenInspection
Interface ilBiblFileReaderInterface.
PREG_SPLIT_NO_EMPTY PREG_SPLIT_DELIM_CAPTURE