ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
class.ilBibTex.php
Go to the documentation of this file.
1 <?php
2 require_once('./Modules/Bibliographic/classes/Types/class.ilBibliograficFileReaderBase.php');
3 
11 
/**
 * Keywords that parseContent() skips (together with the text that follows them)
 * when they are the first element found in the file.
 *
 * @var string[]
 */
protected static $ignored_keywords = array(
	'Preamble',
);
16 
17 
/**
 * Parses the BibTeX file content into a list of entries.
 *
 * The content is first run through convertBibSpecialChars() and normalizeContent(),
 * then split at every "@<word>" marker. Each marker word becomes the entry type and
 * the text that follows it is scanned for the cite key and "name = value" attributes.
 *
 * @return array[] one associative array per entry, holding 'entryType' (lower-cased),
 *                 'cite' when a cite key was found, and one key per attribute name
 *                 (lower-cased). Empty when the content cannot be split.
 */
public function parseContent() {
	$this->convertBibSpecialChars();
	$this->normalizeContent();

	// Split at every "@<type>" marker. Because the type is captured, types and entry
	// bodies end up as alternating elements. -1 means "no limit".
	$objects = preg_split("/\\@([\\w]*)/uix", (string)$this->getFileContent(), -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

	// preg_split() yields false on a PCRE error (e.g. invalid UTF-8 with the /u flag)
	// and an empty array for empty content; neither contains anything to parse.
	if (!is_array($objects) || count($objects) === 0) {
		return array();
	}

	// Skip a leading ignored keyword and its body. Entry types are treated
	// case-insensitively everywhere else, so do the same here.
	$ignored = array_map('strtolower', self::$ignored_keywords);
	if (in_array(strtolower($objects[0]), $ignored, true)) {
		$objects = array_slice($objects, 2);
	}

	// Some files lead to a (nearly) empty first element in front of the first
	// bib entry; it has to be thrown away.
	if (count($objects) > 0 && strlen($objects[0]) <= 3) {
		$objects = array_slice($objects, 1);
	}

	$entries = array();
	$count = count($objects);

	// Walk the list in (type, body) pairs; a trailing type without a body is dropped.
	for ($i = 0; $i + 1 < $count; $i += 2) {
		$entry = array();
		$entry['entryType'] = strtolower($objects[$i]);
		$body = $objects[$i + 1];

		// Citation key: everything between the opening brace and the first ",\n".
		if (preg_match("/^{(?<cite>.*),\\n/um", $body, $cite_matches) && $cite_matches['cite'] !== '') {
			$entry['cite'] = $cite_matches['cite'];
		}

		// Edit at regex101.com: (?<attr>[\w]*)\s*=\s*[{"]*(?<content>(.*?))\s*[}"]*?\s*[,]*?\s*\n
		$re = "/(?<attr>[\\w]*)\\s*=\\s*[{\"]*(?<content>(.*?))\\s*[}\"]*?\\s*[,]*?\\s*\\n/";

		preg_match_all($re, $body, $matches, PREG_SET_ORDER);

		foreach ($matches as $match) {
			$clean = $match['content'];
			$clean = preg_replace("/[\", \\t\\s]*\\n/u", "\n", $clean);

			$entry[strtolower($match['attr'])] = $clean;
		}

		$entries[] = $entry;
	}

	return $entries;
}
83 
84 
/**
 * Rewrites the file content into the line layout used by parseContent().
 *
 * Removes a UTF-8 BOM, then applies a fixed sequence of regular-expression
 * replacements and stores the result back with setFileContent().
 * Values that span several lines are not joined.
 */
protected function normalizeContent() {
	// Ordered (pattern, replacement) pairs; each rule works on the output of the
	// previous one, so the order must not be changed.
	$rules = array(
		// remove empty lines
		array("/^\n/um", ""),
		// replace lines that contain only whitespace with a newline
		array("/^[\\s]*$/um", "\n"),
		// reduce three consecutive newlines to two
		array("/\\n\\n\\n/um", "\n\n"),
		// remove comment lines (starting with %)
		array("/^%.*\\n/um", ""),
		// indent attributes with a tab
		array("/^[ ]+/um", "\t"),
		array("/^([\\w])/um", "\t$1"),
		// pull a closing bracket that starts a line up to the end of the previous line
		array('/\\n}/uimx', '}'),
		// put a closing bracket that ends a line onto a line of its own
		array("/}[\\s]*$/um", "\n}"),
	);

	$content = $this->removeBomUtf8($this->getFileContent());
	foreach ($rules as $rule) {
		$content = preg_replace($rule[0], $rule[1], $content);
	}

	$this->setFileContent($content);
}
113 
114 
/**
 * Replaces BibTeX/LaTeX escape sequences in the file content by the characters
 * they stand for (e.g. {\"a} becomes ä) and stores the result with setFileContent().
 *
 * The replacements are applied one after another in the order listed below.
 *
 * NOTE(review): {\@} is turned into a plain "@" here, before parseContent() splits
 * the content at "@" markers - confirm that this is intended.
 */
protected function convertBibSpecialChars() {
	// character => escape sequence that is replaced by it
	$bibtex_special_chars = array(
		// diaeresis / umlaut
		'ä' => '{\"a}',
		'ë' => '{\"e}',
		'ï' => '{\"i}',
		'ö' => '{\"o}',
		'ü' => '{\"u}',
		'Ä' => '{\"A}',
		'Ë' => '{\"E}',
		'Ï' => '{\"I}',
		'Ö' => '{\"O}',
		'Ü' => '{\"U}',
		// circumflex
		'â' => '{\^a}',
		'ê' => '{\^e}',
		'î' => '{\^i}',
		'ô' => '{\^o}',
		'û' => '{\^u}',
		'Â' => '{\^A}',
		'Ê' => '{\^E}',
		'Î' => '{\^I}',
		'Ô' => '{\^O}',
		'Û' => '{\^U}',
		// grave accent
		'à' => '{\`a}',
		'è' => '{\`e}',
		'ì' => '{\`i}',
		'ò' => '{\`o}',
		'ù' => '{\`u}',
		'À' => '{\`A}',
		'È' => '{\`E}',
		'Ì' => '{\`I}',
		'Ò' => '{\`O}',
		'Ù' => '{\`U}',
		// acute accent
		'á' => '{\\\'a}',
		'é' => '{\\\'e}',
		'í' => '{\\\'i}',
		'ó' => '{\\\'o}',
		'ú' => '{\\\'u}',
		'Á' => '{\\\'A}',
		'É' => '{\\\'E}',
		'Í' => '{\\\'I}',
		'Ó' => '{\\\'O}',
		'Ú' => '{\\\'U}',
		// other letters
		'ç' => '{\c c}',
		'ß' => '{\ss}',
		'ñ' => '{\~n}',
		'Ñ' => '{\~N}',
		'ń' => '{\\\'n}',
		// This slot used to map the escape of "ń" to a plain "l", which could never
		// match because the "ń" rule had already consumed it; \l is the LaTeX
		// command for the l with stroke.
		'ł' => '{\l}',
		// escaped reserved characters
		'&' => '{\&}',
		'@' => '{\@}',
	);

	$this->setFileContent(str_replace(array_values($bibtex_special_chars), array_keys($bibtex_special_chars), $this->getFileContent()));
}
177 
178 
/**
 * Field names that isStandardField() reports as standard.
 *
 * @var string[]
 */
protected static $standard_fields = array(
	'address', 'annote', 'author',
	'booktitle',
	'chapter', 'crossref',
	'edition', 'editor', 'eprint',
	'howpublished',
	'institution',
	'journal',
	'key',
	'month',
	'note', 'number',
	'organization',
	'pages', 'publisher',
	'school', 'series',
	'title', 'type',
	'url',
	'volume',
	'year',
);
/**
 * Entry type names that isEntryType() accepts.
 *
 * @var string[]
 */
protected static $entry_types = array(
	'article',
	'book', 'booklet',
	'conference',
	'inbook', 'incollection', 'inproceedings',
	'manual', 'mastersthesis', 'misc',
	'phdthesis', 'proceedings',
	'techreport',
	'unpublished',
);
229 
230 
/**
 * Tells whether a field name is contained in self::$standard_fields.
 *
 * The comparison is strict and case-sensitive, so non-string input such as 0 or
 * true no longer matches a field name through PHP's loose type juggling.
 *
 * @param string $field_name
 *
 * @return bool
 */
public static function isStandardField($field_name) {
	return in_array($field_name, self::$standard_fields, true);
}
239 
240 
/**
 * Tells whether an entry type name is contained in self::$entry_types.
 *
 * The comparison is strict and case-sensitive, so non-string input such as 0 or
 * true no longer matches a type name through PHP's loose type juggling.
 *
 * @param string $entry_ype
 *
 * @return bool
 */
public static function isEntryType($entry_ype) {
	return in_array($entry_ype, self::$entry_types, true);
}
249 
250 
/**
 * Strips a leading UTF-8 byte order mark (bytes EF BB BF) from a string.
 *
 * @param string $s
 *
 * @return string the part of $s after the BOM, or $s unchanged when it does not
 *                start with one
 */
protected function removeBomUtf8($s) {
	$bom = "\xEF\xBB\xBF";

	// No BOM at the start: nothing to strip.
	if (substr($s, 0, 3) != $bom) {
		return $s;
	}

	return substr($s, 3);
}
263 }
$result
static isStandardField($field_name)
convertBibSpecialChars()
Class ilBibTex.
static $entry_types
Create styles array
The data for the language used.
parseContent()
should return
static $ignored_keywords
Interface ilBibliograficFileReader.
Class ilBibliograficFileReaderBase.
static isEntryType($entry_ype)
static $standard_fields