ILIAS  release_5-3 Revision v5.3.23-19-g915713cf615
class.ilBibTex.php
Go to the documentation of this file.
1 <?php
2 
10 {
11 
/**
 * Keywords that are not bibliographic entries. parseContent() compares the
 * first "@keyword" of the file against this list and, on a hit, discards
 * that keyword together with its body.
 *
 * @var array
 */
protected static $ignored_keywords = array('Preamble');
16 
17 
/**
 * Parse the BibTeX file content into a list of entries.
 *
 * The content is first converted (LaTeX escapes to characters) and
 * normalized, then split at every "@type" marker. The split yields an
 * alternating list "type, body, type, body, ..."; each pair becomes one entry.
 *
 * NOTE(review): convertBibSpecialChars() turns the escape {\@} into a bare
 * "@" before the split below runs, so an escaped "@" inside a field value
 * looks like the start of a new entry here - confirm whether that is intended.
 *
 * @return array list of entries; every entry is an associative array holding
 *               'entryType' (lower-cased), 'cite' (when a citation key was
 *               found) and one lower-cased key per parsed attribute
 */
public function parseContent()
{
    $this->convertBibSpecialChars();
    $this->normalizeContent();

    // Split into alternating [type, body, type, body, ...] chunks.
    // -1 is the documented "no limit" value for preg_split().
    $objects = preg_split(
        "/\\@([\\w]*)/uix",
        (string) $this->getFileContent(),
        -1,
        PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
    );

    // preg_split() returns false on a PCRE error (e.g. invalid UTF-8 with /u).
    if (!is_array($objects)) {
        return array();
    }

    // Skip a leading ignored block (keyword + body). The split pattern is
    // case-insensitive, so the keyword comparison has to be as well.
    $ignored = array_map('strtolower', self::$ignored_keywords);
    if (isset($objects[0]) && in_array(strtolower($objects[0]), $ignored, true)) {
        $objects = array_slice($objects, 2);
    }

    // Some files lead to a short junk chunk in front of the first bib entry;
    // throw it away so that types and bodies line up again.
    if (isset($objects[0]) && strlen($objects[0]) <= 3) {
        $objects = array_slice($objects, 1);
    }

    $entries = array();
    $entry = array();
    foreach ($objects as $key => $object) {
        if ($key % 2 === 0) {
            // Even position: the entry type, which starts a new entry.
            $entry = array('entryType' => strtolower($object));
            continue;
        }

        // Odd position: the body. Its first line holds the citation key.
        if (preg_match("/^{(?<cite>.*),\\n/um", $object, $cite_matches) && $cite_matches['cite'] !== '') {
            $entry['cite'] = $cite_matches['cite'];
        }

        // Edit at regex101.com: (?<attr>[\w]*)\s*=\s*[{"]*(?<content>(.*?))\s*[}"]*?\s*[,]*?\s*\n
        $re = "/(?<attr>[\\w]*)\\s*=\\s*[{\"]*(?<content>(.*?))\\s*[}\"]*?\\s*[,]*?\\s*\\n/";
        preg_match_all($re, $object, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            // Strip trailing quotes, commas and whitespace in front of a line break.
            $clean = preg_replace("/[\", \\t\\s]*\\n/u", "\n", $match['content']);
            $entry[strtolower($match['attr'])] = $clean;
        }

        $entries[] = $entry;
    }

    return $entries;
}
85 
86 
/**
 * Rewrite the file content into the line layout that parseContent() expects.
 *
 * Strips a UTF-8 BOM, then applies a fixed sequence of regular-expression
 * rewrites and stores the result back as the file content. The rules are
 * order-dependent: each one works on the output of the previous one.
 * Values spanning several lines are not joined.
 */
protected function normalizeContent()
{
    // pattern / replacement pairs, applied top to bottom
    $rules = array(
        // remove empty lines
        array("/^\n/um", ""),
        // replace whitespace-only lines by a line break and collapse triple breaks
        array("/^[\\s]*$/um", "\n"),
        array("/\\n\\n\\n/um", "\n\n"),
        // remove comment lines (starting with '%')
        array("/^%.*\\n/um", ""),
        // indent attributes with a tab: leading blanks become one tab,
        // lines starting with a word character get a tab prepended
        array("/^[ ]+/um", "\t"),
        array("/^([\\w])/um", "\t$1"),
        // pull a closing brace at the start of a line up to the previous line
        array('/\\n}/uimx', '}'),
        // put a closing brace that ends a line onto a line of its own
        array("/}[\\s]*$/um", "\n}"),
    );

    $content = $this->removeBomUtf8($this->getFileContent());
    foreach ($rules as $rule) {
        list($pattern, $replacement) = $rule;
        $content = preg_replace($pattern, $replacement, $content);
    }

    $this->setFileContent($content);
}
116 
117 
/**
 * Replace braced LaTeX escape sequences in the file content by the UTF-8
 * characters they stand for and store the result back as the file content.
 *
 * The table lists every sequence once, in the order it is applied. Earlier
 * code repeated the grave-accent block and carried an 'l' mapping for the
 * sequence that already yields 'ń'; neither could ever take effect, so they
 * are not listed here.
 */
protected function convertBibSpecialChars()
{
    // LaTeX sequence => character
    $latex_to_char = array(
        // diaeresis
        '{\"a}' => 'ä',
        '{\"e}' => 'ë',
        '{\"i}' => 'ï',
        '{\"o}' => 'ö',
        '{\"u}' => 'ü',
        '{\"A}' => 'Ä',
        '{\"E}' => 'Ë',
        '{\"I}' => 'Ï',
        '{\"O}' => 'Ö',
        '{\"U}' => 'Ü',
        // circumflex
        '{\^a}' => 'â',
        '{\^e}' => 'ê',
        '{\^i}' => 'î',
        '{\^o}' => 'ô',
        '{\^u}' => 'û',
        '{\^A}' => 'Â',
        '{\^E}' => 'Ê',
        '{\^I}' => 'Î',
        '{\^O}' => 'Ô',
        '{\^U}' => 'Û',
        // grave
        '{\`a}' => 'à',
        '{\`e}' => 'è',
        '{\`i}' => 'ì',
        '{\`o}' => 'ò',
        '{\`u}' => 'ù',
        '{\`A}' => 'À',
        '{\`E}' => 'È',
        '{\`I}' => 'Ì',
        '{\`O}' => 'Ò',
        '{\`U}' => 'Ù',
        // acute
        '{\\\'a}' => 'á',
        '{\\\'e}' => 'é',
        '{\\\'i}' => 'í',
        '{\\\'o}' => 'ó',
        '{\\\'u}' => 'ú',
        '{\\\'A}' => 'Á',
        '{\\\'E}' => 'É',
        '{\\\'I}' => 'Í',
        '{\\\'O}' => 'Ó',
        '{\\\'U}' => 'Ú',
        // others
        '{\c c}' => 'ç',
        '{\ss}' => 'ß',
        '{\~n}' => 'ñ',
        '{\~N}' => 'Ñ',
        '{\\\'n}' => 'ń',
        '{\&}' => '&',
        '{\@}' => '@',
    );

    $converted = str_replace(
        array_keys($latex_to_char),
        array_values($latex_to_char),
        $this->getFileContent()
    );

    $this->setFileContent($converted);
}
181 
182 
/**
 * Field names that isStandardField() reports as standard fields.
 *
 * @var array
 */
protected static $standard_fields = array(
    'address', 'annote', 'author',
    'booktitle',
    'chapter', 'crossref',
    'edition', 'editor', 'eprint',
    'howpublished',
    'institution',
    'journal',
    'key',
    'month',
    'note', 'number',
    'organization',
    'pages', 'publisher',
    'school', 'series',
    'title', 'type',
    'url',
    'volume',
    'year',
);
/**
 * Entry type names that isEntryType() reports as known entry types.
 *
 * @var array
 */
protected static $entry_types = array(
    'article',
    'book', 'booklet',
    'conference',
    'inbook', 'incollection', 'inproceedings',
    'manual', 'mastersthesis', 'misc',
    'phdthesis', 'proceedings',
    'techreport',
    'unpublished',
);
233 
234 
/**
 * Tell whether the given name is one of the standard field names.
 *
 * The comparison is strict: with a loose comparison, non-string arguments
 * such as 0 or true match the first list element on PHP versions before 8
 * and would wrongly be reported as standard fields.
 *
 * @param string $field_name field name to look up (compared case-sensitively)
 *
 * @return bool true when $field_name is contained in self::$standard_fields
 */
public static function isStandardField($field_name)
{
    return in_array($field_name, self::$standard_fields, true);
}
244 
245 
/**
 * Tell whether the given name is one of the known entry types.
 *
 * The comparison is strict: with a loose comparison, non-string arguments
 * such as 0 or true match the first list element on PHP versions before 8
 * and would wrongly be reported as entry types.
 *
 * @param string $entry_ype entry type to look up (compared case-sensitively)
 *
 * @return bool true when $entry_ype is contained in self::$entry_types
 */
public static function isEntryType($entry_ype)
{
    return in_array($entry_ype, self::$entry_types, true);
}
255 
256 
/**
 * Strip a leading UTF-8 byte order mark (bytes EF BB BF) from a string.
 *
 * @param string $s text that may start with a BOM
 *
 * @return string $s without its first three bytes when they are the BOM,
 *                otherwise $s unchanged
 */
protected function removeBomUtf8($s)
{
    $bom = "\xEF\xBB\xBF";

    // No BOM in front: hand the input back untouched.
    if (substr($s, 0, 3) !== $bom) {
        return $s;
    }

    return substr($s, 3);
}
270 }
$result
static isStandardField($field_name)
convertBibSpecialChars()
Class ilBibTex.
$s
Definition: pwgen.php:45
static $entry_types
Create styles array
The data for the language used.
parseContent()
should return
static $ignored_keywords
Interface ilBibliograficFileReader.
Class ilBibliograficFileReaderBase.
$key
Definition: croninfo.php:18
static isEntryType($entry_ype)
static $standard_fields