ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
class.ilBiblTexFileReader.php
Go to the documentation of this file.
1<?php
2
9{
10
/**
 * Entry type names that parseContent() skips: when the first token produced by
 * splitting the file equals one of these names, that token and the block that
 * follows it are dropped before the entries are built.
 *
 * @var string[]
 */
14 protected static $ignored_keywords = array( 'Preamble' );
15
16
/**
 * Parses the (normalized) BibTeX file content into a list of entries.
 *
 * The content is split at every "@<word>" marker. The resulting token list
 * alternates between an entry type and the raw body that follows it; each
 * such pair becomes one entry.
 *
 * NOTE(review): the split pattern matches every "@" in the content, not only
 * the ones that start an entry, so an unescaped "@" inside a field value also
 * starts a new token pair.
 *
 * @return array list of associative arrays; each one holds 'entryType'
 *               (lower-cased), 'cite' (only when a citation key was found)
 *               and one element per attribute, keyed by the lower-cased
 *               attribute name
 */
public function parseContent()
{
    $this->normalizeContent();

    // -1 means "no limit"; passing null here is deprecated since PHP 8.1.
    $objects = preg_split(
        "/\\@([\\w]*)/uix",
        (string) $this->getFileContent(),
        -1,
        PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
    );

    if (!is_array($objects)) {
        // preg_split() reported an error (e.g. content is not valid UTF-8).
        return array();
    }

    // Drop a leading ignored block: its type token plus the body after it.
    if (isset($objects[0]) && in_array($objects[0], self::$ignored_keywords, true)) {
        $objects = array_slice($objects, 2);
    }
    // Some files produce a very short leftover token in front of the first
    // bib entry; it has to be thrown away.
    if (isset($objects[0]) && strlen($objects[0]) <= 3) {
        $objects = array_slice($objects, 1);
    }

    $entries = array();
    $entry = array();
    foreach ($objects as $key => $object) {
        // Even positions hold the entry type, odd positions the entry body.
        if ($key % 2 === 0) {
            $entry = array('entryType' => strtolower($object));
            continue;
        }

        // Citation key: everything between the opening brace and the first
        // comma that is directly followed by a line break.
        $cite_matches = array();
        preg_match("/^{(?<cite>.*),\\n/um", $object, $cite_matches);
        if (!empty($cite_matches['cite'])) {
            $entry['cite'] = $cite_matches['cite'];
        }

        // Edit at regex101.com: (?<attr>[\w]*)\s*=\s*[{"]*(?<content>(.*?))\s*[}"]*?\s*[,]*?\s*\n
        $re = "/(?<attr>[\\w]*)\\s*=\\s*[{\"]*(?<content>(.*?))\\s*[}\"]*?\\s*[,]*?\\s*\\n/";

        $matches = array();
        preg_match_all($re, $object, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            // Strip quotes, commas and whitespace that directly precede a line break.
            $clean = preg_replace("/[\", \\t\\s]*\\n/u", "\n", $match['content']);
            $entry[strtolower($match['attr'])] = $clean;
        }

        $entries[] = $entry;
    }

    return $entries;
}
68
69
/**
 * Rewrites the file content into the line layout that parseContent() works on.
 *
 * A leading UTF-8 BOM is removed first, then a fixed sequence of regular
 * expression replacements is applied, each one to the output of the previous
 * one. The result is stored back via setFileContent().
 */
protected function normalizeContent()
{
    // Ordered list of array(pattern, replacement); the order is significant.
    $steps = array(
        // remove empty lines
        array("/^\n/um", ""),
        // lines consisting only of whitespace are reduced to a line break
        array("/^[\\s]*$/um", "\n"),
        array("/\\n\\n\\n/um", "\n\n"),
        // remove comment lines (starting with %)
        array("/^%.*\\n/um", ""),
        // indent attributes with a tab
        array("/^[ ]+/um", "\t"),
        array("/^([\\w])/um", "\t$1"),
        // pull a bracket that follows a line break up onto the previous line
        array('/\\n}/uimx', '}'),
        // move the last bracket of a line onto a line of its own
        array("/}[\\s]*$/um", "\n}"),
    );

    $content = $this->removeBomUtf8($this->getFileContent());
    foreach ($steps as $step) {
        $content = preg_replace($step[0], $step[1], $content);
    }

    // Support long lines (not working at the moment)
    // $re = "/(\"[^\"\\n]*)\\r?\\n(?!(([^\"]*\"){2})*[^\"]*$)/";
    // $subst = "$1";
    // $result = preg_replace($re, $subst, $result);

    $this->setFileContent($content);
}
102
103
/**
 * Replaces BibTeX escape sequences in the file content with the UTF-8
 * characters they stand for and stores the result via setFileContent().
 */
protected function convertBibSpecialChars()
{
    // UTF-8 character => BibTeX sequence that is replaced by it.
    $bibtex_special_chars = array(
        // diaeresis
        'ä' => '{\"a}',
        'ë' => '{\"e}',
        'ï' => '{\"i}',
        'ö' => '{\"o}',
        'ü' => '{\"u}',
        'Ä' => '{\"A}',
        'Ë' => '{\"E}',
        'Ï' => '{\"I}',
        'Ö' => '{\"O}',
        'Ü' => '{\"U}',
        // circumflex
        'â' => '{\^a}',
        'ê' => '{\^e}',
        'î' => '{\^i}',
        'ô' => '{\^o}',
        'û' => '{\^u}',
        'Â' => '{\^A}',
        'Ê' => '{\^E}',
        'Î' => '{\^I}',
        'Ô' => '{\^O}',
        'Û' => '{\^U}',
        // grave
        'à' => '{\`a}',
        'è' => '{\`e}',
        'ì' => '{\`i}',
        'ò' => '{\`o}',
        'ù' => '{\`u}',
        'À' => '{\`A}',
        'È' => '{\`E}',
        'Ì' => '{\`I}',
        'Ò' => '{\`O}',
        'Ù' => '{\`U}',
        // acute
        'á' => '{\\\'a}',
        'é' => '{\\\'e}',
        'í' => '{\\\'i}',
        'ó' => '{\\\'o}',
        'ú' => '{\\\'u}',
        'Á' => '{\\\'A}',
        'É' => '{\\\'E}',
        'Í' => '{\\\'I}',
        'Ó' => '{\\\'O}',
        'Ú' => '{\\\'U}',
        // others
        'ç' => '{\c c}',
        'ß' => '{\ss}',
        'ñ' => '{\~n}',
        'Ñ' => '{\~N}',
        'ń' => "{\\'n}",
        // The sequence for the Polish l with stroke. A plain 'l' mapped to the
        // same sequence as 'ń' would never be applied, because str_replace()
        // has already consumed every occurrence for the earlier pair.
        'ł' => '{\l}',
        '&' => "{\&}",
        '@' => "{\@}",
    );

    $this->setFileContent(
        str_replace(
            array_values($bibtex_special_chars),
            array_keys($bibtex_special_chars),
            $this->getFileContent()
        )
    );
}
167
168
/**
 * Strips a leading UTF-8 byte order mark (bytes EF BB BF) from a string.
 *
 * @param string $s
 *
 * @return string $s without its first three bytes when they are the BOM,
 *                otherwise $s unchanged
 */
protected function removeBomUtf8($s)
{
    $bom = "\xEF\xBB\xBF";

    return (substr($s, 0, 3) === $bom) ? substr($s, 3) : $s;
}
182}
$result
An exception for terminating execution or to throw for unit testing.
Class ilBiblFileReaderBase.
Class ilBiblRisFileReader.
parseContent()
REFACTOR Implementierungen mit Objekten statt mit Arrays array
$key
Definition: croninfo.php:18
Interface ilBiblFileReaderInterface.
$s
Definition: pwgen.php:45
echo;exit;}function LogoutNotification($SessionID){ global $ilDB;$q="SELECT session_id, data FROM usr_session WHERE expires > (\w+)\|/" PREG_SPLIT_NO_EMPTY PREG_SPLIT_DELIM_CAPTURE