ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
ilBiblTexFileReader Class Reference

Class ilBiblTexFileReader. More...

+ Inheritance diagram for ilBiblTexFileReader:
+ Collaboration diagram for ilBiblTexFileReader:

Public Member Functions

 parseContent ()
 
Deprecated:
REFACTOR: implement this with objects instead of arrays
Returns
array
More...
 
- Public Member Functions inherited from ilBiblFileReaderBase
 __construct (ilBiblEntryFactoryInterface $entry_factory, ilBiblFieldFactoryInterface $field_factory, ilBiblAttributeFactoryInterface $attribute_factory)
 ilBiblFileReaderBase constructor. More...
 
 getFileContent ()
 
 setFileContent ($file_content)
 
 getPathToFile ()
 
 setPathToFile ($path_to_file)
 
 getEntryFactory ()
 
Returns
ilBiblEntryFactoryInterface
More...
 
 getFieldFactory ()
 
Returns
ilBiblFieldFactoryInterface
More...
 
 getAttributeFactory ()
 
- Public Member Functions inherited from ilBiblFileReaderInterface
 readContent ($path_to_file)
 

Protected Member Functions

 normalizeContent ()
 
 convertBibSpecialChars ()
 
 removeBomUtf8 ($s)
 
- Protected Member Functions inherited from ilBiblFileReaderBase
 convertStringToUTF8 ($string)
 

Static Protected Attributes

static $ignored_keywords = array( 'Preamble' )
 

Additional Inherited Members

- Data Fields inherited from ilBiblFileReaderBase
const ATTRIBUTE_VALUE_MAXIMAL_TEXT_LENGTH = 4000
 
const ENCODING_UTF_8 = 'UTF-8'
 
const ENCODING_ASCII = 'ASCII'
 
const ENCODING_ISO_8859_1 = 'ISO-8859-1'
 
- Protected Attributes inherited from ilBiblFileReaderBase
 $file_content = ''
 
 $path_to_file = ''
 
 $entry_factory
 
 $field_factory
 
 $attribute_factory
 

Detailed Description

Member Function Documentation

◆ convertBibSpecialChars()

ilBiblTexFileReader::convertBibSpecialChars ( )
protected

Definition at line 104 of file class.ilBiblTexFileReader.php.

References ilBiblFileReaderBase\getFileContent(), and ilBiblFileReaderBase\setFileContent().

Referenced by parseContent().

105  { // Maps UTF-8 characters (keys) to their BibTeX escape sequences (values) and replaces every such sequence in the file content with the character.
106  $bibtex_special_chars['ä'] = '{\"a}';
107  $bibtex_special_chars['ë'] = '{\"e}';
108  $bibtex_special_chars['ï'] = '{\"i}';
109  $bibtex_special_chars['ö'] = '{\"o}';
110  $bibtex_special_chars['ü'] = '{\"u}';
111  $bibtex_special_chars['Ä'] = '{\"A}';
112  $bibtex_special_chars['Ë'] = '{\"E}';
113  $bibtex_special_chars['Ï'] = '{\"I}';
114  $bibtex_special_chars['Ö'] = '{\"O}';
115  $bibtex_special_chars['Ü'] = '{\"U}';
116  $bibtex_special_chars['â'] = '{\^a}';
117  $bibtex_special_chars['ê'] = '{\^e}';
118  $bibtex_special_chars['î'] = '{\^i}';
119  $bibtex_special_chars['ô'] = '{\^o}';
120  $bibtex_special_chars['û'] = '{\^u}';
121  $bibtex_special_chars['Â'] = '{\^A}';
122  $bibtex_special_chars['Ê'] = '{\^E}';
123  $bibtex_special_chars['Î'] = '{\^I}';
124  $bibtex_special_chars['Ô'] = '{\^O}';
125  $bibtex_special_chars['Û'] = '{\^U}';
126  $bibtex_special_chars['à'] = '{\`a}';
127  $bibtex_special_chars['è'] = '{\`e}';
128  $bibtex_special_chars['ì'] = '{\`i}';
129  $bibtex_special_chars['ò'] = '{\`o}';
130  $bibtex_special_chars['ù'] = '{\`u}';
131  $bibtex_special_chars['À'] = '{\`A}';
132  $bibtex_special_chars['È'] = '{\`E}';
133  $bibtex_special_chars['Ì'] = '{\`I}';
134  $bibtex_special_chars['Ò'] = '{\`O}';
135  $bibtex_special_chars['Ù'] = '{\`U}';
136  $bibtex_special_chars['á'] = '{\\\'a}';
137  $bibtex_special_chars['é'] = '{\\\'e}';
138  $bibtex_special_chars['í'] = '{\\\'i}';
139  $bibtex_special_chars['ó'] = '{\\\'o}';
140  $bibtex_special_chars['ú'] = '{\\\'u}';
141  $bibtex_special_chars['Á'] = '{\\\'A}';
142  $bibtex_special_chars['É'] = '{\\\'E}';
143  $bibtex_special_chars['Í'] = '{\\\'I}';
144  $bibtex_special_chars['Ó'] = '{\\\'O}';
145  $bibtex_special_chars['Ú'] = '{\\\'U}';
146  $bibtex_special_chars['à'] = '{\`a}'; // NOTE(review): lines 146-155 re-assign the grave-accent keys of lines 126-135 with identical values, so they have no effect
147  $bibtex_special_chars['è'] = '{\`e}';
148  $bibtex_special_chars['ì'] = '{\`i}';
149  $bibtex_special_chars['ò'] = '{\`o}';
150  $bibtex_special_chars['ù'] = '{\`u}';
151  $bibtex_special_chars['À'] = '{\`A}';
152  $bibtex_special_chars['È'] = '{\`E}';
153  $bibtex_special_chars['Ì'] = '{\`I}';
154  $bibtex_special_chars['Ò'] = '{\`O}';
155  $bibtex_special_chars['Ù'] = '{\`U}';
156  $bibtex_special_chars['ç'] = '{\c c}';
157  $bibtex_special_chars['ß'] = '{\ss}';
158  $bibtex_special_chars['ñ'] = '{\~n}';
159  $bibtex_special_chars['Ñ'] = '{\~N}';
160  $bibtex_special_chars['ń'] = "{\\'n}";
161  $bibtex_special_chars['l'] = "{\\'n}"; // NOTE(review): same sequence as the 'ń' entry above; str_replace handles the search array in order, so this entry never matches - possibly a different character/sequence was intended, confirm
162  $bibtex_special_chars['&'] = "{\&}";
163  $bibtex_special_chars['@'] = "{\@}"; // NOTE(review): parseContent() calls this method before splitting the content on '@', so an unescaped '@' inside a field value may be treated as an entry start - confirm
164 
165  $this->setFileContent(str_replace(array_values($bibtex_special_chars), array_keys($bibtex_special_chars), $this->getFileContent())); // search for the escape sequences (values), substitute the characters (keys), store the result as the new file content
166  }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ normalizeContent()

ilBiblTexFileReader::normalizeContent ( )
protected

Definition at line 73 of file class.ilBiblTexFileReader.php.

References $result, ilBiblFileReaderBase\getFileContent(), removeBomUtf8(), and ilBiblFileReaderBase\setFileContent().

Referenced by parseContent().

74  { // Cleans up the raw file content (BOM, blank lines, comment lines, indentation, bracket placement) and stores the result via setFileContent().
75  $result = $this->removeBomUtf8($this->getFileContent());
76  // remove empty newlines
77  $result = preg_replace("/^\n/um", "", $result);
78  // Replace lines consisting only of whitespace with a single newline
79  $result = preg_replace("/^[\\s]*$/um", "\n", $result);
80  $result = preg_replace("/\\n\\n\\n/um", "\n\n", $result); // collapse three consecutive newlines into two
81 
82  // remove comment lines (lines starting with %)
83  $result = preg_replace("/^%.*\\n/um", "", $result);
84 
85  // Indent attributes with a tab
86  $result = preg_replace("/^[ ]+/um", "\t", $result); // leading spaces become one tab
87  $result = preg_replace("/^([\\w])/um", "\t$1", $result); // lines starting with a word character get a tab prepended
88 
89  // replace newline-brackets with brackets
90  $result = preg_replace('/\\n}/uimx', '}', $result);
91 
92  // move last bracket onto a new line
93  $result = preg_replace("/}[\\s]*$/um", "\n}", $result);
94 
95  // Support long lines (not working at the moment)
96  // $re = "/(\"[^\"\\n]*)\\r?\\n(?!(([^\"]*\"){2})*[^\"]*$)/";
97  // $subst = "$1";
98  // $result = preg_replace($re, $subst, $result);
99 
100  $this->setFileContent($result);
101  }
$result
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ parseContent()

ilBiblTexFileReader::parseContent ( )

Deprecated:
REFACTOR: implement this with objects instead of arrays
Returns
array

Implements ilBiblFileReaderInterface.

Definition at line 20 of file class.ilBiblTexFileReader.php.

References $key, convertBibSpecialChars(), ilBiblFileReaderBase\getFileContent(), and normalizeContent().

21  { // Converts BibTeX escapes, normalizes the content, splits it into entries at '@<type>' and returns one associative array per entry.
22  $this->convertBibSpecialChars();
23  $this->normalizeContent();
24 
25  // get entries
26  $subject = $this->getFileContent();
27  $objects = preg_split("/\\@([\\w]*)/uix", $subject, null, PREG_SPLIT_DELIM_CAPTURE
28  | PREG_SPLIT_NO_EMPTY); // the captured entry type is kept as its own element; NOTE(review): passing null as the limit is deprecated as of PHP 8.1, -1 is the documented "no limit" value - confirm target PHP version
29 
30  if (in_array($objects[0], self::$ignored_keywords)) { // drop a leading ignored keyword (e.g. 'Preamble') together with its body
31  $objects = array_splice($objects, 2);
32  }
33  // some files lead to an empty first entry in the array with the first bib-entry, we have to throw them away...
34  if (strlen($objects[0]) <= 3) {
35  $objects = array_splice($objects, 1);
36  }
37 
38  $entries = array();
39  foreach ($objects as $key => $object) {
40  if ((int) $key % 2 == 0 || (int) $key == 0) { // even positions hold the entry type, odd positions the entry body
41  $entry = array();
42  $entry['entryType'] = strtolower($object);
43  } else {
44  // Citation
45  preg_match("/^{(?<cite>.*),\\n/um", $object, $cite_matches);
46  if ($cite_matches['cite']) { // NOTE(review): 'cite' is not set when the pattern does not match - confirm whether a notice can occur here
47  $entry['cite'] = $cite_matches['cite'];
48  }
49 
50  // Edit at regex101.com: (?<attr>[\w]*)\s*=\s*[{"]*(?<content>(.*?))\s*[}"]*?\s*[,]*?\s*\n
51  $re = "/(?<attr>[\\w]*)\\s*=\\s*[{\"]*(?<content>(.*?))\\s*[}\"]*?\\s*[,]*?\\s*\\n/";
52 
53  preg_match_all($re, $object, $matches, PREG_SET_ORDER);
54 
55  foreach ($matches as $match) {
56  $clean = $match['content'];
57  $clean = preg_replace("/[\", \\t\\s]*\\n/u", "\n", $clean);
58 
59  $entry[strtolower($match['attr'])] = $clean; // attribute names are stored lower-cased
60  }
61 
62  $entries[] = $entry; // the entry is complete once its body has been parsed
63  }
64  }
65 
66  return $entries;
67  }
$key
Definition: croninfo.php:18
+ Here is the call graph for this function:

◆ removeBomUtf8()

ilBiblTexFileReader::removeBomUtf8 (   $s)
protected
Parameters
$s
Returns
bool|string

Definition at line 174 of file class.ilBiblTexFileReader.php.

References $s.

Referenced by normalizeContent().

175  { // Returns $s without a leading UTF-8 byte-order mark; $s is returned unchanged when it does not start with one.
176  if (substr($s, 0, 3) == chr(hexdec('EF')) . chr(hexdec('BB')) . chr(hexdec('BF'))) { // bytes EF BB BF are the UTF-8 BOM
177  return substr($s, 3); // skip the three BOM bytes
178  } else {
179  return $s;
180  }
181  }
$s
Definition: pwgen.php:45
+ Here is the caller graph for this function:

Field Documentation

◆ $ignored_keywords

ilBiblTexFileReader::$ignored_keywords = array( 'Preamble' )
staticprotected

Definition at line 14 of file class.ilBiblTexFileReader.php.


The documentation for this class was generated from the following file: