ILIAS  Release_5_0_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
RISReader.php
Go to the documentation of this file.
1 <?php
11 namespace LibRIS;
12 
/**
 * Reader for bibliographic data in the RIS tagged text format.
 *
 * Input is processed line by line. A line that starts with a two-character
 * tag, whitespace and a hyphen opens a new field; an "ER" tag with no value
 * closes the current record; any other non-empty line is treated as a
 * continuation of the most recent field.
 *
 * The parsed result is a list of records. Each record maps a tag to the list
 * of values found for that tag, e.g.:
 *
 *   array(
 *     array('TY' => array('JOUR'), 'AU' => array('Doe, J.', 'Roe, R.')),
 *   )
 */
class RISReader {

  /**
   * Canonical RIS line terminator.
   *
   * Kept for backward compatibility with code that references the constant;
   * parseString() itself accepts CRLF, LF and CR terminated input.
   */
  const RIS_EOL = "\r\n";

  /**
   * Pattern applied to every trimmed line.
   *
   * Group 2 captures the tag and group 3 the text after the hyphen. When the
   * line is not a tag line, group 4 captures the entire line instead.
   */
  const LINE_REGEX = '/^(([A-Z1-9]{2})\s+-(.*))|(.*)$/';

  /**
   * Records produced by the most recent parse, or NULL if nothing has been
   * parsed yet.
   */
  protected $data = NULL;

  /**
   * Create a new reader.
   *
   * @param array $options
   *   Accepted for interface compatibility; currently unused.
   */
  public function __construct($options = array()) {
  }

  /**
   * Parse an RIS file and store its records on this reader.
   *
   * @param string $filename
   *   Path of the file to read.
   * @param resource $context
   *   Optional stream context handed through to file().
   *
   * @throws ParseException
   *   If $filename is not a regular file or cannot be read.
   */
  public function parseFile($filename, $context = NULL) {
    if (! is_file($filename)) {
      throw new ParseException(sprintf('File %s not found.', htmlentities($filename)));
    }

    // FILE_SKIP_EMPTY_LINES only takes effect together with
    // FILE_IGNORE_NEW_LINES. FILE_TEXT was a no-op and is deprecated.
    $flags = FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES;
    $contents = file($filename, $flags, $context);
    if ($contents === FALSE) {
      throw new ParseException(sprintf('File %s could not be read.', htmlentities($filename)));
    }

    $this->parseArray($contents);
  }

  /**
   * Parse RIS data held in a string and store its records on this reader.
   *
   * @param string $string
   *   RIS data. Lines may be terminated by CRLF, LF or CR.
   */
  public function parseString($string) {
    // Splitting on CRLF alone left LF-only input as one giant "line", of
    // which only the first tag was ever seen by the line pattern.
    $contents = preg_split('/\r\n|\r|\n/', (string) $string);
    $this->parseArray($contents);
  }

  /**
   * Turn a list of raw lines into records and store them in $this->data.
   *
   * @param array $lines
   *   Raw lines; surrounding whitespace on each line is ignored.
   */
  protected function parseArray($lines) {
    if (! is_array($lines)) {
      $lines = array();
    }

    // Do any cleaning and normalizing.
    $this->cleanData($lines);

    $recordset = array();
    $record = array();
    $lastTag = NULL;

    foreach ($lines as $line) {
      $matches = array();
      if (! preg_match(self::LINE_REGEX, trim($line), $matches)) {
        continue;
      }

      // Trailing groups that did not participate are absent from $matches.
      $tag = isset($matches[2]) ? $matches[2] : '';
      $value = isset($matches[3]) ? trim($matches[3]) : '';
      $continuation = isset($matches[4]) ? trim($matches[4]) : '';

      // Compare against '' rather than using empty(): the string "0" is a
      // legitimate value and must not be discarded.
      if ($value !== '') {
        $lastTag = $tag;
        $record[$tag][] = $value;
      }
      elseif ($tag === 'ER') {
        // End record and prep a new one.
        $lastTag = NULL;
        $recordset[] = $record;
        $record = array();
      }
      elseif ($continuation !== '' && $lastTag !== NULL) {
        // Append to the last field. Text seen before any tag (leading junk)
        // is skipped because $lastTag is still NULL. We trim because some
        // encoders add tabs or multiple spaces; the standard is silent on
        // how this should be handled.
        $lastEntry = count($record[$lastTag]) - 1;
        $record[$lastTag][$lastEntry] .= ' ' . $continuation;
      }
    }

    // Keep a trailing record that was never closed by an ER line.
    if (! empty($record)) {
      $recordset[] = $record;
    }

    $this->data = $recordset;
  }

  /**
   * Get the records produced by the most recent parse.
   *
   * @return array|null
   *   List of records (tag => list of values), or NULL if nothing has been
   *   parsed yet.
   */
  public function getRecords() {
    return $this->data;
  }

  /**
   * Print every value of every record, labelled via RISTags::describeTag().
   *
   * Prints nothing when no records are available.
   */
  public function printRecords() {
    if (empty($this->data)) {
      return;
    }

    $format = "%s:\n\t%s\n";
    foreach ($this->data as $record) {
      foreach ($record as $key => $values) {
        foreach ($values as $value) {
          printf($format, RISTags::describeTag($key), $value);
        }
      }
      print PHP_EOL;
    }
  }

  /**
   * Normalize raw lines in place before parsing.
   *
   * Currently this only strips a UTF-8 byte order mark from the first line.
   * Thanks to Derik Badman (http://madinkbeard.com/) for finding the bug and
   * suggesting this fix: http://blog.philipp-michels.de/?p=32
   *
   * @param array $lines
   *   Raw lines, modified by reference.
   */
  protected function cleanData(&$lines) {
    if (empty($lines) || ! isset($lines[0])) {
      return;
    }

    $bom = pack('CCC', 0xef, 0xbb, 0xbf);
    $first = (string) $lines[0];
    if (substr($first, 0, 3) === $bom) {
      $lines[0] = (string) substr($first, 3);
    }
  }
}