ILIAS  Release_4_4_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
RISReader.php
Go to the documentation of this file.
1 <?php
12 namespace LibRIS;
13 
/**
 * Reader for bibliographic records in the RIS tagged format.
 *
 * Input is parsed line by line. A line of the form "XX  - value" (see
 * LINE_REGEX) starts a new value for tag XX, an "ER" tag line without a value
 * closes the current record, and any other non-empty line is treated as a
 * continuation of the most recent value.
 *
 * After parsing, getRecords() returns a list of records; each record is an
 * array mapping a two-character tag to the list of values found for it:
 *
 *   array(
 *     array('TY' => array('JOUR'), 'AU' => array('First', 'Second')),
 *     ...
 *   )
 */
class RISReader {

  /**
   * Line terminator historically used to split strings.
   *
   * Kept for backward compatibility; parseString() now accepts CRLF, LF and
   * CR line endings.
   */
  const RIS_EOL = "\r\n";

  /**
   * Pattern applied to every trimmed line.
   *
   * Group 2 captures the two-character tag, group 3 the text after the dash,
   * and group 4 the whole line when it does not start with a tag.
   */
  const LINE_REGEX = '/^(([A-Z1-9]{2})\s+-(.*))|(.*)$/';

  /**
   * Records produced by the most recent parse, or NULL before any parse.
   */
  protected $data = NULL;

  /**
   * Create a reader.
   *
   * @param array $options
   *   Accepted for interface compatibility; this implementation does not
   *   read it.
   */
  public function __construct($options = array()) {

  }

  /**
   * Parse the RIS file at the given path.
   *
   * The result replaces any previously parsed records and is available
   * through getRecords().
   *
   * @param string $filename
   *   Path of the file to read.
   * @param resource $context
   *   Optional stream context handed to file().
   *
   * @throws ParseException
   *   If the path is not a regular file or its contents cannot be read.
   */
  public function parseFile($filename, $context = NULL) {
    if (!is_file($filename)) {
      throw new ParseException(sprintf('File %s not found.', htmlentities($filename)));
    }

    // FILE_TEXT was dropped from the flags: its value is 0, it never had any
    // effect, and it is deprecated in recent PHP versions.
    $contents = file($filename, FILE_SKIP_EMPTY_LINES, $context);

    // file() signals a read failure with FALSE; do not pass that on to the
    // line parser.
    if ($contents === FALSE) {
      throw new ParseException(sprintf('File %s could not be read.', htmlentities($filename)));
    }

    $this->parseArray($contents);
  }

  /**
   * Parse RIS data held in a string.
   *
   * The result replaces any previously parsed records and is available
   * through getRecords().
   *
   * @param string $string
   *   RIS data. Lines may be terminated by CRLF, LF or CR.
   */
  public function parseString($string) {
    // Splitting on "\r\n" alone left LF-only input as a single line, and the
    // line pattern then only saw the first tag.
    $contents = preg_split('/\r\n|\r|\n/', (string) $string);
    $this->parseArray($contents);
  }

  /**
   * Turn a list of raw lines into records and store them in $this->data.
   *
   * @param array $lines
   *   Raw lines; surrounding whitespace (including line terminators) is
   *   trimmed from each one.
   */
  protected function parseArray($lines) {
    $recordset = array();

    // Do any cleaning and normalizing.
    $this->cleanData($lines);

    $record = array();
    $lastTag = NULL;
    foreach ($lines as $line) {
      $line = trim($line);
      $matches = array();

      preg_match(self::LINE_REGEX, $line, $matches);

      // Compare against '' rather than using empty(), so that a value or a
      // continuation line consisting of "0" is not thrown away.
      $tag = isset($matches[2]) ? $matches[2] : '';
      $hasValue = isset($matches[3]) && $matches[3] !== '';
      $hasText = isset($matches[4]) && $matches[4] !== '';

      if ($hasValue) {
        $lastTag = $tag;
        $record[$tag][] = trim($matches[3]);
      }
      // End record and prep a new one.
      elseif ($tag === 'ER') {
        $lastTag = NULL;
        $recordset[] = $record;
        $record = array();
      }
      // Append to the last value. Text seen before any tag (leading junk)
      // is skipped.
      elseif ($hasText && $lastTag !== NULL) {
        $lastEntry = count($record[$lastTag]) - 1;
        // We trim because some encoders add tabs or multiple spaces.
        // Standard is silent on how this should be handled.
        $record[$lastTag][$lastEntry] .= ' ' . trim($matches[4]);
      }
    }

    // Keep a trailing record that was not closed by an ER line.
    if (!empty($record)) {
      $recordset[] = $record;
    }

    $this->data = $recordset;
  }

  /**
   * Get the records produced by the most recent parse.
   *
   * @return array|null
   *   List of records (tag => list of values), or NULL if nothing has been
   *   parsed yet.
   */
  public function getRecords() {
    return $this->data;
  }

  /**
   * Print every value of every parsed record, labelled with the tag
   * description supplied by RISTags::describeTag().
   *
   * Each record is followed by a blank line. Nothing is printed when no
   * records are available.
   */
  public function printRecords() {
    // $this->data is NULL until something has been parsed.
    if (empty($this->data)) {
      return;
    }

    $format = "%s:\n\t%s\n";
    foreach ($this->data as $record) {
      foreach ($record as $key => $values) {
        foreach ($values as $value) {
          printf($format, RISTags::describeTag($key), $value);
        }
      }

      print PHP_EOL;
    }
  }

  /**
   * Normalize raw lines in place before they are parsed.
   *
   * Currently this only strips a UTF-8 byte order mark from the start of
   * the first line.
   *
   * Thanks to Derik Badman (http://madinkbeard.com/) for finding the bug
   * and suggesting this fix: http://blog.philipp-michels.de/?p=32
   *
   * @param array $lines
   *   Raw lines, modified by reference.
   */
  protected function cleanData(&$lines) {

    if (empty($lines) || !isset($lines[0])) {
      return;
    }

    $bom = pack('CCC', 0xef, 0xbb, 0xbf);
    $first = (string) $lines[0];
    if (substr($first, 0, 3) === $bom) {
      $lines[0] = (string) substr($first, 3);
    }
  }

}