ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
Csv.php
Go to the documentation of this file.
1 <?php
2 
4 
10 
/**
 * Reader that loads delimiter-separated text files (CSV/TSV) into a Spreadsheet.
 *
 * The reader converts non-UTF-8 input to UTF-8 in memory, honours an explicit
 * `sep=X` first line, and otherwise infers the delimiter from the file content.
 */
class Csv extends BaseReader
{
    public const DEFAULT_FALLBACK_ENCODING = 'CP1252';
    public const GUESS_ENCODING = 'guess';
    public const UTF8_BOM = "\xEF\xBB\xBF";
    public const UTF8_BOM_LEN = 3;
    public const UTF16BE_BOM = "\xfe\xff";
    public const UTF16BE_BOM_LEN = 2;
    public const UTF16BE_LF = "\x00\x0a";
    public const UTF16LE_BOM = "\xff\xfe";
    public const UTF16LE_BOM_LEN = 2;
    public const UTF16LE_LF = "\x0a\x00";
    public const UTF32BE_BOM = "\x00\x00\xfe\xff";
    public const UTF32BE_BOM_LEN = 4;
    public const UTF32BE_LF = "\x00\x00\x00\x0a";
    public const UTF32LE_BOM = "\xff\xfe\x00\x00";
    public const UTF32LE_BOM_LEN = 4;
    public const UTF32LE_LF = "\x0a\x00\x00\x00";

    /**
     * Encoding of the input file. The GUESS_ENCODING sentinel requests
     * detection via guessEncoding() when the file is opened.
     *
     * @var string
     */
    private $inputEncoding = 'UTF-8';

    /**
     * Encoding handed to guessEncoding() as the result to use when detection fails.
     *
     * @var string
     */
    private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;

    /**
     * Field delimiter; null until set by the caller, a `sep=` line, or inference.
     *
     * @var null|string
     */
    private $delimiter;

    /**
     * Field enclosure character.
     *
     * @var string
     */
    private $enclosure = '"';

    /**
     * Index of the worksheet the data is loaded into.
     *
     * @var int
     */
    private $sheetIndex = 0;

    /**
     * When true, output rows are numbered consecutively and only advance for
     * input rows that actually produce a cell.
     *
     * @var bool
     */
    private $contiguous = false;

    /**
     * Escape character passed to fgetcsv() and to the delimiter inference.
     *
     * @var string
     */
    private $escapeCharacter = '\\';

    /**
     * Optional callback invoked with every newly constructed reader.
     *
     * @var null|callable
     */
    private static $constructorCallback;

    /**
     * Create a new CSV reader and run the constructor callback, if one is set.
     */
    public function __construct()
    {
        parent::__construct();
        $callback = self::$constructorCallback;
        if ($callback !== null) {
            $callback($this);
        }
    }

    /**
     * Register (or clear, with null) a callback that receives each new reader
     * instance from the constructor, e.g. to change its defaults.
     */
    public static function setConstructorCallback(?callable $callback): void
    {
        self::$constructorCallback = $callback;
    }

    /**
     * Return the currently registered constructor callback, or null.
     */
    public static function getConstructorCallback(): ?callable
    {
        return self::$constructorCallback;
    }

    /**
     * Set the encoding of the input file (or self::GUESS_ENCODING to detect it).
     *
     * @return $this
     */
    public function setInputEncoding(string $pValue): self
    {
        $this->inputEncoding = $pValue;

        return $this;
    }

    /**
     * Return the configured input encoding.
     */
    public function getInputEncoding(): string
    {
        return $this->inputEncoding;
    }

    /**
     * Set the encoding used when encoding detection cannot decide.
     *
     * @return $this
     */
    public function setFallbackEncoding(string $pValue): self
    {
        $this->fallbackEncoding = $pValue;

        return $this;
    }

    /**
     * Return the configured fallback encoding.
     */
    public function getFallbackEncoding(): string
    {
        return $this->fallbackEncoding;
    }

    /**
     * Position the file pointer at the start of the data, just past a UTF-8
     * BOM when the file begins with one.
     */
    protected function skipBOM(): void
    {
        rewind($this->fileHandle);

        // fgets() reads at most (length - 1) bytes, hence the +1.
        if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) {
            rewind($this->fileHandle);
        }
    }

    /**
     * Look for an explicit `sep=X` declaration on the next line of the file.
     *
     * When found, X becomes the delimiter and the pointer stays after that
     * line; otherwise the pointer is reset to the start of the data.
     */
    protected function checkSeparator(): void
    {
        $line = fgets($this->fileHandle);
        if ($line === false) {
            return;
        }

        // Exactly "sep=" plus one delimiter character, ignoring the line ending.
        if ((strlen(trim($line, "\r\n")) === 5) && (stripos($line, 'sep=') === 0)) {
            $this->delimiter = substr($line, 4, 1);

            return;
        }

        $this->skipBOM();
    }

    /**
     * Infer the delimiter from the file content unless one is already known.
     *
     * Falls back to the inference engine's default delimiter when the file has
     * no lines or no delimiter can be detected, then resets the pointer to the
     * start of the data.
     */
    protected function inferSeparator(): void
    {
        if ($this->delimiter !== null) {
            return;
        }

        $inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);

        // With zero lines there is nothing to infer from.
        $inferred = ($inferenceEngine->linesCounted() === 0) ? null : $inferenceEngine->infer();
        $this->delimiter = $inferred ?? $inferenceEngine->getDefaultDelimiter();

        $this->skipBOM();
    }

    /**
     * Return worksheet info (name, last column letter, last column index,
     * total rows, total columns) without loading the cell data.
     *
     * @return array single-element list holding the info array for the one worksheet
     */
    public function listWorksheetInfo(string $pFilename): array
    {
        $this->openFileOrMemory($pFilename);
        $fileHandle = $this->fileHandle;

        $totalRows = 0;
        $lastColumnIndex = 0;

        try {
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Walk every row, tracking the row count and the widest row seen.
            for ($rowData = $this->fetchCsvRow($fileHandle); is_array($rowData); $rowData = $this->fetchCsvRow($fileHandle)) {
                ++$totalRows;
                $lastColumnIndex = max($lastColumnIndex, count($rowData) - 1);
            }
        } finally {
            // Release the handle even if separator detection or parsing throws.
            fclose($fileHandle);
        }

        return [
            [
                'worksheetName' => 'Worksheet',
                'lastColumnLetter' => Coordinate::stringFromColumnIndex($lastColumnIndex + 1),
                'lastColumnIndex' => $lastColumnIndex,
                'totalRows' => $totalRows,
                'totalColumns' => $lastColumnIndex + 1,
            ],
        ];
    }

    /**
     * Load the file into a newly created Spreadsheet.
     *
     * @param string $pFilename
     *
     * @return Spreadsheet
     */
    public function load($pFilename)
    {
        return $this->loadIntoExisting($pFilename, new Spreadsheet());
    }

    /**
     * Open the file into $this->fileHandle, converting it to UTF-8 through an
     * in-memory stream when the input encoding is not UTF-8.
     *
     * @throws Exception when the file is not readable as CSV or cannot be buffered
     */
    private function openFileOrMemory(string $pFilename): void
    {
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }
        if ($this->inputEncoding === self::GUESS_ENCODING) {
            $this->inputEncoding = self::guessEncoding($pFilename, $this->fallbackEncoding);
        }
        $this->openFile($pFilename);
        if ($this->inputEncoding === 'UTF-8') {
            return;
        }

        // Replace the on-disk handle with a UTF-8 copy held in memory.
        fclose($this->fileHandle);
        $entireFile = file_get_contents($pFilename);
        if ($entireFile === false) {
            throw new Exception('Could not read ' . $pFilename . ' for encoding conversion.');
        }
        $memoryHandle = fopen('php://memory', 'r+b');
        if ($memoryHandle === false) {
            throw new Exception('Could not open a memory stream to convert ' . $pFilename . '.');
        }
        $this->fileHandle = $memoryHandle;
        fwrite($this->fileHandle, StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding));
        // The write left the pointer at the end of the stream; go back to the data start.
        $this->skipBOM();
    }

    /**
     * Read the next CSV record from the handle using the configured
     * delimiter, enclosure and escape character.
     *
     * @param resource $fileHandle
     *
     * @return null|array|false whatever fgetcsv() returns; an array for a parsed row
     */
    private function fetchCsvRow($fileHandle)
    {
        return fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
    }

    /**
     * Load the file into the sheet at the configured sheet index of an
     * existing Spreadsheet, creating sheets as needed.
     *
     * @throws Exception when the file is not readable as CSV
     */
    public function loadIntoExisting(string $pFilename, Spreadsheet $spreadsheet): Spreadsheet
    {
        $lineEnding = ini_get('auto_detect_line_endings') ?: '0';
        ini_set('auto_detect_line_endings', '1');

        $fileHandle = null;

        try {
            $this->openFileOrMemory($pFilename);
            $fileHandle = $this->fileHandle;

            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Make sure the target sheet exists, then select it.
            while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                $spreadsheet->createSheet();
            }
            $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

            // $currentRow follows the input; $outRow is where cells are written.
            $currentRow = 1;
            $outRow = 0;

            for ($rowData = $this->fetchCsvRow($fileHandle); is_array($rowData); $rowData = $this->fetchCsvRow($fileHandle)) {
                $noOutputYet = true;
                $columnLetter = 'A';
                foreach ($rowData as $rowDatum) {
                    // Empty fields (null or '') are never written.
                    $hasValue = $rowDatum !== null && $rowDatum !== '';
                    if ($hasValue && $this->readFilter->readCell($columnLetter, $currentRow)) {
                        if ($this->contiguous) {
                            // Advance the output row once, on the first cell written for this input row.
                            if ($noOutputYet) {
                                $noOutputYet = false;
                                ++$outRow;
                            }
                        } else {
                            $outRow = $currentRow;
                        }
                        $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                    }
                    ++$columnLetter;
                }
                ++$currentRow;
            }
        } finally {
            // Always release the handle and restore the ini setting, even on failure.
            if (is_resource($fileHandle)) {
                fclose($fileHandle);
            }
            ini_set('auto_detect_line_endings', $lineEnding);
        }

        return $spreadsheet;
    }

    /**
     * Return the delimiter, or null when none has been set or detected yet.
     */
    public function getDelimiter(): ?string
    {
        return $this->delimiter;
    }

    /**
     * Set the field delimiter explicitly, which disables inference.
     *
     * @return $this
     */
    public function setDelimiter(string $delimiter): self
    {
        $this->delimiter = $delimiter;

        return $this;
    }

    /**
     * Return the enclosure character.
     */
    public function getEnclosure(): string
    {
        return $this->enclosure;
    }

    /**
     * Set the enclosure character; an empty string selects the double quote.
     *
     * @return $this
     */
    public function setEnclosure(string $enclosure): self
    {
        $this->enclosure = ($enclosure === '') ? '"' : $enclosure;

        return $this;
    }

    /**
     * Return the index of the sheet that data is loaded into.
     */
    public function getSheetIndex(): int
    {
        return $this->sheetIndex;
    }

    /**
     * Set the index of the sheet that data is loaded into.
     *
     * @return $this
     */
    public function setSheetIndex(int $pValue): self
    {
        $this->sheetIndex = $pValue;

        return $this;
    }

    /**
     * Enable or disable contiguous output rows.
     *
     * @return $this
     */
    public function setContiguous(bool $contiguous): self
    {
        $this->contiguous = $contiguous;

        return $this;
    }

    /**
     * Return whether contiguous output rows are enabled.
     */
    public function getContiguous(): bool
    {
        return $this->contiguous;
    }

    /**
     * Set the escape character.
     *
     * @return $this
     */
    public function setEscapeCharacter(string $escapeCharacter): self
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }

    /**
     * Return the escape character.
     */
    public function getEscapeCharacter(): string
    {
        return $this->escapeCharacter;
    }

    /**
     * Lower-case the value when it is a string; anything else becomes ''.
     *
     * @param mixed $extension
     */
    private static function extractStringLower($extension): string
    {
        return is_string($extension) ? strtolower($extension) : '';
    }

    /**
     * Can this reader read the file?
     *
     * A file that can be opened is accepted when its extension is csv/tsv or
     * when its detected MIME type is one of the supported text types.
     *
     * @param string $pFilename
     *
     * @return bool
     */
    public function canRead($pFilename)
    {
        // The file must at least be openable.
        try {
            $this->openFile($pFilename);
        } catch (InvalidArgumentException $e) {
            return false;
        }

        fclose($this->fileHandle);

        // Trust the file extension, if any.
        $extension = self::extractStringLower(pathinfo($pFilename, PATHINFO_EXTENSION));
        if (in_array($extension, ['csv', 'tsv'], true)) {
            return true;
        }

        // Otherwise fall back to the guessed MIME type.
        $type = mime_content_type($pFilename);
        $supportedTypes = [
            'application/csv',
            'text/csv',
            'text/plain',
            'inode/x-empty',
        ];

        return in_array($type, $supportedTypes, true);
    }

    /**
     * If no encoding has been chosen yet, choose $setEncoding when $compare
     * first occurs in $contents at an offset that is a multiple of its length.
     */
    private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
    {
        if ($encoding !== '') {
            return;
        }

        $pos = strpos($contents, $compare);
        if ($pos !== false && $pos % strlen($compare) === 0) {
            $encoding = $setEncoding;
        }
    }

    /**
     * Guess the encoding of a file without a BOM from its line-feed byte
     * pattern, then from UTF-8 validity. Returns '' when nothing matches.
     */
    private static function guessEncodingNoBom(string $filename): string
    {
        $encoding = '';
        $contents = file_get_contents($filename);
        if ($contents === false) {
            // Unreadable file: inspect an empty string rather than passing false on.
            $contents = '';
        }
        // Wider code units are tested first.
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
        // The empty pattern with the /u modifier only matches valid UTF-8.
        if ($encoding === '' && preg_match('//u', $contents) === 1) {
            $encoding = 'UTF-8';
        }

        return $encoding;
    }

    /**
     * If no encoding has been chosen yet, choose $setEncoding when $first4
     * starts with the BOM given in $compare.
     */
    private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
    {
        if ($encoding === '' && $compare === substr($first4, 0, strlen($compare))) {
            $encoding = $setEncoding;
        }
    }

    /**
     * Guess the encoding from a BOM in the first four bytes of the file.
     * Returns '' when the file cannot be read or has no known BOM.
     */
    private static function guessEncodingBom(string $filename): string
    {
        $encoding = '';
        $first4 = file_get_contents($filename, false, null, 0, 4);
        if ($first4 !== false) {
            self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
            self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
            self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
            // UTF-32LE must be tested before UTF-16LE: its BOM starts with the UTF-16LE BOM.
            self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
            self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
        }

        return $encoding;
    }

    /**
     * Guess the encoding of a file: first from a BOM, then from its content.
     *
     * @param string $dflt encoding to return when nothing can be detected
     */
    public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
    {
        $encoding = self::guessEncodingBom($filename);
        if ($encoding === '') {
            $encoding = self::guessEncodingNoBom($filename);
        }

        return ($encoding === '') ? $dflt : $encoding;
    }
}
$type
static extractStringLower($extension)
Scrutinizer believes, incorrectly, that the specific pathinfo call in canRead can return something ot...
Definition: Csv.php:406
static guessEncodingBom(string $filename)
Definition: Csv.php:481
__construct()
Create a new CSV Reader instance.
Definition: Csv.php:89
inferSeparator()
Infer the separator if it isn't explicitly set in the file or specified by the user.
Definition: Csv.php:172
setDelimiter(string $delimiter)
Definition: Csv.php:340
static setConstructorCallback(?callable $callback)
Set a callback to change the defaults.
Definition: Csv.php:104
createSheet($sheetIndex=null)
Create sheet and add it to this workbook.
checkSeparator()
Identify any separator that is explicitly set in the file.
Definition: Csv.php:153
setContiguous(bool $contiguous)
Definition: Csv.php:374
setActiveSheetIndex($pIndex)
Set active sheet index.
load($pFilename)
Loads Spreadsheet from file.
Definition: Csv.php:243
setEscapeCharacter(string $escapeCharacter)
Definition: Csv.php:386
static guessEncoding(string $filename, string $dflt=self::DEFAULT_FALLBACK_ENCODING)
Definition: Csv.php:496
static guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding)
Definition: Csv.php:472
static convertEncoding($value, $to, $from)
Convert string from one encoding to another.
$filename
Definition: buildRTE.php:89
setInputEncoding(string $pValue)
Definition: Csv.php:114
static guessEncodingNoBom(string $filename)
Definition: Csv.php:457
canRead($pFilename)
Can the current IReader read the file?
Definition: Csv.php:418
setEnclosure(string $enclosure)
Definition: Csv.php:352
loadIntoExisting(string $pFilename, Spreadsheet $spreadsheet)
Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
Definition: Csv.php:278
listWorksheetInfo(string $pFilename)
Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)...
Definition: Csv.php:201
static guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding)
Definition: Csv.php:447
static stringFromColumnIndex($columnIndex)
String from column index.
Definition: Coordinate.php:313
openFileOrMemory(string $pFilename)
Definition: Csv.php:252
openFile($pFilename)
Open file for reading.
Definition: BaseReader.php:145
setFallbackEncoding(string $pValue)
Definition: Csv.php:126
$data
Definition: bench.php:6
skipBOM()
Move filepointer past any BOM marker.
Definition: Csv.php:141