ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
Csv.php
Go to the documentation of this file.
1<?php
2
4
5use InvalidArgumentException;
10
11class Csv extends BaseReader
12{
/** Encoding used by guessEncoding() when no encoding can be detected. */
const DEFAULT_FALLBACK_ENCODING = 'CP1252';

/** Sentinel input encoding: openFileOrMemory() replaces it with the result of guessEncoding(). */
const GUESS_ENCODING = 'guess';

// Byte-order marks, their lengths in bytes, and the encoded line feed
// for each Unicode encoding the guesser recognises.
const UTF8_BOM = "\xEF\xBB\xBF";
const UTF8_BOM_LEN = 3;
const UTF16BE_BOM = "\xfe\xff";
const UTF16BE_BOM_LEN = 2;
const UTF16BE_LF = "\x00\x0a";
const UTF16LE_BOM = "\xff\xfe";
const UTF16LE_BOM_LEN = 2;
const UTF16LE_LF = "\x0a\x00";
const UTF32BE_BOM = "\x00\x00\xfe\xff";
const UTF32BE_BOM_LEN = 4;
const UTF32BE_LF = "\x00\x00\x00\x0a";
const UTF32LE_BOM = "\xff\xfe\x00\x00";
const UTF32LE_BOM_LEN = 4;
const UTF32LE_LF = "\x0a\x00\x00\x00";

/**
 * Encoding of the input file; anything other than 'UTF-8' is converted
 * to UTF-8 by openFileOrMemory().
 *
 * @var string
 */
private $inputEncoding = 'UTF-8';

/**
 * Encoding handed to guessEncoding() as its default when the input
 * encoding is self::GUESS_ENCODING.
 *
 * Restored declaration: the property is read and written by
 * setFallbackEncoding(), getFallbackEncoding() and openFileOrMemory().
 *
 * @var string
 */
private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;

/**
 * Field delimiter; null until set by the caller, read from a "sep=" line,
 * or inferred from the file.
 *
 * @var null|string
 */
private $delimiter;

/**
 * Field enclosure character passed to fgetcsv().
 *
 * @var string
 */
private $enclosure = '"';

/**
 * Index of the worksheet that loadIntoExisting() fills.
 *
 * @var int
 */
private $sheetIndex = 0;

/**
 * When true, only rows that produce output advance the output row counter.
 *
 * @var bool
 */
private $contiguous = false;

/**
 * Escape character passed to fgetcsv().
 *
 * @var string
 */
private $escapeCharacter = '\\';

/**
 * Callback invoked with every newly constructed reader, or null.
 *
 * @var null|callable
 */
private static $constructorCallback;
85
/**
 * Create a new CSV Reader instance.
 *
 * After the parent constructor has run, the callback registered through
 * setConstructorCallback() (if any) is invoked with the new reader.
 */
public function __construct()
{
    parent::__construct();

    // Restored assignment: $callback was tested below without ever being set.
    $callback = self::$constructorCallback;
    if ($callback !== null) {
        $callback($this);
    }
}
97
/**
 * Set a callback to change the defaults.
 *
 * The callback is stored statically and called by the constructor with
 * each new reader instance; pass null to remove it.
 */
public static function setConstructorCallback(?callable $callback): void
{
    self::$constructorCallback = $callback;
}
108
/**
 * Return the callback registered with setConstructorCallback(), or null
 * when none is set.
 */
public static function getConstructorCallback(): ?callable
{
    // Restored: the body was empty, so the getter never returned the stored callback.
    return self::$constructorCallback;
}
113
/**
 * Set the encoding of the input file.
 *
 * Passing self::GUESS_ENCODING makes openFileOrMemory() detect the
 * encoding when the file is opened.
 *
 * @return $this
 */
public function setInputEncoding(string $pValue): self
{
    $this->inputEncoding = $pValue;

    return $this;
}
120
/**
 * Return the input encoding currently configured for this reader.
 */
public function getInputEncoding(): string
{
    // Restored: an empty body would violate the declared string return type.
    return $this->inputEncoding;
}
125
/**
 * Set the encoding that guessEncoding() falls back to when the input
 * encoding is being guessed and nothing can be detected.
 *
 * @return $this
 */
public function setFallbackEncoding(string $pValue): self
{
    $this->fallbackEncoding = $pValue;

    return $this;
}
132
/**
 * Return the fallback encoding used when encoding detection fails.
 */
public function getFallbackEncoding(): string
{
    // Restored: an empty body would violate the declared string return type.
    return $this->fallbackEncoding;
}
137
/**
 * Move filepointer past any BOM marker.
 *
 * Rewinds the file, reads the first bytes, and rewinds again unless they
 * are exactly the UTF-8 byte-order mark.
 */
protected function skipBOM(): void
{
    rewind($this->fileHandle);

    $leadingBytes = fgets($this->fileHandle, self::UTF8_BOM_LEN + 1);
    if ($leadingBytes === self::UTF8_BOM) {
        // Pointer now sits just after the BOM.
        return;
    }

    rewind($this->fileHandle);
}
149
/**
 * Identify any separator that is explicitly set in the file.
 *
 * Reads one line from the current position. If it is a five-character
 * "sep=X" line (ignoring line endings, case-insensitive), X becomes the
 * delimiter and the pointer stays after that line; otherwise the pointer
 * is reset via skipBOM().
 */
protected function checkSeparator(): void
{
    $firstLine = fgets($this->fileHandle);
    if ($firstLine === false) {
        return;
    }

    $declaresSeparator = (strlen(trim($firstLine, "\r\n")) === 5)
        && (stripos($firstLine, 'sep=') === 0);

    if (!$declaresSeparator) {
        $this->skipBOM();

        return;
    }

    // The character directly after "sep=" is the delimiter.
    $this->delimiter = substr($firstLine, 4, 1);
}
168
/**
 * Infer the separator if it isn't explicitly set in the file or specified by the user.
 *
 * Does nothing when a delimiter is already known. Otherwise the Delimiter
 * helper is asked to infer one; its default delimiter is used when it
 * counted no lines or could not decide. The file pointer is then reset
 * via skipBOM().
 */
protected function inferSeparator(): void
{
    if ($this->delimiter !== null) {
        return;
    }

    $engine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);

    // With no lines counted there is nothing to infer from, so skip inference.
    $inferred = ($engine->linesCounted() === 0) ? null : $engine->infer();

    // If no delimiter could be detected, fall back to the default
    $this->delimiter = $inferred ?? $engine->getDefaultDelimiter();

    $this->skipBOM();
}
197
/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 *
 * A CSV file yields exactly one entry, at index 0, named 'Worksheet'.
 *
 * @return array[] one element with keys worksheetName, lastColumnLetter,
 *                 lastColumnIndex, totalRows and totalColumns
 */
public function listWorksheetInfo(string $pFilename): array
{
    // Open file
    $this->openFileOrMemory($pFilename);
    // Restored: $fileHandle was used below without ever being assigned.
    $fileHandle = $this->fileHandle;

    // Skip BOM, if any
    $this->skipBOM();
    $this->checkSeparator();
    $this->inferSeparator();

    $worksheetInfo = [];
    $worksheetInfo[0]['worksheetName'] = 'Worksheet';
    $worksheetInfo[0]['lastColumnLetter'] = 'A';
    $worksheetInfo[0]['lastColumnIndex'] = 0;
    $worksheetInfo[0]['totalRows'] = 0;
    $worksheetInfo[0]['totalColumns'] = 0;

    // Loop through each line of the file in turn
    $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
    while (is_array($rowData)) {
        ++$worksheetInfo[0]['totalRows'];
        // Track the widest row seen so far (zero-based column index).
        $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
        $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
    }

    $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
    $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;

    // Close file
    fclose($fileHandle);

    return $worksheetInfo;
}
235
/**
 * Loads Spreadsheet from file.
 *
 * Creates a fresh Spreadsheet and delegates to loadIntoExisting().
 *
 * @param string $pFilename
 *
 * @return Spreadsheet
 */
public function load($pFilename)
{
    return $this->loadIntoExisting($pFilename, new Spreadsheet());
}
251
/**
 * Open the file for reading, converting it to UTF-8 in memory when needed.
 *
 * When the input encoding is self::GUESS_ENCODING it is first replaced by
 * the result of guessEncoding(). For any encoding other than 'UTF-8' the
 * whole file is read, converted, and written to a php://memory stream
 * that replaces $this->fileHandle.
 *
 * @throws Exception when canRead() rejects the file, or when the file
 *                   cannot be read into memory for conversion
 */
private function openFileOrMemory(string $pFilename): void
{
    // canRead() reports whether the file can be opened and looks like CSV.
    if (!$this->canRead($pFilename)) {
        throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
    }
    if ($this->inputEncoding === self::GUESS_ENCODING) {
        $this->inputEncoding = self::guessEncoding($pFilename, $this->fallbackEncoding);
    }
    $this->openFile($pFilename);
    if ($this->inputEncoding === 'UTF-8') {
        return;
    }

    // Replace the on-disk handle with an in-memory UTF-8 copy.
    fclose($this->fileHandle);
    $entireFile = file_get_contents($pFilename);
    $memoryHandle = fopen('php://memory', 'r+b');
    if ($entireFile === false || $memoryHandle === false) {
        if ($memoryHandle !== false) {
            fclose($memoryHandle);
        }

        // Previously this case was ignored, leaving an empty or invalid
        // handle for the caller to read from.
        throw new Exception('Could not read ' . $pFilename . ' for conversion to UTF-8.');
    }

    $this->fileHandle = $memoryHandle;
    $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
    fwrite($this->fileHandle, $data);
    // skipBOM() rewinds the stream, so reading starts at the beginning.
    $this->skipBOM();
}
274
/**
 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 *
 * Rows are written to the sheet at $this->sheetIndex (sheets are created
 * until that index exists). Empty fields and cells rejected by the read
 * filter are skipped. In contiguous mode only rows that produce at least
 * one cell advance the output row.
 *
 * The 'auto_detect_line_endings' ini setting is enabled for the duration
 * of the load and restored afterwards, even if loading throws.
 */
public function loadIntoExisting(string $pFilename, Spreadsheet $spreadsheet): Spreadsheet
{
    $lineEnding = ini_get('auto_detect_line_endings') ?: '0';
    ini_set('auto_detect_line_endings', '1');

    try {
        // Open file
        $this->openFileOrMemory($pFilename);
        // Restored: $fileHandle was used below without ever being assigned.
        $fileHandle = $this->fileHandle;

        // Skip BOM, if any
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        // Create new PhpSpreadsheet object
        while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
            $spreadsheet->createSheet();
        }
        $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

        // Set our starting row based on whether we're in contiguous mode or not
        $currentRow = 1;
        $outRow = 0;

        // Loop through each line of the file in turn
        $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
        while (is_array($rowData)) {
            $noOutputYet = true;
            $columnLetter = 'A';
            foreach ($rowData as $rowDatum) {
                // Loose comparison on purpose: it also skips a null field.
                if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                    if ($this->contiguous) {
                        // Advance the output row once per input row that has data.
                        if ($noOutputYet) {
                            $noOutputYet = false;
                            ++$outRow;
                        }
                    } else {
                        $outRow = $currentRow;
                    }
                    // Set cell value
                    $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                }
                ++$columnLetter;
            }
            $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
            ++$currentRow;
        }

        // Close file
        fclose($fileHandle);
    } finally {
        // Always put the ini setting back, including on exceptions.
        ini_set('auto_detect_line_endings', $lineEnding);
    }

    // Return
    return $spreadsheet;
}
334
/**
 * Return the field delimiter, or null when none has been set or inferred yet.
 */
public function getDelimiter(): ?string
{
    return $this->delimiter;
}
339
/**
 * Set the field delimiter explicitly; inferSeparator() leaves a set
 * delimiter untouched.
 *
 * @return $this
 */
public function setDelimiter(string $delimiter): self
{
    $this->delimiter = $delimiter;

    return $this;
}
346
/**
 * Return the field enclosure character.
 */
public function getEnclosure(): string
{
    return $this->enclosure;
}
351
/**
 * Set the field enclosure character; an empty string selects the
 * default double quote.
 *
 * @return $this
 */
public function setEnclosure(string $enclosure): self
{
    $this->enclosure = ($enclosure === '') ? '"' : $enclosure;

    return $this;
}
361
/**
 * Return the index of the worksheet that data is loaded into.
 */
public function getSheetIndex(): int
{
    return $this->sheetIndex;
}
366
/**
 * Set the index of the worksheet that data is loaded into.
 *
 * @return $this
 */
public function setSheetIndex(int $pValue): self
{
    $this->sheetIndex = $pValue;

    return $this;
}
373
/**
 * Enable or disable contiguous mode, in which input rows that produce
 * no cells do not consume an output row.
 *
 * @return $this
 */
public function setContiguous(bool $contiguous): self
{
    // The parameter is already typed bool, so no cast is needed.
    $this->contiguous = $contiguous;

    return $this;
}
380
/**
 * Report whether contiguous mode is enabled.
 */
public function getContiguous(): bool
{
    return $this->contiguous;
}
385
/**
 * Set the escape character handed to fgetcsv() and the delimiter
 * inference helper.
 *
 * @return $this
 */
public function setEscapeCharacter(string $escapeCharacter): self
{
    $this->escapeCharacter = $escapeCharacter;

    return $this;
}
392
/**
 * Return the escape character.
 */
public function getEscapeCharacter(): string
{
    // Restored: an empty body would violate the declared string return type.
    return $this->escapeCharacter;
}
397
/**
 * Lower-case the given value when it is a string; return an empty
 * string for anything else.
 *
 * @param mixed $extension
 */
private static function extractStringLower($extension): string
{
    if (!is_string($extension)) {
        return '';
    }

    return strtolower($extension);
}
410
/**
 * Can the current IReader read the file?
 *
 * The file must be openable. A 'csv' or 'tsv' extension is trusted
 * outright; otherwise the detected MIME type must be one of the
 * supported text/CSV types.
 *
 * @param string $pFilename
 *
 * @return bool
 */
public function canRead($pFilename)
{
    // Check if file exists
    try {
        $this->openFile($pFilename);
    } catch (InvalidArgumentException $e) {
        return false;
    }

    // Only the ability to open the file matters here.
    fclose($this->fileHandle);

    // Trust file extension if any
    $extension = self::extractStringLower(pathinfo($pFilename, PATHINFO_EXTENSION));
    $trustedExtensions = ['csv', 'tsv'];
    if (in_array($extension, $trustedExtensions, true)) {
        return true;
    }

    // Attempt to guess mimetype
    $supportedTypes = [
        'application/csv',
        'text/csv',
        'text/plain',
        'inode/x-empty',
    ];
    $detectedType = mime_content_type($pFilename);

    return in_array($detectedType, $supportedTypes, true);
}
446
/**
 * Set $encoding to $setEncoding when it is still empty and the first
 * occurrence of $compare in $contents starts at an offset that is a
 * multiple of $compare's length.
 */
private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
{
    if ($encoding !== '') {
        // An earlier test already decided the encoding.
        return;
    }

    $offset = strpos($contents, $compare);
    if ($offset === false) {
        return;
    }

    if ($offset % strlen($compare) === 0) {
        $encoding = $setEncoding;
    }
}
456
/**
 * Guess the encoding of a file without relying on a byte-order mark.
 *
 * Looks for an aligned encoded line feed in UTF-32BE, UTF-32LE, UTF-16BE
 * and UTF-16LE order, then checks whether the contents are valid UTF-8.
 *
 * @return string the detected encoding, or '' when nothing matched or the
 *                file could not be read
 */
private static function guessEncodingNoBom(string $filename): string
{
    $encoding = '';
    $contents = file_get_contents($filename);
    if ($contents === false) {
        // Unreadable file: report "unknown" rather than passing false to
        // the string-typed helpers below.
        return $encoding;
    }

    self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
    // An empty pattern with the /u modifier only matches valid UTF-8 subjects.
    if ($encoding === '' && preg_match('//u', $contents) === 1) {
        $encoding = 'UTF-8';
    }

    return $encoding;
}
471
/**
 * Set $encoding to $setEncoding when it is still empty and $first4
 * begins with the byte sequence $compare.
 */
private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
{
    if ($encoding !== '') {
        // An earlier test already decided the encoding.
        return;
    }

    $prefix = substr($first4, 0, strlen($compare));
    if ($prefix === $compare) {
        $encoding = $setEncoding;
    }
}
480
/**
 * Guess the encoding of a file from a byte-order mark in its first four bytes.
 *
 * @return string the detected encoding, or '' when no known BOM is present
 *                or the file could not be read
 */
private static function guessEncodingBom(string $filename): string
{
    $encoding = '';
    $first4 = file_get_contents($filename, false, null, 0, 4);
    if ($first4 === false) {
        return $encoding;
    }

    // Order matters: the UTF-32LE BOM begins with the UTF-16LE BOM, so
    // UTF-32LE is tested before UTF-16LE.
    $candidates = [
        [self::UTF8_BOM, 'UTF-8'],
        [self::UTF16BE_BOM, 'UTF-16BE'],
        [self::UTF32BE_BOM, 'UTF-32BE'],
        [self::UTF32LE_BOM, 'UTF-32LE'],
        [self::UTF16LE_BOM, 'UTF-16LE'],
    ];
    foreach ($candidates as $candidate) {
        self::guessEncodingTestBom($encoding, $first4, $candidate[0], $candidate[1]);
    }

    return $encoding;
}
495
/**
 * Guess the encoding of a file.
 *
 * A byte-order mark is looked for first; without one the file contents
 * are inspected instead.
 *
 * @param string $filename path of the file to examine
 * @param string $dflt encoding returned when nothing can be detected
 *
 * @return string the detected encoding, or $dflt
 */
public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
{
    // Restored: $encoding was tested without being assigned, and the
    // fallback branch had no body.
    $encoding = self::guessEncodingBom($filename);
    if ($encoding === '') {
        $encoding = self::guessEncodingNoBom($filename);
    }

    return ($encoding === '') ? $dflt : $encoding;
}
505}
$filename
Definition: buildRTE.php:89
An exception for terminating execution or to throw for unit testing.
Helper class to manipulate cell coordinates.
Definition: Coordinate.php:15
static stringFromColumnIndex($columnIndex)
String from column index.
Definition: Coordinate.php:313
openFile($pFilename)
Open file for reading.
Definition: BaseReader.php:145
load($pFilename)
Loads Spreadsheet from file.
Definition: Csv.php:243
setEscapeCharacter(string $escapeCharacter)
Definition: Csv.php:386
checkSeparator()
Identify any separator that is explicitly set in the file.
Definition: Csv.php:153
setInputEncoding(string $pValue)
Definition: Csv.php:114
listWorksheetInfo(string $pFilename)
Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
Definition: Csv.php:201
static extractStringLower($extension)
Scrutinizer believes, incorrectly, that the specific pathinfo call in canRead can return something ot...
Definition: Csv.php:406
static guessEncoding(string $filename, string $dflt=self::DEFAULT_FALLBACK_ENCODING)
Definition: Csv.php:496
loadIntoExisting(string $pFilename, Spreadsheet $spreadsheet)
Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
Definition: Csv.php:278
setFallbackEncoding(string $pValue)
Definition: Csv.php:126
static guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding)
Definition: Csv.php:472
inferSeparator()
Infer the separator if it isn't explicitly set in the file or specified by the user.
Definition: Csv.php:172
openFileOrMemory(string $pFilename)
Definition: Csv.php:252
setDelimiter(string $delimiter)
Definition: Csv.php:340
static guessEncodingBom(string $filename)
Definition: Csv.php:481
static guessEncodingNoBom(string $filename)
Definition: Csv.php:457
skipBOM()
Move filepointer past any BOM marker.
Definition: Csv.php:141
setEnclosure(string $enclosure)
Definition: Csv.php:352
static setConstructorCallback(?callable $callback)
Set a callback to change the defaults.
Definition: Csv.php:104
__construct()
Create a new CSV Reader instance.
Definition: Csv.php:89
static guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding)
Definition: Csv.php:447
setContiguous(bool $contiguous)
Definition: Csv.php:374
canRead($pFilename)
Can the current IReader read the file?
Definition: Csv.php:418
static convertEncoding($value, $to, $from)
Convert string from one encoding to another.
createSheet($sheetIndex=null)
Create sheet and add it to this workbook.
setActiveSheetIndex($pIndex)
Set active sheet index.
$type
$data
Definition: bench.php:6