ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
Xml.php
Go to the documentation of this file.
1<?php
2
4
5use DateTime;
6use DateTimeZone;
21use SimpleXMLElement;
22
26class Xml extends BaseReader
27{
33 protected $styles = [];
34
38 public function __construct()
39 {
40 parent::__construct();
41 $this->securityScanner = XmlScanner::getInstance($this);
42 }
43
44 private $fileContents = '';
45
46 public static function xmlMappings(): array
47 {
48 return array_merge(
51 );
52 }
53
61 public function canRead($pFilename)
62 {
63 // Office xmlns:o="urn:schemas-microsoft-com:office:office"
64 // Excel xmlns:x="urn:schemas-microsoft-com:office:excel"
65 // XML Spreadsheet xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
66 // Spreadsheet component xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet"
67 // XML schema xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882"
68 // XML data type xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"
69 // MS-persist recordset xmlns:rs="urn:schemas-microsoft-com:rowset"
70 // Rowset xmlns:z="#RowsetSchema"
71 //
72
73 $signature = [
74 '<?xml version="1.0"',
75 'xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet',
76 ];
77
78 // Open file
79 $data = file_get_contents($pFilename);
80
81 // Why?
82 //$data = str_replace("'", '"', $data); // fix headers with single quote
83
84 $valid = true;
85 foreach ($signature as $match) {
86 // every part of the signature must be present
87 if (strpos($data, $match) === false) {
88 $valid = false;
89
90 break;
91 }
92 }
93
94 // Retrieve charset encoding
95 if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $data, $matches)) {
96 $charSet = strtoupper($matches[1]);
97 if (1 == preg_match('/^ISO-8859-\d[\dL]?$/i', $charSet)) {
98 $data = StringHelper::convertEncoding($data, 'UTF-8', $charSet);
99 $data = preg_replace('/(<?xml.*encoding=[\'"]).*?([\'"].*?>)/um', '$1' . 'UTF-8' . '$2', $data, 1);
100 }
101 }
102 $this->fileContents = $data;
103
104 return $valid;
105 }
106
114 public function trySimpleXMLLoadString($pFilename)
115 {
116 try {
117 $xml = simplexml_load_string(
118 $this->securityScanner->scan($this->fileContents ?: file_get_contents($pFilename)),
119 'SimpleXMLElement',
121 );
122 } catch (\Exception $e) {
123 throw new Exception('Cannot load invalid XML file: ' . $pFilename, 0, $e);
124 }
125 $this->fileContents = '';
126
127 return $xml;
128 }
129
138 {
140 if (!$this->canRead($filename)) {
141 throw new Exception($filename . ' is an Invalid Spreadsheet file.');
142 }
143
144 $worksheetNames = [];
145
147 if ($xml === false) {
148 throw new Exception("Problem reading {$filename}");
149 }
150
151 $namespaces = $xml->getNamespaces(true);
152
153 $xml_ss = $xml->children($namespaces['ss']);
154 foreach ($xml_ss->Worksheet as $worksheet) {
155 $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
156 $worksheetNames[] = (string) $worksheet_ss['Name'];
157 }
158
159 return $worksheetNames;
160 }
161
170 {
172 if (!$this->canRead($filename)) {
173 throw new Exception($filename . ' is an Invalid Spreadsheet file.');
174 }
175
176 $worksheetInfo = [];
177
179 if ($xml === false) {
180 throw new Exception("Problem reading {$filename}");
181 }
182
183 $namespaces = $xml->getNamespaces(true);
184
185 $worksheetID = 1;
186 $xml_ss = $xml->children($namespaces['ss']);
187 foreach ($xml_ss->Worksheet as $worksheet) {
188 $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
189
190 $tmpInfo = [];
191 $tmpInfo['worksheetName'] = '';
192 $tmpInfo['lastColumnLetter'] = 'A';
193 $tmpInfo['lastColumnIndex'] = 0;
194 $tmpInfo['totalRows'] = 0;
195 $tmpInfo['totalColumns'] = 0;
196
197 $tmpInfo['worksheetName'] = "Worksheet_{$worksheetID}";
198 if (isset($worksheet_ss['Name'])) {
199 $tmpInfo['worksheetName'] = (string) $worksheet_ss['Name'];
200 }
201
202 if (isset($worksheet->Table->Row)) {
203 $rowIndex = 0;
204
205 foreach ($worksheet->Table->Row as $rowData) {
206 $columnIndex = 0;
207 $rowHasData = false;
208
209 foreach ($rowData->Cell as $cell) {
210 if (isset($cell->Data)) {
211 $tmpInfo['lastColumnIndex'] = max($tmpInfo['lastColumnIndex'], $columnIndex);
212 $rowHasData = true;
213 }
214
215 ++$columnIndex;
216 }
217
218 ++$rowIndex;
219
220 if ($rowHasData) {
221 $tmpInfo['totalRows'] = max($tmpInfo['totalRows'], $rowIndex);
222 }
223 }
224 }
225
226 $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
227 $tmpInfo['totalColumns'] = $tmpInfo['lastColumnIndex'] + 1;
228
229 $worksheetInfo[] = $tmpInfo;
230 ++$worksheetID;
231 }
232
233 return $worksheetInfo;
234 }
235
243 public function load($filename)
244 {
245 // Create new Spreadsheet
246 $spreadsheet = new Spreadsheet();
247 $spreadsheet->removeSheetByIndex(0);
248
249 // Load into this instance
250 return $this->loadIntoExisting($filename, $spreadsheet);
251 }
252
260 public function loadIntoExisting($filename, Spreadsheet $spreadsheet)
261 {
263 if (!$this->canRead($filename)) {
264 throw new Exception($filename . ' is an Invalid Spreadsheet file.');
265 }
266
268 if ($xml === false) {
269 throw new Exception("Problem reading {$filename}");
270 }
271
272 $namespaces = $xml->getNamespaces(true);
273
274 (new Properties($spreadsheet))->readProperties($xml, $namespaces);
275
276 $this->styles = (new Style())->parseStyles($xml, $namespaces);
277
278 $worksheetID = 0;
279 $xml_ss = $xml->children($namespaces['ss']);
280
282 foreach ($xml_ss->Worksheet as $worksheetx) {
283 $worksheet = $worksheetx ?? new SimpleXMLElement('<xml></xml>');
284 $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
285
286 if (
287 isset($this->loadSheetsOnly, $worksheet_ss['Name']) &&
288 (!in_array($worksheet_ss['Name'], $this->loadSheetsOnly))
289 ) {
290 continue;
291 }
292
293 // Create new Worksheet
294 $spreadsheet->createSheet();
295 $spreadsheet->setActiveSheetIndex($worksheetID);
296 $worksheetName = '';
297 if (isset($worksheet_ss['Name'])) {
298 $worksheetName = (string) $worksheet_ss['Name'];
299 // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in
300 // formula cells... during the load, all formulae should be correct, and we're simply bringing
301 // the worksheet name in line with the formula, not the reverse
302 $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
303 }
304
305 // locally scoped defined names
306 if (isset($worksheet->Names[0])) {
307 foreach ($worksheet->Names[0] as $definedName) {
308 $definedName_ss = self::getAttributes($definedName, $namespaces['ss']);
309 $name = (string) $definedName_ss['Name'];
310 $definedValue = (string) $definedName_ss['RefersTo'];
311 $convertedValue = AddressHelper::convertFormulaToA1($definedValue);
312 if ($convertedValue[0] === '=') {
313 $convertedValue = substr($convertedValue, 1);
314 }
315 $spreadsheet->addDefinedName(DefinedName::createInstance($name, $spreadsheet->getActiveSheet(), $convertedValue, true));
316 }
317 }
318
319 $columnID = 'A';
320 if (isset($worksheet->Table->Column)) {
321 foreach ($worksheet->Table->Column as $columnData) {
322 $columnData_ss = self::getAttributes($columnData, $namespaces['ss']);
323 if (isset($columnData_ss['Index'])) {
324 $columnID = Coordinate::stringFromColumnIndex((int) $columnData_ss['Index']);
325 }
326 if (isset($columnData_ss['Width'])) {
327 $columnWidth = $columnData_ss['Width'];
328 $spreadsheet->getActiveSheet()->getColumnDimension($columnID)->setWidth($columnWidth / 5.4);
329 }
330 ++$columnID;
331 }
332 }
333
334 $rowID = 1;
335 if (isset($worksheet->Table->Row)) {
336 $additionalMergedCells = 0;
337 foreach ($worksheet->Table->Row as $rowData) {
338 $rowHasData = false;
339 $row_ss = self::getAttributes($rowData, $namespaces['ss']);
340 if (isset($row_ss['Index'])) {
341 $rowID = (int) $row_ss['Index'];
342 }
343
344 $columnID = 'A';
345 foreach ($rowData->Cell as $cell) {
346 $cell_ss = self::getAttributes($cell, $namespaces['ss']);
347 if (isset($cell_ss['Index'])) {
348 $columnID = Coordinate::stringFromColumnIndex((int) $cell_ss['Index']);
349 }
350 $cellRange = $columnID . $rowID;
351
352 if ($this->getReadFilter() !== null) {
353 if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) {
354 ++$columnID;
355
356 continue;
357 }
358 }
359
360 if (isset($cell_ss['HRef'])) {
361 $spreadsheet->getActiveSheet()->getCell($cellRange)->getHyperlink()->setUrl((string) $cell_ss['HRef']);
362 }
363
364 if ((isset($cell_ss['MergeAcross'])) || (isset($cell_ss['MergeDown']))) {
365 $columnTo = $columnID;
366 if (isset($cell_ss['MergeAcross'])) {
367 $additionalMergedCells += (int) $cell_ss['MergeAcross'];
368 $columnTo = Coordinate::stringFromColumnIndex(Coordinate::columnIndexFromString($columnID) + $cell_ss['MergeAcross']);
369 }
370 $rowTo = $rowID;
371 if (isset($cell_ss['MergeDown'])) {
372 $rowTo = $rowTo + $cell_ss['MergeDown'];
373 }
374 $cellRange .= ':' . $columnTo . $rowTo;
375 $spreadsheet->getActiveSheet()->mergeCells($cellRange);
376 }
377
378 $hasCalculatedValue = false;
379 $cellDataFormula = '';
380 if (isset($cell_ss['Formula'])) {
381 $cellDataFormula = $cell_ss['Formula'];
382 $hasCalculatedValue = true;
383 }
384 if (isset($cell->Data)) {
385 $cellData = $cell->Data;
386 $cellValue = (string) $cellData;
388 $cellData_ss = self::getAttributes($cellData, $namespaces['ss']);
389 if (isset($cellData_ss['Type'])) {
390 $cellDataType = $cellData_ss['Type'];
391 switch ($cellDataType) {
392 /*
393 const TYPE_STRING = 's';
394 const TYPE_FORMULA = 'f';
395 const TYPE_NUMERIC = 'n';
396 const TYPE_BOOL = 'b';
397 const TYPE_NULL = 'null';
398 const TYPE_INLINE = 'inlineStr';
399 const TYPE_ERROR = 'e';
400 */
401 case 'String':
403
404 break;
405 case 'Number':
407 $cellValue = (float) $cellValue;
408 if (floor($cellValue) == $cellValue) {
409 $cellValue = (int) $cellValue;
410 }
411
412 break;
413 case 'Boolean':
415 $cellValue = ($cellValue != 0);
416
417 break;
418 case 'DateTime':
420 $dateTime = new DateTime($cellValue, new DateTimeZone('UTC'));
421 $cellValue = Date::PHPToExcel($dateTime);
422
423 break;
424 case 'Error':
426 $hasCalculatedValue = false;
427
428 break;
429 }
430 }
431
432 if ($hasCalculatedValue) {
434 $columnNumber = Coordinate::columnIndexFromString($columnID);
435 $cellDataFormula = AddressHelper::convertFormulaToA1($cellDataFormula, $rowID, $columnNumber);
436 }
437
438 $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValueExplicit((($hasCalculatedValue) ? $cellDataFormula : $cellValue), $type);
439 if ($hasCalculatedValue) {
440 $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setCalculatedValue($cellValue);
441 }
442 $rowHasData = true;
443 }
444
445 if (isset($cell->Comment)) {
446 $this->parseCellComment($cell->Comment, $namespaces, $spreadsheet, $columnID, $rowID);
447 }
448
449 if (isset($cell_ss['StyleID'])) {
450 $style = (string) $cell_ss['StyleID'];
451 if ((isset($this->styles[$style])) && (!empty($this->styles[$style]))) {
452 //if (!$spreadsheet->getActiveSheet()->cellExists($columnID . $rowID)) {
453 // $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValue(null);
454 //}
455 $spreadsheet->getActiveSheet()->getStyle($cellRange)
456 ->applyFromArray($this->styles[$style]);
457 }
458 }
459 ++$columnID;
460 while ($additionalMergedCells > 0) {
461 ++$columnID;
462 --$additionalMergedCells;
463 }
464 }
465
466 if ($rowHasData) {
467 if (isset($row_ss['Height'])) {
468 $rowHeight = $row_ss['Height'];
469 $spreadsheet->getActiveSheet()->getRowDimension($rowID)->setRowHeight((float) $rowHeight);
470 }
471 }
472
473 ++$rowID;
474 }
475
476 if (isset($namespaces['x'])) {
477 $xmlX = $worksheet->children($namespaces['x']);
478 if (isset($xmlX->WorksheetOptions)) {
479 (new PageSettings($xmlX, $namespaces))->loadPageSettings($spreadsheet);
480 }
481 }
482 }
483 ++$worksheetID;
484 }
485
486 // Globally scoped defined names
487 $activeWorksheet = $spreadsheet->setActiveSheetIndex(0);
488 if (isset($xml->Names[0])) {
489 foreach ($xml->Names[0] as $definedName) {
490 $definedName_ss = self::getAttributes($definedName, $namespaces['ss']);
491 $name = (string) $definedName_ss['Name'];
492 $definedValue = (string) $definedName_ss['RefersTo'];
493 $convertedValue = AddressHelper::convertFormulaToA1($definedValue);
494 if ($convertedValue[0] === '=') {
495 $convertedValue = substr($convertedValue, 1);
496 }
497 $spreadsheet->addDefinedName(DefinedName::createInstance($name, $activeWorksheet, $convertedValue));
498 }
499 }
500
501 // Return
502 return $spreadsheet;
503 }
504
505 protected function parseCellComment(
506 SimpleXMLElement $comment,
507 array $namespaces,
508 Spreadsheet $spreadsheet,
509 string $columnID,
510 int $rowID
511 ): void {
512 $commentAttributes = $comment->attributes($namespaces['ss']);
513 $author = 'unknown';
514 if (isset($commentAttributes->Author)) {
515 $author = (string) $commentAttributes->Author;
516 }
517
518 $node = $comment->Data->asXML();
519 $annotation = strip_tags((string) $node);
520 $spreadsheet->getActiveSheet()->getComment($columnID . $rowID)
521 ->setAuthor($author)
522 ->setText($this->parseRichText($annotation));
523 }
524
525 protected function parseRichText(string $annotation): RichText
526 {
527 $value = new RichText();
528
529 $value->createText($annotation);
530
531 return $value;
532 }
533
534 private static function getAttributes(?SimpleXMLElement $simple, string $node): SimpleXMLElement
535 {
536 return ($simple === null)
537 ? new SimpleXMLElement('<xml></xml>')
538 : ($simple->attributes($node) ?? new SimpleXMLElement('<xml></xml>'));
539 }
540}
$filename
Definition: buildRTE.php:89
$comment
Definition: buildRTE.php:83
An exception for terminatinating execution or to throw for unit testing.
static convertFormulaToA1(string $formula, int $currentRowNumber=1, int $currentColumnNumber=1)
Converts a formula that uses R1C1/SpreadsheetXML format cell address to an A1 format cell address.
Helper class to manipulate cell coordinates.
Definition: Coordinate.php:15
static columnIndexFromString($pString)
Column index from string.
Definition: Coordinate.php:265
static stringFromColumnIndex($columnIndex)
String from column index.
Definition: Coordinate.php:313
static createInstance(string $name, ?Worksheet $worksheet=null, ?string $value=null, bool $localOnly=false, ?Worksheet $scope=null)
Create a new defined name, either a range or a formula.
Definition: DefinedName.php:84
static getInstance(Reader\IReader $reader)
Definition: XmlScanner.php:39
Reader for SpreadsheetML, the XML schema for Microsoft Office Excel 2003.
Definition: Xml.php:27
static getAttributes(?SimpleXMLElement $simple, string $node)
Definition: Xml.php:534
listWorksheetInfo($filename)
Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
Definition: Xml.php:169
trySimpleXMLLoadString($pFilename)
Check if the file is a valid SimpleXML.
Definition: Xml.php:114
__construct()
Create a new Excel2003XML Reader instance.
Definition: Xml.php:38
canRead($pFilename)
Can the current IReader read the file?
Definition: Xml.php:61
parseCellComment(SimpleXMLElement $comment, array $namespaces, Spreadsheet $spreadsheet, string $columnID, int $rowID)
Definition: Xml.php:505
load($filename)
Loads Spreadsheet from file.
Definition: Xml.php:243
listWorksheetNames($filename)
Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object.
Definition: Xml.php:137
parseRichText(string $annotation)
Definition: Xml.php:525
static getLibXmlLoaderOptions()
Get default options for libxml loader.
Definition: Settings.php:116
static PHPToExcel($dateValue)
Convert a date from PHP to an MS Excel serialized date/time value.
Definition: Date.php:231
static assertFile($filename)
Assert that given path is an existing file and is readable, otherwise throw exception.
Definition: File.php:143
static convertEncoding($value, $to, $from)
Convert string from one encoding to another.
createSheet($sheetIndex=null)
Create sheet and add it to this workbook.
setActiveSheetIndex($pIndex)
Set active sheet index.
addDefinedName(DefinedName $definedName)
Add a defined name (either a named range or a named formula).
$valid
$style
Definition: example_012.php:70
$type
$data
Definition: bench.php:6