ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
Xml.php
Go to the documentation of this file.
1 <?php
2 
4 
5 use DateTime;
6 use DateTimeZone;
22 
26 class Xml extends BaseReader
27 {
33  protected $styles = [];
34 
38  public function __construct()
39  {
40  parent::__construct();
41  $this->securityScanner = XmlScanner::getInstance($this);
42  }
43 
44  private $fileContents = '';
45 
46  public static function xmlMappings(): array
47  {
48  return array_merge(
49  Style\Fill::FILL_MAPPINGS,
50  Style\Border::BORDER_MAPPINGS
51  );
52  }
53 
61  public function canRead($pFilename)
62  {
63  // Office xmlns:o="urn:schemas-microsoft-com:office:office"
64  // Excel xmlns:x="urn:schemas-microsoft-com:office:excel"
65  // XML Spreadsheet xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
66  // Spreadsheet component xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet"
67  // XML schema xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882"
68  // XML data type xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"
69  // MS-persist recordset xmlns:rs="urn:schemas-microsoft-com:rowset"
70  // Rowset xmlns:z="#RowsetSchema"
71  //
72 
73  $signature = [
74  '<?xml version="1.0"',
75  'xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet',
76  ];
77 
78  // Open file
79  $data = file_get_contents($pFilename);
80 
81  // Why?
82  //$data = str_replace("'", '"', $data); // fix headers with single quote
83 
84  $valid = true;
85  foreach ($signature as $match) {
86  // every part of the signature must be present
87  if (strpos($data, $match) === false) {
88  $valid = false;
89 
90  break;
91  }
92  }
93 
94  // Retrieve charset encoding
95  if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $data, $matches)) {
96  $charSet = strtoupper($matches[1]);
97  if (1 == preg_match('/^ISO-8859-\d[\dL]?$/i', $charSet)) {
98  $data = StringHelper::convertEncoding($data, 'UTF-8', $charSet);
99  $data = preg_replace('/(<?xml.*encoding=[\'"]).*?([\'"].*?>)/um', '$1' . 'UTF-8' . '$2', $data, 1);
100  }
101  }
102  $this->fileContents = $data;
103 
104  return $valid;
105  }
106 
114  public function trySimpleXMLLoadString($pFilename)
115  {
116  try {
117  $xml = simplexml_load_string(
118  $this->securityScanner->scan($this->fileContents ?: file_get_contents($pFilename)),
119  'SimpleXMLElement',
121  );
122  } catch (\Exception $e) {
123  throw new Exception('Cannot load invalid XML file: ' . $pFilename, 0, $e);
124  }
125  $this->fileContents = '';
126 
127  return $xml;
128  }
129 
137  public function listWorksheetNames($filename)
138  {
140  if (!$this->canRead($filename)) {
141  throw new Exception($filename . ' is an Invalid Spreadsheet file.');
142  }
143 
144  $worksheetNames = [];
145 
147  if ($xml === false) {
148  throw new Exception("Problem reading {$filename}");
149  }
150 
151  $namespaces = $xml->getNamespaces(true);
152 
153  $xml_ss = $xml->children($namespaces['ss']);
154  foreach ($xml_ss->Worksheet as $worksheet) {
155  $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
156  $worksheetNames[] = (string) $worksheet_ss['Name'];
157  }
158 
159  return $worksheetNames;
160  }
161 
169  public function listWorksheetInfo($filename)
170  {
172  if (!$this->canRead($filename)) {
173  throw new Exception($filename . ' is an Invalid Spreadsheet file.');
174  }
175 
176  $worksheetInfo = [];
177 
179  if ($xml === false) {
180  throw new Exception("Problem reading {$filename}");
181  }
182 
183  $namespaces = $xml->getNamespaces(true);
184 
185  $worksheetID = 1;
186  $xml_ss = $xml->children($namespaces['ss']);
187  foreach ($xml_ss->Worksheet as $worksheet) {
188  $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
189 
190  $tmpInfo = [];
191  $tmpInfo['worksheetName'] = '';
192  $tmpInfo['lastColumnLetter'] = 'A';
193  $tmpInfo['lastColumnIndex'] = 0;
194  $tmpInfo['totalRows'] = 0;
195  $tmpInfo['totalColumns'] = 0;
196 
197  $tmpInfo['worksheetName'] = "Worksheet_{$worksheetID}";
198  if (isset($worksheet_ss['Name'])) {
199  $tmpInfo['worksheetName'] = (string) $worksheet_ss['Name'];
200  }
201 
202  if (isset($worksheet->Table->Row)) {
203  $rowIndex = 0;
204 
205  foreach ($worksheet->Table->Row as $rowData) {
206  $columnIndex = 0;
207  $rowHasData = false;
208 
209  foreach ($rowData->Cell as $cell) {
210  if (isset($cell->Data)) {
211  $tmpInfo['lastColumnIndex'] = max($tmpInfo['lastColumnIndex'], $columnIndex);
212  $rowHasData = true;
213  }
214 
215  ++$columnIndex;
216  }
217 
218  ++$rowIndex;
219 
220  if ($rowHasData) {
221  $tmpInfo['totalRows'] = max($tmpInfo['totalRows'], $rowIndex);
222  }
223  }
224  }
225 
226  $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
227  $tmpInfo['totalColumns'] = $tmpInfo['lastColumnIndex'] + 1;
228 
229  $worksheetInfo[] = $tmpInfo;
230  ++$worksheetID;
231  }
232 
233  return $worksheetInfo;
234  }
235 
243  public function load($filename)
244  {
245  // Create new Spreadsheet
246  $spreadsheet = new Spreadsheet();
247  $spreadsheet->removeSheetByIndex(0);
248 
249  // Load into this instance
250  return $this->loadIntoExisting($filename, $spreadsheet);
251  }
252 
260  public function loadIntoExisting($filename, Spreadsheet $spreadsheet)
261  {
263  if (!$this->canRead($filename)) {
264  throw new Exception($filename . ' is an Invalid Spreadsheet file.');
265  }
266 
268  if ($xml === false) {
269  throw new Exception("Problem reading {$filename}");
270  }
271 
272  $namespaces = $xml->getNamespaces(true);
273 
274  (new Properties($spreadsheet))->readProperties($xml, $namespaces);
275 
276  $this->styles = (new Style())->parseStyles($xml, $namespaces);
277 
278  $worksheetID = 0;
279  $xml_ss = $xml->children($namespaces['ss']);
280 
282  foreach ($xml_ss->Worksheet as $worksheetx) {
283  $worksheet = $worksheetx ?? new SimpleXMLElement('<xml></xml>');
284  $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
285 
286  if (
287  isset($this->loadSheetsOnly, $worksheet_ss['Name']) &&
288  (!in_array($worksheet_ss['Name'], $this->loadSheetsOnly))
289  ) {
290  continue;
291  }
292 
293  // Create new Worksheet
294  $spreadsheet->createSheet();
295  $spreadsheet->setActiveSheetIndex($worksheetID);
296  $worksheetName = '';
297  if (isset($worksheet_ss['Name'])) {
298  $worksheetName = (string) $worksheet_ss['Name'];
299  // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in
300  // formula cells... during the load, all formulae should be correct, and we're simply bringing
301  // the worksheet name in line with the formula, not the reverse
302  $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
303  }
304 
305  // locally scoped defined names
306  if (isset($worksheet->Names[0])) {
307  foreach ($worksheet->Names[0] as $definedName) {
308  $definedName_ss = self::getAttributes($definedName, $namespaces['ss']);
309  $name = (string) $definedName_ss['Name'];
310  $definedValue = (string) $definedName_ss['RefersTo'];
311  $convertedValue = AddressHelper::convertFormulaToA1($definedValue);
312  if ($convertedValue[0] === '=') {
313  $convertedValue = substr($convertedValue, 1);
314  }
315  $spreadsheet->addDefinedName(DefinedName::createInstance($name, $spreadsheet->getActiveSheet(), $convertedValue, true));
316  }
317  }
318 
319  $columnID = 'A';
320  if (isset($worksheet->Table->Column)) {
321  foreach ($worksheet->Table->Column as $columnData) {
322  $columnData_ss = self::getAttributes($columnData, $namespaces['ss']);
323  if (isset($columnData_ss['Index'])) {
324  $columnID = Coordinate::stringFromColumnIndex((int) $columnData_ss['Index']);
325  }
326  if (isset($columnData_ss['Width'])) {
327  $columnWidth = $columnData_ss['Width'];
328  $spreadsheet->getActiveSheet()->getColumnDimension($columnID)->setWidth($columnWidth / 5.4);
329  }
330  ++$columnID;
331  }
332  }
333 
334  $rowID = 1;
335  if (isset($worksheet->Table->Row)) {
336  $additionalMergedCells = 0;
337  foreach ($worksheet->Table->Row as $rowData) {
338  $rowHasData = false;
339  $row_ss = self::getAttributes($rowData, $namespaces['ss']);
340  if (isset($row_ss['Index'])) {
341  $rowID = (int) $row_ss['Index'];
342  }
343 
344  $columnID = 'A';
345  foreach ($rowData->Cell as $cell) {
346  $cell_ss = self::getAttributes($cell, $namespaces['ss']);
347  if (isset($cell_ss['Index'])) {
348  $columnID = Coordinate::stringFromColumnIndex((int) $cell_ss['Index']);
349  }
350  $cellRange = $columnID . $rowID;
351 
352  if ($this->getReadFilter() !== null) {
353  if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) {
354  ++$columnID;
355 
356  continue;
357  }
358  }
359 
360  if (isset($cell_ss['HRef'])) {
361  $spreadsheet->getActiveSheet()->getCell($cellRange)->getHyperlink()->setUrl((string) $cell_ss['HRef']);
362  }
363 
364  if ((isset($cell_ss['MergeAcross'])) || (isset($cell_ss['MergeDown']))) {
365  $columnTo = $columnID;
366  if (isset($cell_ss['MergeAcross'])) {
367  $additionalMergedCells += (int) $cell_ss['MergeAcross'];
368  $columnTo = Coordinate::stringFromColumnIndex(Coordinate::columnIndexFromString($columnID) + $cell_ss['MergeAcross']);
369  }
370  $rowTo = $rowID;
371  if (isset($cell_ss['MergeDown'])) {
372  $rowTo = $rowTo + $cell_ss['MergeDown'];
373  }
374  $cellRange .= ':' . $columnTo . $rowTo;
375  $spreadsheet->getActiveSheet()->mergeCells($cellRange);
376  }
377 
378  $hasCalculatedValue = false;
379  $cellDataFormula = '';
380  if (isset($cell_ss['Formula'])) {
381  $cellDataFormula = $cell_ss['Formula'];
382  $hasCalculatedValue = true;
383  }
384  if (isset($cell->Data)) {
385  $cellData = $cell->Data;
386  $cellValue = (string) $cellData;
388  $cellData_ss = self::getAttributes($cellData, $namespaces['ss']);
389  if (isset($cellData_ss['Type'])) {
390  $cellDataType = $cellData_ss['Type'];
391  switch ($cellDataType) {
392  /*
393  const TYPE_STRING = 's';
394  const TYPE_FORMULA = 'f';
395  const TYPE_NUMERIC = 'n';
396  const TYPE_BOOL = 'b';
397  const TYPE_NULL = 'null';
398  const TYPE_INLINE = 'inlineStr';
399  const TYPE_ERROR = 'e';
400  */
401  case 'String':
403 
404  break;
405  case 'Number':
407  $cellValue = (float) $cellValue;
408  if (floor($cellValue) == $cellValue) {
409  $cellValue = (int) $cellValue;
410  }
411 
412  break;
413  case 'Boolean':
415  $cellValue = ($cellValue != 0);
416 
417  break;
418  case 'DateTime':
420  $dateTime = new DateTime($cellValue, new DateTimeZone('UTC'));
421  $cellValue = Date::PHPToExcel($dateTime);
422 
423  break;
424  case 'Error':
426  $hasCalculatedValue = false;
427 
428  break;
429  }
430  }
431 
432  if ($hasCalculatedValue) {
434  $columnNumber = Coordinate::columnIndexFromString($columnID);
435  $cellDataFormula = AddressHelper::convertFormulaToA1($cellDataFormula, $rowID, $columnNumber);
436  }
437 
438  $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValueExplicit((($hasCalculatedValue) ? $cellDataFormula : $cellValue), $type);
439  if ($hasCalculatedValue) {
440  $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setCalculatedValue($cellValue);
441  }
442  $rowHasData = true;
443  }
444 
445  if (isset($cell->Comment)) {
446  $this->parseCellComment($cell->Comment, $namespaces, $spreadsheet, $columnID, $rowID);
447  }
448 
449  if (isset($cell_ss['StyleID'])) {
450  $style = (string) $cell_ss['StyleID'];
451  if ((isset($this->styles[$style])) && (!empty($this->styles[$style]))) {
452  //if (!$spreadsheet->getActiveSheet()->cellExists($columnID . $rowID)) {
453  // $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValue(null);
454  //}
455  $spreadsheet->getActiveSheet()->getStyle($cellRange)
456  ->applyFromArray($this->styles[$style]);
457  }
458  }
459  ++$columnID;
460  while ($additionalMergedCells > 0) {
461  ++$columnID;
462  --$additionalMergedCells;
463  }
464  }
465 
466  if ($rowHasData) {
467  if (isset($row_ss['Height'])) {
468  $rowHeight = $row_ss['Height'];
469  $spreadsheet->getActiveSheet()->getRowDimension($rowID)->setRowHeight((float) $rowHeight);
470  }
471  }
472 
473  ++$rowID;
474  }
475 
476  if (isset($namespaces['x'])) {
477  $xmlX = $worksheet->children($namespaces['x']);
478  if (isset($xmlX->WorksheetOptions)) {
479  (new PageSettings($xmlX, $namespaces))->loadPageSettings($spreadsheet);
480  }
481  }
482  }
483  ++$worksheetID;
484  }
485 
486  // Globally scoped defined names
487  $activeWorksheet = $spreadsheet->setActiveSheetIndex(0);
488  if (isset($xml->Names[0])) {
489  foreach ($xml->Names[0] as $definedName) {
490  $definedName_ss = self::getAttributes($definedName, $namespaces['ss']);
491  $name = (string) $definedName_ss['Name'];
492  $definedValue = (string) $definedName_ss['RefersTo'];
493  $convertedValue = AddressHelper::convertFormulaToA1($definedValue);
494  if ($convertedValue[0] === '=') {
495  $convertedValue = substr($convertedValue, 1);
496  }
497  $spreadsheet->addDefinedName(DefinedName::createInstance($name, $activeWorksheet, $convertedValue));
498  }
499  }
500 
501  // Return
502  return $spreadsheet;
503  }
504 
505  protected function parseCellComment(
507  array $namespaces,
508  Spreadsheet $spreadsheet,
509  string $columnID,
510  int $rowID
511  ): void {
512  $commentAttributes = $comment->attributes($namespaces['ss']);
513  $author = 'unknown';
514  if (isset($commentAttributes->Author)) {
515  $author = (string) $commentAttributes->Author;
516  }
517 
518  $node = $comment->Data->asXML();
519  $annotation = strip_tags((string) $node);
520  $spreadsheet->getActiveSheet()->getComment($columnID . $rowID)
521  ->setAuthor($author)
522  ->setText($this->parseRichText($annotation));
523  }
524 
525  protected function parseRichText(string $annotation): RichText
526  {
527  $value = new RichText();
528 
529  $value->createText($annotation);
530 
531  return $value;
532  }
533 
534  private static function getAttributes(?SimpleXMLElement $simple, string $node): SimpleXMLElement
535  {
536  return ($simple === null)
537  ? new SimpleXMLElement('<xml></xml>')
538  : ($simple->attributes($node) ?? new SimpleXMLElement('<xml></xml>'));
539  }
540 }
$style
Definition: example_012.php:70
static createInstance(string $name, ?Worksheet $worksheet=null, ?string $value=null, bool $localOnly=false, ?Worksheet $scope=null)
Create a new defined name, either a range or a formula.
Definition: DefinedName.php:84
$type
parseCellComment(SimpleXMLElement $comment, array $namespaces, Spreadsheet $spreadsheet, string $columnID, int $rowID)
Definition: Xml.php:505
parseRichText(string $annotation)
Definition: Xml.php:525
$valid
static convertFormulaToA1(string $formula, int $currentRowNumber=1, int $currentColumnNumber=1)
Converts a formula that uses R1C1/SpreadsheetXML format cell address to an A1 format cell address...
static getAttributes(?SimpleXMLElement $simple, string $node)
Definition: Xml.php:534
trySimpleXMLLoadString($pFilename)
Check if the file is a valid SimpleXML.
Definition: Xml.php:114
listWorksheetNames($filename)
Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object...
Definition: Xml.php:137
listWorksheetInfo($filename)
Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)...
Definition: Xml.php:169
Reader for SpreadsheetML, the XML schema for Microsoft Office Excel 2003.
Definition: Xml.php:26
static PHPToExcel($dateValue)
Convert a date from PHP to an MS Excel serialized date/time value.
Definition: Date.php:231
createSheet($sheetIndex=null)
Create sheet and add it to this workbook.
canRead($pFilename)
Can the current IReader read the file?
Definition: Xml.php:61
setActiveSheetIndex($pIndex)
Set active sheet index.
static getInstance(Reader\IReader $reader)
Definition: XmlScanner.php:39
load($filename)
Loads Spreadsheet from file.
Definition: Xml.php:243
$comment
Definition: buildRTE.php:83
static convertEncoding($value, $to, $from)
Convert string from one encoding to another.
$filename
Definition: buildRTE.php:89
__construct()
Create a new Excel2003XML Reader instance.
Definition: Xml.php:38
addDefinedName(DefinedName $definedName)
Add a defined name (either a named range or a named formula).
static getLibXmlLoaderOptions()
Get default options for libxml loader.
Definition: Settings.php:116
static assertFile($filename)
Assert that given path is an existing file and is readable, otherwise throw exception.
Definition: File.php:143
static columnIndexFromString($pString)
Column index from string.
Definition: Coordinate.php:265
static stringFromColumnIndex($columnIndex)
String from column index.
Definition: Coordinate.php:313
$data
Definition: bench.php:6