ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
Gnumeric.php
Go to the documentation of this file.
1 <?php
2 
4 
19 use XMLReader;
20 
21 class Gnumeric extends BaseReader
22 {
23  const NAMESPACE_GNM = 'http://www.gnumeric.org/v10.dtd'; // gmr in old sheets
24 
25  const NAMESPACE_XSI = 'http://www.w3.org/2001/XMLSchema-instance';
26 
27  const NAMESPACE_OFFICE = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0';
28 
29  const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
30 
31  const NAMESPACE_DC = 'http://purl.org/dc/elements/1.1/';
32 
33  const NAMESPACE_META = 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0';
34 
35  const NAMESPACE_OOO = 'http://openoffice.org/2004/office';
36 
42  private $expressions = [];
43 
49  private $spreadsheet;
50 
53 
55  public static $mappings = [
56  'dataType' => [
57  '10' => DataType::TYPE_NULL,
58  '20' => DataType::TYPE_BOOL,
59  '30' => DataType::TYPE_NUMERIC, // Integer doesn't exist in Excel
60  '40' => DataType::TYPE_NUMERIC, // Float
61  '50' => DataType::TYPE_ERROR,
62  '60' => DataType::TYPE_STRING,
63  //'70': // Cell Range
64  //'80': // Array
65  ],
66  ];
67 
71  public function __construct()
72  {
73  parent::__construct();
74  $this->referenceHelper = ReferenceHelper::getInstance();
75  $this->securityScanner = XmlScanner::getInstance($this);
76  }
77 
85  public function canRead($pFilename)
86  {
87  File::assertFile($pFilename);
88 
89  // Check if gzlib functions are available
90  $data = '';
91  if (function_exists('gzread')) {
92  // Read signature data (first 3 bytes)
93  $fh = fopen($pFilename, 'rb');
94  if ($fh !== false) {
95  $data = fread($fh, 2);
96  fclose($fh);
97  }
98  }
99 
100  return $data === chr(0x1F) . chr(0x8B);
101  }
102 
103  private static function matchXml(XMLReader $xml, string $expectedLocalName): bool
104  {
105  return $xml->namespaceURI === self::NAMESPACE_GNM
106  && $xml->localName === $expectedLocalName
107  && $xml->nodeType === XMLReader::ELEMENT;
108  }
109 
117  public function listWorksheetNames($pFilename)
118  {
119  File::assertFile($pFilename);
120 
121  $xml = new XMLReader();
122  $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
123  $xml->setParserProperty(2, true);
124 
125  $worksheetNames = [];
126  while ($xml->read()) {
127  if (self::matchXml($xml, 'SheetName')) {
128  $xml->read(); // Move onto the value node
129  $worksheetNames[] = (string) $xml->value;
130  } elseif (self::matchXml($xml, 'Sheets')) {
131  // break out of the loop once we've got our sheet names rather than parse the entire file
132  break;
133  }
134  }
135 
136  return $worksheetNames;
137  }
138 
146  public function listWorksheetInfo($pFilename)
147  {
148  File::assertFile($pFilename);
149 
150  $xml = new XMLReader();
151  $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
152  $xml->setParserProperty(2, true);
153 
154  $worksheetInfo = [];
155  while ($xml->read()) {
156  if (self::matchXml($xml, 'Sheet')) {
157  $tmpInfo = [
158  'worksheetName' => '',
159  'lastColumnLetter' => 'A',
160  'lastColumnIndex' => 0,
161  'totalRows' => 0,
162  'totalColumns' => 0,
163  ];
164 
165  while ($xml->read()) {
166  if (self::matchXml($xml, 'Name')) {
167  $xml->read(); // Move onto the value node
168  $tmpInfo['worksheetName'] = (string) $xml->value;
169  } elseif (self::matchXml($xml, 'MaxCol')) {
170  $xml->read(); // Move onto the value node
171  $tmpInfo['lastColumnIndex'] = (int) $xml->value;
172  $tmpInfo['totalColumns'] = (int) $xml->value + 1;
173  } elseif (self::matchXml($xml, 'MaxRow')) {
174  $xml->read(); // Move onto the value node
175  $tmpInfo['totalRows'] = (int) $xml->value + 1;
176 
177  break;
178  }
179  }
180  $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
181  $worksheetInfo[] = $tmpInfo;
182  }
183  }
184 
185  return $worksheetInfo;
186  }
187 
193  private function gzfileGetContents($filename)
194  {
195  $file = @gzopen($filename, 'rb');
196  $data = '';
197  if ($file !== false) {
198  while (!gzeof($file)) {
199  $data .= gzread($file, 1024);
200  }
201  gzclose($file);
202  }
203 
204  return $data;
205  }
206 
207  public static function gnumericMappings(): array
208  {
209  return array_merge(self::$mappings, Styles::$mappings);
210  }
211 
212  private function processComments(SimpleXMLElement $sheet): void
213  {
214  if ((!$this->readDataOnly) && (isset($sheet->Objects))) {
215  foreach ($sheet->Objects->children(self::NAMESPACE_GNM) as $key => $comment) {
216  $commentAttributes = $comment->attributes();
217  // Only comment objects are handled at the moment
218  if ($commentAttributes->Text) {
219  $this->spreadsheet->getActiveSheet()->getComment((string) $commentAttributes->ObjectBound)
220  ->setAuthor((string) $commentAttributes->Author)
221  ->setText($this->parseRichText((string) $commentAttributes->Text));
222  }
223  }
224  }
225  }
226 
230  private static function testSimpleXml($value): SimpleXMLElement
231  {
232  return ($value instanceof SimpleXMLElement) ? $value : new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><root></root>');
233  }
234 
242  public function load($pFilename)
243  {
244  // Create new Spreadsheet
245  $spreadsheet = new Spreadsheet();
246  $spreadsheet->removeSheetByIndex(0);
247 
248  // Load into this instance
249  return $this->loadIntoExisting($pFilename, $spreadsheet);
250  }
251 
255  public function loadIntoExisting(string $pFilename, Spreadsheet $spreadsheet): Spreadsheet
256  {
257  $this->spreadsheet = $spreadsheet;
258  File::assertFile($pFilename);
259 
260  $gFileData = $this->gzfileGetContents($pFilename);
261 
262  $xml2 = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
263  $xml = self::testSimpleXml($xml2);
264 
265  $gnmXML = $xml->children(self::NAMESPACE_GNM);
266  (new Properties($this->spreadsheet))->readProperties($xml, $gnmXML);
267 
268  $worksheetID = 0;
269  foreach ($gnmXML->Sheets->Sheet as $sheet) {
270  $worksheetName = (string) $sheet->Name;
271  if ((isset($this->loadSheetsOnly)) && (!in_array($worksheetName, $this->loadSheetsOnly))) {
272  continue;
273  }
274 
275  $maxRow = $maxCol = 0;
276 
277  // Create new Worksheet
278  $this->spreadsheet->createSheet();
279  $this->spreadsheet->setActiveSheetIndex($worksheetID);
280  // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in formula
281  // cells... during the load, all formulae should be correct, and we're simply bringing the worksheet
282  // name in line with the formula, not the reverse
283  $this->spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
284 
285  if (!$this->readDataOnly) {
286  (new PageSetup($this->spreadsheet))
287  ->printInformation($sheet)
288  ->sheetMargins($sheet);
289  }
290 
291  foreach ($sheet->Cells->Cell as $cell) {
292  $cellAttributes = $cell->attributes();
293  $row = (int) $cellAttributes->Row + 1;
294  $column = (int) $cellAttributes->Col;
295 
296  if ($row > $maxRow) {
297  $maxRow = $row;
298  }
299  if ($column > $maxCol) {
300  $maxCol = $column;
301  }
302 
303  $column = Coordinate::stringFromColumnIndex($column + 1);
304 
305  // Read cell?
306  if ($this->getReadFilter() !== null) {
307  if (!$this->getReadFilter()->readCell($column, $row, $worksheetName)) {
308  continue;
309  }
310  }
311 
312  $ValueType = $cellAttributes->ValueType;
313  $ExprID = (string) $cellAttributes->ExprID;
315  if ($ExprID > '') {
316  if (((string) $cell) > '') {
317  $this->expressions[$ExprID] = [
318  'column' => $cellAttributes->Col,
319  'row' => $cellAttributes->Row,
320  'formula' => (string) $cell,
321  ];
322  } else {
323  $expression = $this->expressions[$ExprID];
324 
325  $cell = $this->referenceHelper->updateFormulaReferences(
326  $expression['formula'],
327  'A1',
328  $cellAttributes->Col - $expression['column'],
329  $cellAttributes->Row - $expression['row'],
330  $worksheetName
331  );
332  }
334  } else {
335  $vtype = (string) $ValueType;
336  if (array_key_exists($vtype, self::$mappings['dataType'])) {
337  $type = self::$mappings['dataType'][$vtype];
338  }
339  if ($vtype === '20') { // Boolean
340  $cell = $cell == 'TRUE';
341  }
342  }
343  $this->spreadsheet->getActiveSheet()->getCell($column . $row)->setValueExplicit((string) $cell, $type);
344  }
345 
346  if ($sheet->Styles !== null) {
347  (new Styles($this->spreadsheet, $this->readDataOnly))->read($sheet, $maxRow, $maxCol);
348  }
349 
350  $this->processComments($sheet);
351  $this->processColumnWidths($sheet, $maxCol);
352  $this->processRowHeights($sheet, $maxRow);
353  $this->processMergedCells($sheet);
354  $this->processAutofilter($sheet);
355 
356  ++$worksheetID;
357  }
358 
359  $this->processDefinedNames($gnmXML);
360 
361  // Return
362  return $this->spreadsheet;
363  }
364 
365  private function processMergedCells(?SimpleXMLElement $sheet): void
366  {
367  // Handle Merged Cells in this worksheet
368  if ($sheet !== null && isset($sheet->MergedRegions)) {
369  foreach ($sheet->MergedRegions->Merge as $mergeCells) {
370  if (strpos($mergeCells, ':') !== false) {
371  $this->spreadsheet->getActiveSheet()->mergeCells($mergeCells);
372  }
373  }
374  }
375  }
376 
377  private function processAutofilter(?SimpleXMLElement $sheet): void
378  {
379  if ($sheet !== null && isset($sheet->Filters)) {
380  foreach ($sheet->Filters->Filter as $autofilter) {
381  if ($autofilter !== null) {
382  $attributes = $autofilter->attributes();
383  if (isset($attributes['Area'])) {
384  $this->spreadsheet->getActiveSheet()->setAutoFilter((string) $attributes['Area']);
385  }
386  }
387  }
388  }
389  }
390 
391  private function setColumnWidth(int $whichColumn, float $defaultWidth): void
392  {
393  $columnDimension = $this->spreadsheet->getActiveSheet()->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1));
394  if ($columnDimension !== null) {
395  $columnDimension->setWidth($defaultWidth);
396  }
397  }
398 
399  private function setColumnInvisible(int $whichColumn): void
400  {
401  $columnDimension = $this->spreadsheet->getActiveSheet()->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1));
402  if ($columnDimension !== null) {
403  $columnDimension->setVisible(false);
404  }
405  }
406 
407  private function processColumnLoop(int $whichColumn, int $maxCol, SimpleXMLElement $columnOverride, float $defaultWidth): int
408  {
409  $columnAttributes = self::testSimpleXml($columnOverride->attributes());
410  $column = $columnAttributes['No'];
411  $columnWidth = ((float) $columnAttributes['Unit']) / 5.4;
412  $hidden = (isset($columnAttributes['Hidden'])) && ((string) $columnAttributes['Hidden'] == '1');
413  $columnCount = (int) ($columnAttributes['Count'] ?? 1);
414  while ($whichColumn < $column) {
415  $this->setColumnWidth($whichColumn, $defaultWidth);
416  ++$whichColumn;
417  }
418  while (($whichColumn < ($column + $columnCount)) && ($whichColumn <= $maxCol)) {
419  $this->setColumnWidth($whichColumn, $columnWidth);
420  if ($hidden) {
421  $this->setColumnInvisible($whichColumn);
422  }
423  ++$whichColumn;
424  }
425 
426  return $whichColumn;
427  }
428 
429  private function processColumnWidths(?SimpleXMLElement $sheet, int $maxCol): void
430  {
431  if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Cols))) {
432  // Column Widths
433  $defaultWidth = 0;
434  $columnAttributes = $sheet->Cols->attributes();
435  if ($columnAttributes !== null) {
436  $defaultWidth = $columnAttributes['DefaultSizePts'] / 5.4;
437  }
438  $whichColumn = 0;
439  foreach ($sheet->Cols->ColInfo as $columnOverride) {
440  $whichColumn = $this->processColumnLoop($whichColumn, $maxCol, $columnOverride, $defaultWidth);
441  }
442  while ($whichColumn <= $maxCol) {
443  $this->setColumnWidth($whichColumn, $defaultWidth);
444  ++$whichColumn;
445  }
446  }
447  }
448 
449  private function setRowHeight(int $whichRow, float $defaultHeight): void
450  {
451  $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow);
452  if ($rowDimension !== null) {
453  $rowDimension->setRowHeight($defaultHeight);
454  }
455  }
456 
457  private function setRowInvisible(int $whichRow): void
458  {
459  $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow);
460  if ($rowDimension !== null) {
461  $rowDimension->setVisible(false);
462  }
463  }
464 
465  private function processRowLoop(int $whichRow, int $maxRow, SimpleXMLElement $rowOverride, float $defaultHeight): int
466  {
467  $rowAttributes = self::testSimpleXml($rowOverride->attributes());
468  $row = $rowAttributes['No'];
469  $rowHeight = (float) $rowAttributes['Unit'];
470  $hidden = (isset($rowAttributes['Hidden'])) && ((string) $rowAttributes['Hidden'] == '1');
471  $rowCount = (int) ($rowAttributes['Count'] ?? 1);
472  while ($whichRow < $row) {
473  ++$whichRow;
474  $this->setRowHeight($whichRow, $defaultHeight);
475  }
476  while (($whichRow < ($row + $rowCount)) && ($whichRow < $maxRow)) {
477  ++$whichRow;
478  $this->setRowHeight($whichRow, $rowHeight);
479  if ($hidden) {
480  $this->setRowInvisible($whichRow);
481  }
482  }
483 
484  return $whichRow;
485  }
486 
487  private function processRowHeights(?SimpleXMLElement $sheet, int $maxRow): void
488  {
489  if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Rows))) {
490  // Row Heights
491  $defaultHeight = 0;
492  $rowAttributes = $sheet->Rows->attributes();
493  if ($rowAttributes !== null) {
494  $defaultHeight = (float) $rowAttributes['DefaultSizePts'];
495  }
496  $whichRow = 0;
497 
498  foreach ($sheet->Rows->RowInfo as $rowOverride) {
499  $whichRow = $this->processRowLoop($whichRow, $maxRow, $rowOverride, $defaultHeight);
500  }
501  // never executed, I can't figure out any circumstances
502  // under which it would be executed, and, even if
503  // such exist, I'm not convinced this is needed.
504  //while ($whichRow < $maxRow) {
505  // ++$whichRow;
506  // $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow)->setRowHeight($defaultHeight);
507  //}
508  }
509  }
510 
511  private function processDefinedNames(?SimpleXMLElement $gnmXML): void
512  {
513  // Loop through definedNames (global named ranges)
514  if ($gnmXML !== null && isset($gnmXML->Names)) {
515  foreach ($gnmXML->Names->Name as $definedName) {
516  $name = (string) $definedName->name;
517  $value = (string) $definedName->value;
518  if (stripos($value, '#REF!') !== false) {
519  continue;
520  }
521 
522  [$worksheetName] = Worksheet::extractSheetTitle($value, true);
523  $worksheetName = trim($worksheetName, "'");
524  $worksheet = $this->spreadsheet->getSheetByName($worksheetName);
525  // Worksheet might still be null if we're only loading selected sheets rather than the full spreadsheet
526  if ($worksheet !== null) {
527  $this->spreadsheet->addDefinedName(DefinedName::createInstance($name, $worksheet, $value));
528  }
529  }
530  }
531  }
532 
533  private function parseRichText(string $is): RichText
534  {
535  $value = new RichText();
536  $value->createText($is);
537 
538  return $value;
539  }
540 }
setColumnWidth(int $whichColumn, float $defaultWidth)
Definition: Gnumeric.php:391
static matchXml(XMLReader $xml, string $expectedLocalName)
Definition: Gnumeric.php:103
static createInstance(string $name, ?Worksheet $worksheet=null, ?string $value=null, bool $localOnly=false, ?Worksheet $scope=null)
Create a new defined name, either a range or a formula.
Definition: DefinedName.php:84
processDefinedNames(?SimpleXMLElement $gnmXML)
Definition: Gnumeric.php:511
if(! $row) $columnCount
listWorksheetInfo($pFilename)
Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)...
Definition: Gnumeric.php:146
$type
static extractSheetTitle($pRange, $returnRange=false)
Extract worksheet title from range.
Definition: Worksheet.php:2719
processRowHeights(?SimpleXMLElement $sheet, int $maxRow)
Definition: Gnumeric.php:487
setRowHeight(int $whichRow, float $defaultHeight)
Definition: Gnumeric.php:449
processColumnWidths(?SimpleXMLElement $sheet, int $maxCol)
Definition: Gnumeric.php:429
load($pFilename)
Loads Spreadsheet from file.
Definition: Gnumeric.php:242
loadIntoExisting(string $pFilename, Spreadsheet $spreadsheet)
Loads from file into Spreadsheet instance.
Definition: Gnumeric.php:255
if(!file_exists(getcwd() . '/ilias.ini.php'))
registration confirmation script for ilias
Definition: confirmReg.php:12
static getInstance(Reader\IReader $reader)
Definition: XmlScanner.php:39
processComments(SimpleXMLElement $sheet)
Definition: Gnumeric.php:212
processColumnLoop(int $whichColumn, int $maxCol, SimpleXMLElement $columnOverride, float $defaultWidth)
Definition: Gnumeric.php:407
listWorksheetNames($pFilename)
Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object...
Definition: Gnumeric.php:117
static getInstance()
Get an instance of this class.
if(array_key_exists('yes', $_REQUEST)) $attributes
Definition: getconsent.php:85
processRowLoop(int $whichRow, int $maxRow, SimpleXMLElement $rowOverride, float $defaultHeight)
Definition: Gnumeric.php:465
$comment
Definition: buildRTE.php:83
$filename
Definition: buildRTE.php:89
$row
static __construct()
Create a new Gnumeric.
Definition: Gnumeric.php:71
processAutofilter(?SimpleXMLElement $sheet)
Definition: Gnumeric.php:377
static getLibXmlLoaderOptions()
Get default options for libxml loader.
Definition: Settings.php:116
processMergedCells(?SimpleXMLElement $sheet)
Definition: Gnumeric.php:365
canRead($pFilename)
Can the current IReader read the file?
Definition: Gnumeric.php:85
static assertFile($filename)
Assert that given path is an existing file and is readable, otherwise throw exception.
Definition: File.php:143
$key
Definition: croninfo.php:18
static stringFromColumnIndex($columnIndex)
String from column index.
Definition: Coordinate.php:313
$data
Definition: bench.php:6