ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
HTML.php
Go to the documentation of this file.
1<?php
2
29if (!defined('PHPEXCEL_ROOT')) {
33 define('PHPEXCEL_ROOT', dirname(__FILE__) . '/../../');
34 require(PHPEXCEL_ROOT . 'PHPExcel/Autoloader.php');
35}
36
45{
46
/**
 * Input encoding
 *
 * @var string
 */
protected $_inputEncoding = 'ANSI';

/**
 * Index of the worksheet that loadIntoExisting() activates and fills
 *
 * @var int
 */
protected $_sheetIndex = 0;

/**
 * Style arrays applied to the cell generated for an HTML element,
 * keyed by the element's node name
 *
 * @var array
 */
protected $_formats = array(
    'h1' => array(
        'font' => array(
            'bold' => true,
            'size' => 24,
        ),
    ), // Bold, 24pt
    'h2' => array(
        'font' => array(
            'bold' => true,
            'size' => 18,
        ),
    ), // Bold, 18pt
    'h3' => array(
        'font' => array(
            'bold' => true,
            'size' => 13.5,
        ),
    ), // Bold, 13.5pt
    'h4' => array(
        'font' => array(
            'bold' => true,
            'size' => 12,
        ),
    ), // Bold, 12pt
    'h5' => array(
        'font' => array(
            'bold' => true,
            'size' => 10,
        ),
    ), // Bold, 10pt
    'h6' => array(
        'font' => array(
            'bold' => true,
            'size' => 7.5,
        ),
    ), // Bold, 7.5pt
    'a' => array(
        'font' => array(
            'underline' => true,
            'color'     => array(
                'argb' => PHPExcel_Style_Color::COLOR_BLUE,
            ),
        ),
    ), // Blue underlined
    'hr' => array(
        'borders' => array(
            'bottom' => array(
                'style' => PHPExcel_Style_Border::BORDER_THIN,
                // Keyed by 'argb' like the 'a' entry above; an unkeyed colour
                // value is presumably ignored by applyFromArray() - verify
                'color' => array(
                    'argb' => PHPExcel_Style_Color::COLOR_BLACK,
                ),
            ),
        ),
    ), // Bottom border
);

/**
 * Cell coordinates covered by a merged rowspan range, stored as
 * coordinate => true; table cells skip over columns listed here
 *
 * @var array
 */
protected $rowspan = array();
105
/**
 * Create a new HTML reader that starts out with a default read filter
 */
public function __construct()
{
    $defaultFilter = new PHPExcel_Reader_DefaultReadFilter();
    $this->_readFilter = $defaultFilter;
}
113
/**
 * Validate that the currently open file looks like HTML
 *
 * Only the first 2048 bytes of the open file handle are inspected.
 *
 * @return boolean TRUE when the sample contains a '<' and stripping tags changes its length
 */
protected function _isValidFormat()
{
    // A 2048 byte sample should be enough to recognise HTML
    $sample = fread($this->_fileHandle, 2048);

    // Without any '<' there cannot be a tag
    if (strpos($sample, '<') === false) {
        return false;
    }

    // Markup is present when removing the tags shortens the sample
    $withoutTags = strip_tags($sample);

    return strlen($sample) !== strlen($withoutTags);
}
130
/**
 * Load an HTML file into a freshly created PHPExcel workbook
 *
 * @param  string $pFilename File to read
 * @return PHPExcel Whatever loadIntoExisting() returns for the new workbook
 */
public function load($pFilename)
{
    // Delegate to loadIntoExisting(), handing it a brand new workbook
    return $this->loadIntoExisting($pFilename, new PHPExcel());
}
146
/**
 * Set the input encoding
 *
 * @param  string $pValue Input encoding, 'ANSI' when omitted
 * @return $this Allows calls to be chained
 */
public function setInputEncoding($pValue = 'ANSI')
{
    $this->_inputEncoding = $pValue;
    return $this;
}
158
/**
 * Get the input encoding
 *
 * @return string The value last stored by setInputEncoding(), 'ANSI' by default
 */
public function getInputEncoding()
{
    return $this->_inputEncoding;
}
168
/**
 * Data array used for testing only; should write to the PHPExcel object
 * on completion of tests. Filled as [row][column] => content.
 *
 * @var array
 */
protected $_dataArray = array();

/**
 * Table nesting level consulted while walking the DOM
 *
 * @var int
 */
protected $_tableLevel = 0;

/**
 * Table start columns, indexed by table level
 *
 * @var array
 */
protected $_nestedColumn = array('A');
173
/**
 * Enter a table: push a new nesting level and record its start column
 *
 * @param  string $column Column the cursor is in when the table opens
 * @return string Start column recorded for the table that was just entered
 */
protected function _setTableStartColumn($column)
{
    // A top-level table always starts in the first column
    if ($this->_tableLevel == 0) {
        $column = 'A';
    }
    // Move to the next level so the enclosing table's start column is kept
    ++$this->_tableLevel;
    $this->_nestedColumn[$this->_tableLevel] = $column;

    return $this->_nestedColumn[$this->_tableLevel];
}

/**
 * Get the start column recorded for the current table level
 *
 * @return string
 */
protected function _getTableStartColumn()
{
    return $this->_nestedColumn[$this->_tableLevel];
}

/**
 * Leave a table: drop back one nesting level and discard its start column
 *
 * @return string Start column of the table that was just left
 */
protected function _releaseTableStartColumn()
{
    // Mirror of _setTableStartColumn(): level and column stack shrink together
    --$this->_tableLevel;

    return array_pop($this->_nestedColumn);
}

/**
 * Write the pending cell content to the worksheet and reset the buffer
 *
 * @param mixed  $sheet       Worksheet receiving the value
 * @param string $column      Column letter(s) of the target cell
 * @param int    $row         Row number of the target cell
 * @param mixed  $cellContent Pending content; always reset to '' (by reference)
 */
protected function _flushCell($sheet, $column, $row, &$cellContent)
{
    if (is_string($cellContent)) {
        // Simple string content
        if (trim($cellContent) > '') {
            // Only actually write it if there's content in the string
            // ... we return the cell so we can mess about with styles more easily
            $sheet->setCellValue($column . $row, $cellContent, true);
            $this->_dataArray[$row][$column] = $cellContent;
        }
    } else {
        // We have a Rich Text run
        // TODO: nothing is written to the sheet yet, only the test array is updated
        $this->_dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
    }
    $cellContent = (string) '';
}

/**
 * Walk the children of a DOM node and translate them into worksheet cells
 *
 * Text nodes are appended to $cellContent; element nodes are dispatched on
 * their node name and recurse into their own children. The cursor ($row,
 * $column) and the pending content are shared with the caller by reference.
 *
 * @param DOMNode $element     Node whose children are processed
 * @param mixed   $sheet       Worksheet being filled
 * @param int     $row         Current row (by reference)
 * @param string  $column      Current column (by reference)
 * @param mixed   $cellContent Pending, not yet flushed, cell content (by reference)
 * @param mixed   $format      Not used by this method
 */
protected function _processDomElement(DOMNode $element, $sheet, &$row, &$column, &$cellContent, $format = null)
{
    foreach ($element->childNodes as $child) {
        if ($child instanceof DOMText) {
            // Collapse every run of whitespace into a single space
            $domText = preg_replace('/\s+/u', ' ', trim($child->nodeValue));
            if (is_string($cellContent)) {
                // simply append the text if the cell content is a plain text string
                $cellContent .= $domText;
            } else {
                // but if we have a rich text run instead, we need to append it correctly
                // TODO
            }
        } elseif ($child instanceof DOMElement) {
            $attributeArray = array();
            foreach ($child->attributes as $attribute) {
                $attributeArray[$attribute->name] = $attribute->value;
            }

            switch ($child->nodeName) {
                case 'meta':
                    // TODO: extract the character set from the 'content' attribute,
                    // so we can convert to UTF-8 if required
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;
                case 'title':
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    $sheet->setTitle($cellContent);
                    $cellContent = '';
                    break;
                case 'span':
                case 'div':
                case 'font':
                case 'i':
                case 'em':
                case 'strong':
                case 'b':
                    // Inline/styling containers: keep their text separated by spaces
                    if ($cellContent > '') {
                        $cellContent .= ' ';
                    }
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    if ($cellContent > '') {
                        $cellContent .= ' ';
                    }
                    break;
                case 'hr':
                    $this->_flushCell($sheet, $column, $row, $cellContent);
                    ++$row;
                    if (isset($this->_formats[$child->nodeName])) {
                        $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
                    } else {
                        $cellContent = '----------';
                        $this->_flushCell($sheet, $column, $row, $cellContent);
                    }
                    ++$row;
                    // Deliberately no break: a horizontal rule is followed by a line break
                case 'br':
                    if ($this->_tableLevel > 0) {
                        // If we're inside a table, replace with a \n
                        $cellContent .= "\n";
                    } else {
                        // Otherwise flush our existing content and move the row cursor on
                        $this->_flushCell($sheet, $column, $row, $cellContent);
                        ++$row;
                    }
                    break;
                case 'a':
                    if (isset($attributeArray['href'])) {
                        // Attach the link (and the link style, if any) to the current cell
                        $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeArray['href']);
                        if (isset($this->_formats[$child->nodeName])) {
                            $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
                        }
                    }
                    $cellContent .= ' ';
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;
                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                case 'ol':
                case 'ul':
                case 'p':
                    if ($this->_tableLevel > 0) {
                        // If we're inside a table, replace with a \n
                        $cellContent .= "\n";
                        $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    } else {
                        // Pending content gets its own row before the block starts
                        if ($cellContent > '') {
                            $this->_flushCell($sheet, $column, $row, $cellContent);
                            $row++;
                        }
                        $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                        $this->_flushCell($sheet, $column, $row, $cellContent);

                        if (isset($this->_formats[$child->nodeName])) {
                            $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
                        }

                        $row++;
                        $column = 'A';
                    }
                    break;
                case 'li':
                    if ($this->_tableLevel > 0) {
                        // If we're inside a table, replace with a \n
                        $cellContent .= "\n";
                        $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    } else {
                        if ($cellContent > '') {
                            $this->_flushCell($sheet, $column, $row, $cellContent);
                        }
                        ++$row;
                        $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                        $this->_flushCell($sheet, $column, $row, $cellContent);
                        $column = 'A';
                    }
                    break;
                case 'table':
                    $this->_flushCell($sheet, $column, $row, $cellContent);
                    // Enter the table so _tableLevel and the start-column stack track nesting
                    $column = $this->_setTableStartColumn($column);
                    // A nested table steps back one row before its rows are processed
                    if ($this->_tableLevel > 1) {
                        --$row;
                    }
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    // Leave the table again; the cursor returns to the table's start column
                    $column = $this->_releaseTableStartColumn();
                    if ($this->_tableLevel > 1) {
                        ++$column;
                    } else {
                        ++$row;
                    }
                    break;
                case 'thead':
                case 'tbody':
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;
                case 'tr':
                    // Every table row restarts at the start column of the current table
                    $column = $this->_getTableStartColumn();
                    $cellContent = '';
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    ++$row;
                    break;
                case 'th':
                case 'td':
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);

                    // Skip columns already occupied by a rowspan merge from an earlier row
                    while (isset($this->rowspan[$column . $row])) {
                        ++$column;
                    }

                    $this->_flushCell($sheet, $column, $row, $cellContent);

                    // NOTE: an inline 'style' attribute on the cell is currently ignored

                    // Last column covered by this cell (same column unless colspan is given)
                    $columnTo = $column;
                    if (isset($attributeArray['colspan'])) {
                        for ($i = 0; $i < $attributeArray['colspan'] - 1; $i++) {
                            ++$columnTo;
                        }
                    }

                    if (isset($attributeArray['rowspan'])) {
                        // Merge over rows (and columns, if colspan is set as well) and
                        // remember every covered cell so later rows can skip it
                        $range = $column . $row . ':' . $columnTo . ($row + $attributeArray['rowspan'] - 1);
                        foreach (\PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) {
                            $this->rowspan[$value] = true;
                        }
                        $sheet->mergeCells($range);
                    } elseif (isset($attributeArray['colspan'])) {
                        // Merge over columns within the current row only
                        $sheet->mergeCells($column . $row . ':' . $columnTo . $row);
                    }

                    // Continue in the column after the (possibly merged) cell
                    $column = $columnTo;
                    ++$column;
                    break;
                case 'body':
                    // Start the document body with a clean cursor and an empty buffer
                    $row = 1;
                    $column = 'A';
                    $cellContent = '';
                    $this->_tableLevel = 0;
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;
                default:
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
            }
        }
    }
}
449
/**
 * Load an HTML file into an existing PHPExcel workbook
 *
 * The file is first opened and checked with _isValidFormat(), then parsed
 * into a DOM document whose nodes are written to the sheet at the
 * configured sheet index (missing sheets are created first).
 *
 * @param  string   $pFilename   File to read
 * @param  PHPExcel $objPHPExcel Workbook to load into
 * @return PHPExcel The workbook that was passed in
 * @throws PHPExcel_Reader_Exception When the file is not recognised as HTML
 *                                   or cannot be loaded as a DOM document
 */
public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
{
    // Open file to validate
    $this->_openFile($pFilename);
    $isHtml = $this->_isValidFormat();
    // Close after validating, whatever the outcome
    fclose($this->_fileHandle);
    if (!$isHtml) {
        throw new PHPExcel_Reader_Exception($pFilename . " is an Invalid HTML file.");
    }

    // Make sure the target sheet exists, then select it
    while ($objPHPExcel->getSheetCount() <= $this->_sheetIndex) {
        $objPHPExcel->createSheet();
    }
    $objPHPExcel->setActiveSheetIndex($this->_sheetIndex);

    // Create a new DOM object
    $dom = new DOMDocument();
    // Reload the HTML file into the DOM object
    $loaded = $dom->loadHTML($this->securityScanFile($pFilename));
    if ($loaded === false) {
        // The message parts must be concatenated: passed as separate arguments
        // they would land in the exception's $code and $previous parameters
        throw new PHPExcel_Reader_Exception('Failed to load ' . $pFilename . ' as a DOM Document');
    }

    // Discard white space
    $dom->preserveWhiteSpace = false;

    $row = 0;
    $column = 'A';
    $content = '';
    $this->_processDomElement($dom, $objPHPExcel->getActiveSheet(), $row, $column, $content);

    // Return
    return $objPHPExcel;
}
494
/**
 * Get the index of the sheet that is filled on load
 *
 * @return int
 */
public function getSheetIndex()
{
    $index = $this->_sheetIndex;

    return $index;
}
504
/**
 * Set the index of the sheet that is filled on load
 *
 * @param  int $pValue Sheet index, 0 when omitted
 * @return $this Allows calls to be chained
 */
public function setSheetIndex($pValue = 0)
{
    $this->_sheetIndex = $pValue;
    return $this;
}
517
/**
 * Scan the given markup for use of <!ENTITY to prevent XXE/XEE attacks
 *
 * @param  string $xml Markup to check
 * @return string The unchanged input when no entity declaration was found
 * @throws PHPExcel_Reader_Exception When an entity declaration is detected
 */
public function securityScan($xml)
{
    // Every character of the marker may be surrounded by an optional NUL byte,
    // presumably so the declaration is also found in multi-byte encoded input
    $markerChars = str_split('<!ENTITY');
    $pattern = '/\\0?' . implode('\\0?', $markerChars) . '\\0?/';

    if (!preg_match($pattern, $xml)) {
        return $xml;
    }

    throw new PHPExcel_Reader_Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
532
533}
534
$objPHPExcel
$column
Definition: 39dropdown.php:62
An exception for terminating execution or to throw for unit testing.
static extractAllCellReferencesInRange($pRange='A1')
Extract all cell references in range.
Definition: Cell.php:854
securityScanFile($filestream)
Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
Definition: Abstract.php:251
_openFile($pFilename)
Open file for reading.
Definition: Abstract.php:195
_isValidFormat()
Validate that the current file is an HTML file.
Definition: HTML.php:119
load($pFilename)
Loads PHPExcel from file.
Definition: HTML.php:138
securityScan($xml)
Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
Definition: HTML.php:524
loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
Loads PHPExcel from file into PHPExcel instance.
Definition: HTML.php:458
_processDomElement(DOMNode $element, $sheet, &$row, &$column, &$cellContent, $format=null)
Definition: HTML.php:216
setSheetIndex($pValue=0)
Set sheet index.
Definition: HTML.php:511
__construct()
Create a new PHPExcel_Reader_HTML.
Definition: HTML.php:109
setInputEncoding($pValue='ANSI')
Set input encoding.
Definition: HTML.php:152
_setTableStartColumn($column)
Definition: HTML.php:174
getSheetIndex()
Get sheet index.
Definition: HTML.php:500
_releaseTableStartColumn()
Definition: HTML.php:189
_flushCell($sheet, $column, $row, &$cellContent)
Definition: HTML.php:196
getInputEncoding()
Get input encoding.
Definition: HTML.php:164
defined( 'APPLICATION_ENV')||define( 'APPLICATION_ENV'
Definition: bootstrap.php:27