132 parent::__construct();
153 $startWithTag = self::startsWithTag($beginning);
154 $containsTags = self::containsTags($beginning);
155 $endsWithTag = self::endsWithTag($this->
readEnding());
157 fclose($this->fileHandle);
159 return $startWithTag && $containsTags && $endsWithTag;
164 fseek($this->fileHandle, 0);
166 return fread($this->fileHandle, self::TEST_SAMPLE_SIZE);
171 $meta = stream_get_meta_data($this->fileHandle);
179 $blockSize = self::TEST_SAMPLE_SIZE;
180 if (
$size < $blockSize) {
184 fseek($this->fileHandle,
$size - $blockSize);
186 return fread($this->fileHandle, $blockSize);
191 return '<' === substr(trim(
$data), 0, 1);
196 return '>' === substr(trim(
$data), -1, 1);
201 return strlen(
$data) !== strlen(strip_tags(
$data));
211 public function load($pFilename)
233 $this->inputEncoding = $pValue;
261 if ($this->tableLevel == 0) {
279 return array_pop($this->nestedColumn);
284 if (is_string($cellContent)) {
286 if (trim($cellContent) >
'') {
291 $this->dataArray[
$row][$column] = $cellContent;
296 $this->dataArray[
$row][$column] =
'RICH TEXT: ' . $cellContent;
298 $cellContent = (string)
'';
303 $attributeArray = [];
304 foreach ($child->attributes as $attribute) {
305 $attributeArray[$attribute->name] = $attribute->value;
308 if ($child->nodeName ===
'body') {
312 $this->tableLevel = 0;
321 if ($child->nodeName ===
'title') {
323 $sheet->
setTitle($cellContent,
true,
true);
330 private static $spanEtc = [
'span',
'div',
'font',
'i',
'em',
'strong',
'b'];
334 if (in_array($child->nodeName, self::$spanEtc)) {
335 if (isset($attributeArray[
'class']) && $attributeArray[
'class'] ===
'comment') {
338 ->createTextRun($child->textContent);
342 if (isset($this->formats[$child->nodeName])) {
343 $sheet->
getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
352 if ($child->nodeName ===
'hr') {
353 $this->
flushCell($sheet, $column, $row, $cellContent);
355 if (isset($this->formats[$child->nodeName])) {
356 $sheet->
getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
366 if ($child->nodeName ===
'br' || $child->nodeName ===
'hr') {
367 if ($this->tableLevel > 0) {
369 $cellContent .=
"\n";
370 $sheet->
getStyle($column . $row)->getAlignment()->setWrapText(
true);
373 $this->
flushCell($sheet, $column, $row, $cellContent);
377 $this->
processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);
383 if ($child->nodeName ===
'a') {
384 foreach ($attributeArray as $attributeName => $attributeValue) {
385 switch ($attributeName) {
387 $sheet->getCell($column . $row)->
getHyperlink()->setUrl($attributeValue);
388 if (isset($this->formats[$child->nodeName])) {
389 $sheet->
getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
394 if ($attributeValue ===
'comment-indicator') {
407 private static $h1Etc = [
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'ol',
'ul',
'p'];
411 if (in_array($child->nodeName, self::$h1Etc)) {
412 if ($this->tableLevel > 0) {
414 $cellContent .= $cellContent ?
"\n" :
'';
415 $sheet->
getStyle($column . $row)->getAlignment()->setWrapText(
true);
418 if ($cellContent >
'') {
419 $this->
flushCell($sheet, $column, $row, $cellContent);
423 $this->
flushCell($sheet, $column, $row, $cellContent);
425 if (isset($this->formats[$child->nodeName])) {
426 $sheet->
getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
439 if ($child->nodeName ===
'li') {
440 if ($this->tableLevel > 0) {
442 $cellContent .= $cellContent ?
"\n" :
'';
445 if ($cellContent >
'') {
446 $this->
flushCell($sheet, $column, $row, $cellContent);
450 $this->
flushCell($sheet, $column, $row, $cellContent);
460 if ($child->nodeName ===
'img') {
461 $this->
insertImage($sheet, $column, $row, $attributeArray);
469 if ($child->nodeName ===
'table') {
470 $this->
flushCell($sheet, $column, $row, $cellContent);
472 if ($this->tableLevel > 1 && $row > 1) {
477 if ($this->tableLevel > 1) {
489 if ($child->nodeName ===
'tr') {
494 if (isset($attributeArray[
'height'])) {
495 $sheet->
getRowDimension($row)->setRowHeight($attributeArray[
'height']);
506 if ($child->nodeName !==
'td' && $child->nodeName !==
'th') {
515 if (isset($attributeArray[
'bgcolor'])) {
516 $sheet->
getStyle(
"$column$row")->applyFromArray(
520 'color' => [
'rgb' => $this->
getStyleColor($attributeArray[
'bgcolor'])],
529 if (isset($attributeArray[
'width'])) {
536 if (isset($attributeArray[
'height'])) {
537 $sheet->
getRowDimension($row)->setRowHeight($attributeArray[
'height']);
543 if (isset($attributeArray[
'align'])) {
544 $sheet->
getStyle($column . $row)->getAlignment()->setHorizontal($attributeArray[
'align']);
550 if (isset($attributeArray[
'valign'])) {
551 $sheet->
getStyle($column . $row)->getAlignment()->setVertical($attributeArray[
'valign']);
557 if (isset($attributeArray[
'data-format'])) {
558 $sheet->
getStyle($column . $row)->getNumberFormat()->setFormatCode($attributeArray[
'data-format']);
564 while (isset($this->rowspan[$column . $row])) {
572 $this->
flushCell($sheet, $column, $row, $cellContent);
581 if (isset($attributeArray[
'rowspan'], $attributeArray[
'colspan'])) {
584 for (
$i = 0;
$i < (int) $attributeArray[
'colspan'] - 1; ++
$i) {
587 $range = $column . $row .
':' . $columnTo . ($row + (int) $attributeArray[
'rowspan'] - 1);
589 $this->rowspan[$value] =
true;
593 } elseif (isset($attributeArray[
'rowspan'])) {
595 $range = $column . $row .
':' . $column . ($row + (int) $attributeArray[
'rowspan'] - 1);
597 $this->rowspan[$value] =
true;
600 } elseif (isset($attributeArray[
'colspan'])) {
603 for (
$i = 0;
$i < (int) $attributeArray[
'colspan'] - 1; ++
$i) {
606 $sheet->
mergeCells($column . $row .
':' . $columnTo . $row);
615 foreach ($element->childNodes as $child) {
616 if ($child instanceof
DOMText) {
617 $domText = preg_replace(
'/\s+/u',
' ', trim($child->nodeValue));
618 if (is_string($cellContent)) {
620 $cellContent .= $domText;
640 if (!$this->
canRead($pFilename)) {
641 throw new Exception($pFilename .
' is an Invalid HTML file.');
648 $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename),
'HTML-ENTITIES',
'UTF-8'));
652 if ($loaded ===
false) {
653 throw new Exception(
'Failed to load ' . $pFilename .
' as a DOM Document', 0, $e ?? null);
670 $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scan($content),
'HTML-ENTITIES',
'UTF-8'));
674 if ($loaded ===
false) {
675 throw new Exception(
'Failed to load content as a DOM Document', 0, $e ?? null);
692 $document->preserveWhiteSpace =
false;
723 $this->sheetIndex = $pValue;
745 if (!isset($attributeArray[
'style'])) {
749 if (isset($attributeArray[
'rowspan'], $attributeArray[
'colspan'])) {
751 for (
$i = 0;
$i < (int) $attributeArray[
'colspan'] - 1; ++
$i) {
754 $range = $column .
$row .
':' . $columnTo . (
$row + (int) $attributeArray[
'rowspan'] - 1);
755 $cellStyle = $sheet->getStyle($range);
756 } elseif (isset($attributeArray[
'rowspan'])) {
757 $range = $column .
$row .
':' . $column . (
$row + (int) $attributeArray[
'rowspan'] - 1);
758 $cellStyle = $sheet->getStyle($range);
759 } elseif (isset($attributeArray[
'colspan'])) {
761 for (
$i = 0;
$i < (int) $attributeArray[
'colspan'] - 1; ++
$i) {
764 $range = $column .
$row .
':' . $columnTo .
$row;
765 $cellStyle = $sheet->getStyle($range);
767 $cellStyle = $sheet->getStyle($column .
$row);
771 $styles = explode(
';', $attributeArray[
'style']);
772 foreach ($styles as $st) {
773 $value = explode(
':', $st);
774 $styleName = isset($value[0]) ? trim($value[0]) : null;
775 $styleValue = isset($value[1]) ? trim($value[1]) : null;
781 switch ($styleName) {
783 case 'background-color':
790 $cellStyle->applyFromArray([
'fill' => [
'fillType' =>
Fill::FILL_SOLID,
'color' => [
'rgb' => $styleColor]]]);
800 $cellStyle->applyFromArray([
'font' => [
'color' => [
'rgb' => $styleColor]]]);
814 case 'border-bottom':
830 $cellStyle->getFont()->setSize(
837 if ($styleValue ===
'bold' || $styleValue >= 500) {
838 $cellStyle->getFont()->setBold(
true);
844 if ($styleValue ===
'italic') {
845 $cellStyle->getFont()->setItalic(
true);
851 $cellStyle->getFont()->setName(str_replace(
'\'',
'', $styleValue));
855 case 'text-decoration':
856 switch ($styleValue) {
858 $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);
862 $cellStyle->getFont()->setStrikethrough(
true);
870 $cellStyle->getAlignment()->setHorizontal($styleValue);
874 case 'vertical-align':
875 $cellStyle->getAlignment()->setVertical($styleValue);
880 $sheet->getColumnDimension($column)->setWidth(
881 (
float) str_replace([
'px',
'pt'],
'', $styleValue)
887 $sheet->getRowDimension(
$row)->setRowHeight(
888 (
float) str_replace([
'px',
'pt'],
'', $styleValue)
894 $cellStyle->getAlignment()->setWrapText(
895 $styleValue ===
'break-word' 901 $cellStyle->getAlignment()->setIndent(
902 (
int) str_replace([
'px'],
'', $styleValue)
917 if (strpos($value,
'#') === 0) {
918 return substr($value, 1);
921 return \PhpOffice\PhpSpreadsheet\Helper\Html::colourNameLookup((
string) $value);
930 if (!isset($attributes[
'src'])) {
934 $src = urldecode($attributes[
'src']);
935 $width = isset($attributes[
'width']) ? (float) $attributes[
'width'] : null;
936 $height = isset($attributes[
'height']) ? (float) $attributes[
'height'] : null;
937 $name = $attributes[
'alt'] ?? null;
940 $drawing->setPath($src);
941 $drawing->setWorksheet($sheet);
942 $drawing->setCoordinates($column .
$row);
943 $drawing->setOffsetX(0);
944 $drawing->setOffsetY(10);
945 $drawing->setResizeProportional(
true);
948 $drawing->setName(
$name);
952 $drawing->setWidth((
int) $width);
956 $drawing->setHeight((
int) $height);
960 $drawing->getWidth() / 6
964 $drawing->getHeight() * 0.9
987 return self::$borderMappings;
999 return (array_key_exists(
$style, self::$borderMappings)) ? self::$borderMappings[
$style] : null;
1012 $borderArray = explode(
' ', $styleValue);
1013 $borderCount = count($borderArray);
1014 if ($borderCount >= 3) {
1015 $borderStyle = $borderArray[1];
1016 $color = $borderArray[2];
1018 $borderStyle = $borderArray[0];
1019 $color = $borderArray[1] ?? null;
1023 $cellStyle->applyFromArray([
getStyleColor($value)
Check whether the value starts with '#'; if so, strip it to get the clean hex colour.
processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
Loads an HTML file into an existing Spreadsheet instance.
getActiveSheet()
Get active sheet.
processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child)
mergeCells($pRange)
Set merge on a cell range.
processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray)
setTableStartColumn($column)
processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
insertImage(Worksheet $sheet, $column, $row, array $attributes)
const TEST_SAMPLE_SIZE
Sample size to read to determine if it's HTML or not.
flushCell(Worksheet $sheet, $column, $row, &$cellContent)
processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
loadDocument(DOMDocument $document, Spreadsheet $spreadsheet)
Loads a DOMDocument into an existing Spreadsheet instance.
processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray)
__construct()
Create a new HTML Reader instance.
applyInlineStyle(&$sheet, $row, $column, $attributeArray)
Apply the inline CSS from the element's style attribute.
processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
setBorderStyle(Style $cellStyle, $styleValue, $type)
processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray)
getStyle($pCellCoordinate)
Get style for cell.
setTitle($title, $updateFormulaCellReferences=true, $validate=true)
Set title.
setCellValue($pCoordinate, $pValue)
Set a cell value.
static getBorderMappings()
processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent)
const BORDER_MEDIUMDASHED
load($pFilename)
Loads Spreadsheet from file.
getHyperlink($pCellCoordinate)
Get hyperlink.
processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray)
createSheet($sheetIndex=null)
Create sheet and add it to this workbook.
setActiveSheetIndex($pIndex)
Set active sheet index.
loadFromString($content, ?Spreadsheet $spreadsheet=null)
Loads a Spreadsheet from an HTML content string.
getRowDimension(int $pRow)
Get row dimension at a specific row.
static getInstance(Reader\IReader $reader)
getSheetIndex()
Get sheet index.
canRead($pFilename)
Validate that the current file is an HTML file.
if(array_key_exists('yes', $_REQUEST)) $attributes
processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
static endsWithTag($data)
static containsTags($data)
processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
const BORDER_SLANTDASHDOT
const BORDER_MEDIUMDASHDOTDOT
processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
static startsWithTag($data)
PhpSpreadsheet root directory.
getInputEncoding()
Get input encoding.
getColumnDimension(string $pColumn)
Get column dimension at a specific column.
getBorderStyle($style)
Map html border style to PhpSpreadsheet border style.
processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
static extractAllCellReferencesInRange($cellRange)
Extract all cell references in range, which may be comprised of multiple cell ranges.
releaseTableStartColumn()
processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray)
processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray)
getComment($pCellCoordinate)
Get comment for cell.
setInputEncoding($pValue)
Set input encoding.
openFile($pFilename)
Open file for reading.
processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray)
setSheetIndex($pValue)
Set sheet index.
const BORDER_MEDIUMDASHDOT
getSheetCount()
Get sheet count.