50 $html = $this->
normalize($html, $config, $context);
54 if ($config->get(
'Core.AggressivelyFixLt')) {
57 $html = preg_replace_callback(
$comment, array($this,
'callbackArmorCommentEntities'), $html);
60 $html = preg_replace(
"/<($char)/i",
'<\\1', $html);
61 }
while ($html !== $old);
62 $html = preg_replace_callback(
$comment, array($this,
'callbackUndoCommentSubst'), $html);
66 $html = $this->
wrapHTML($html, $config, $context);
68 $doc =
new DOMDocument();
69 $doc->encoding =
'UTF-8';
71 set_error_handler(array($this,
'muteErrorHandler'));
72 $doc->loadHTML($html);
73 restore_error_handler();
77 $doc->getElementsByTagName(
'html')->item(0)->
78 getElementsByTagName(
'body')->item(0)->
79 getElementsByTagName(
'div')->item(0),
96 $closingNodes = array();
98 while (!$nodes[$level]->isEmpty()) {
99 $node = $nodes[$level]->shift();
100 $collect = $level > 0 ?
true :
false;
102 if ($needEndingTag) {
103 $closingNodes[$level][] = $node;
105 if ($node->childNodes && $node->childNodes->length) {
108 foreach ($node->childNodes as $childNode) {
109 $nodes[$level]->push($childNode);
114 if ($level && isset($closingNodes[$level])) {
115 while ($node = array_pop($closingNodes[$level])) {
119 }
while ($level > 0);
136 if ($node->nodeType === XML_TEXT_NODE) {
137 $tokens[] = $this->factory->createText($node->data);
139 } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
141 $last = end($tokens);
145 $new_data = trim($data);
146 if (substr($new_data, 0, 4) ===
'<!--') {
147 $data = substr($new_data, 4);
148 if (substr($data, -3) ===
'-->') {
149 $data = substr($data, 0, -3);
155 $tokens[] = $this->factory->createText($this->
parseData($data));
157 } elseif ($node->nodeType === XML_COMMENT_NODE) {
161 $tokens[] = $this->factory->createComment($node->data);
163 } elseif ($node->nodeType !== XML_ELEMENT_NODE) {
171 if (!$node->childNodes->length) {
173 $tokens[] = $this->factory->createEmpty($node->tagName, $attr);
178 $tokens[] = $this->factory->createStart(
179 $tag_name = $node->tagName,
193 $tokens[] = $this->factory->createEnd($node->tagName);
208 if ($node_map->length === 0) {
212 foreach ($node_map as $attr) {
213 $array[$attr->name] = $attr->value;
235 return '<!--' . strtr($matches[1], array(
'&' =>
'&',
'<' =>
'<')) . $matches[2];
246 return '<!--' . str_replace(
'&',
'&', $matches[1]) . $matches[2];
256 protected function wrapHTML($html, $config, $context)
258 $def = $config->getDefinition(
'HTML');
261 if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) {
262 $ret .=
'<!DOCTYPE html ';
263 if (!empty($def->doctype->dtdPublic)) {
264 $ret .=
'PUBLIC "' . $def->doctype->dtdPublic .
'" ';
266 if (!empty($def->doctype->dtdSystem)) {
267 $ret .=
'"' . $def->doctype->dtdSystem .
'" ';
272 $ret .=
'<html><head>';
273 $ret .=
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
275 $ret .=
'</head><body><div>' . $html .
'</div></body></html>';