44 if (
$config->get(
'Core.AggressivelyFixLt')) {
47 $html = preg_replace_callback(
$comment, array($this,
'callbackArmorCommentEntities'), $html);
50 $html = preg_replace(
"/<($char)/i",
'<\\1', $html);
51 }
while ($html !== $old);
52 $html = preg_replace_callback(
$comment, array($this,
'callbackUndoCommentSubst'), $html);
58 $doc =
new DOMDocument();
59 $doc->encoding =
'UTF-8';
61 set_error_handler(array($this,
'muteErrorHandler'));
62 $doc->loadHTML($html);
63 restore_error_handler();
67 $doc->getElementsByTagName(
'html')->item(0)->
68 getElementsByTagName(
'body')->item(0)->
69 getElementsByTagName(
'div')->item(0)
84 protected function tokenizeDOM($node, &$tokens, $collect =
false) {
89 if ($node->nodeType === XML_TEXT_NODE) {
90 $tokens[] = $this->factory->createText($node->data);
92 } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
98 $new_data = trim(
$data);
99 if (substr($new_data, 0, 4) ===
'<!--') {
100 $data = substr($new_data, 4);
101 if (substr(
$data, -3) ===
'-->') {
110 } elseif ($node->nodeType === XML_COMMENT_NODE) {
114 $tokens[] = $this->factory->createComment($node->data);
118 $node->nodeType !== XML_ELEMENT_NODE
123 $attr = $node->hasAttributes() ?
128 if (!$node->childNodes->length) {
130 $tokens[] = $this->factory->createEmpty($node->tagName, $attr);
134 $tokens[] = $this->factory->createStart(
135 $tag_name = $node->tagName,
139 foreach ($node->childNodes as $node) {
145 $tokens[] = $this->factory->createEnd($tag_name);
161 if ($node_map->length === 0)
return array();
163 foreach ($node_map as $attr) {
164 $array[$attr->name] = $attr->value;
179 return '<!--' . strtr($matches[1], array(
'&'=>
'&',
'<'=>
'<')) . $matches[2];
187 return '<!--' . str_replace(
'&',
'&', $matches[1]) . $matches[2];
194 $def =
$config->getDefinition(
'HTML');
197 if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) {
198 $ret .=
'<!DOCTYPE html ';
199 if (!empty($def->doctype->dtdPublic))
$ret .=
'PUBLIC "' . $def->doctype->dtdPublic .
'" ';
200 if (!empty($def->doctype->dtdSystem))
$ret .=
'"' . $def->doctype->dtdSystem .
'" ';
204 $ret .=
'<html><head>';
205 $ret .=
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
207 $ret .=
'</head><body><div>'.$html.
'</div></body></html>';