38 parent::__construct();
54 if (
$config->get(
'Core.AggressivelyFixLt')) {
60 $html = preg_replace(
"/<($char)/i",
'<\\1',
$html);
69 $doc->encoding =
'UTF-8';
71 set_error_handler(
array($this,
'muteErrorHandler'));
72 $doc->loadHTML(
$html);
73 restore_error_handler();
75 $body = $doc->getElementsByTagName(
'html')->item(0)->
76 getElementsByTagName(
'body')->item(0);
78 $div = $body->getElementsByTagName(
'div')->item(0);
85 if ($div->nextSibling) {
86 $body->removeChild($div);
103 $closingNodes =
array();
105 while (!$nodes[$level]->isEmpty()) {
106 $node = $nodes[$level]->shift();
107 $collect = $level > 0 ? true :
false;
109 if ($needEndingTag) {
110 $closingNodes[$level][] = $node;
112 if ($node->childNodes && $node->childNodes->length) {
115 foreach ($node->childNodes as $childNode) {
116 $nodes[$level]->push($childNode);
121 if ($level && isset($closingNodes[$level])) {
122 while ($node = array_pop($closingNodes[$level])) {
126 }
while ($level > 0);
143 if ($node->nodeType === XML_TEXT_NODE) {
144 $tokens[] = $this->factory->createText($node->data);
146 } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
148 $last = end($tokens);
152 $new_data = trim(
$data);
153 if (substr($new_data, 0, 4) ===
'<!--') {
154 $data = substr($new_data, 4);
155 if (substr(
$data, -3) ===
'-->') {
164 } elseif ($node->nodeType === XML_COMMENT_NODE) {
168 $tokens[] = $this->factory->createComment($node->data);
170 } elseif ($node->nodeType !== XML_ELEMENT_NODE) {
178 if (!$node->childNodes->length) {
180 $tokens[] = $this->factory->createEmpty($node->tagName, $attr);
185 $tokens[] = $this->factory->createStart(
186 $tag_name = $node->tagName,
200 $tokens[] = $this->factory->createEnd($node->tagName);
215 if ($node_map->length === 0) {
219 foreach ($node_map as $attr) {
220 $array[$attr->name] = $attr->value;
242 return '<!--' . strtr($matches[1],
array(
'&' =>
'&',
'<' =>
'<')) . $matches[2];
253 return '<!--' . str_replace(
'&',
'&', $matches[1]) . $matches[2];
268 if (!empty(
$def->doctype->dtdPublic) || !empty(
$def->doctype->dtdSystem)) {
269 $ret .=
'<!DOCTYPE html ';
270 if (!empty(
$def->doctype->dtdPublic)) {
271 $ret .=
'PUBLIC "' .
$def->doctype->dtdPublic .
'" ';
273 if (!empty(
$def->doctype->dtdSystem)) {
274 $ret .=
'"' .
$def->doctype->dtdSystem .
'" ';
279 $ret .=
'<html><head>';
280 $ret .=
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
282 $ret .=
'</head><body>';
283 if ($use_div)
$ret .=
'<div>';
285 if ($use_div)
$ret .=
'</div>';
286 $ret .=
'</body></html>';
parseText($string, $config)
tokenizeDOM($node, &$tokens, $config)
Iterative function that tokenizes a node, putting it into an accumulator.
A simple array-backed queue, based off of the classic Okasaki persistent amortized queue...
Forgivingly lexes HTML (SGML-style) markup into tokens.
createEndNode($node, &$tokens)
wrapHTML($html, $config, $context, $use_div=true)
Wraps an HTML fragment in the necessary HTML.
transformAttrToAssoc($node_map)
Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
Concrete start token class.
Factory for token generation.
Parser that uses PHP 5's DOM extension (part of the core).
muteErrorHandler($errno, $errstr)
An error handler that mutes all errors.
callbackUndoCommentSubst($matches)
Callback function for undoing escaping of stray angled brackets in comments.
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits...
createStartNode($node, &$tokens, $collect, $config)
$factory
HTMLPurifier_TokenFactory
Create styles array
The data for the language used.
callbackArmorCommentEntities($matches)
Callback function that entity-izes ampersands in comments so that callbackUndoCommentSubst doesn't cl...
tokenizeHTML($html, $config, $context)