38 parent::__construct();
54 if (
$config->get(
'Core.AggressivelyFixLt')) {
60 $html = preg_replace(
"/<($char)/i",
'<\\1',
$html);
69 $doc->encoding =
'UTF-8';
71 set_error_handler(
array($this,
'muteErrorHandler'));
72 $doc->loadHTML(
$html);
73 restore_error_handler();
77 $doc->getElementsByTagName(
'html')->item(0)->
78 getElementsByTagName(
'body')->item(0),
95 $closingNodes =
array();
97 while (!$nodes[$level]->isEmpty()) {
98 $node = $nodes[$level]->shift();
99 $collect = $level > 0 ? true :
false;
101 if ($needEndingTag) {
102 $closingNodes[$level][] = $node;
104 if ($node->childNodes && $node->childNodes->length) {
107 foreach ($node->childNodes as $childNode) {
108 $nodes[$level]->push($childNode);
113 if ($level && isset($closingNodes[$level])) {
114 while ($node = array_pop($closingNodes[$level])) {
118 }
while ($level > 0);
135 if ($node->nodeType === XML_TEXT_NODE) {
136 $tokens[] = $this->factory->createText($node->data);
138 } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
140 $last = end($tokens);
144 $new_data = trim(
$data);
145 if (substr($new_data, 0, 4) ===
'<!--') {
146 $data = substr($new_data, 4);
147 if (substr(
$data, -3) ===
'-->') {
156 } elseif ($node->nodeType === XML_COMMENT_NODE) {
160 $tokens[] = $this->factory->createComment($node->data);
162 } elseif ($node->nodeType !== XML_ELEMENT_NODE) {
170 if (!$node->childNodes->length) {
172 $tokens[] = $this->factory->createEmpty($node->tagName, $attr);
177 $tokens[] = $this->factory->createStart(
178 $tag_name = $node->tagName,
192 $tokens[] = $this->factory->createEnd($node->tagName);
207 if ($node_map->length === 0) {
211 foreach ($node_map as $attr) {
212 $array[$attr->name] = $attr->value;
234 return '<!--' . strtr($matches[1],
array(
'&' =>
'&',
'<' =>
'<')) . $matches[2];
245 return '<!--' . str_replace(
'&',
'&', $matches[1]) . $matches[2];
257 $def =
$config->getDefinition(
'HTML');
260 if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) {
261 $ret .=
'<!DOCTYPE html ';
262 if (!empty($def->doctype->dtdPublic)) {
263 $ret .=
'PUBLIC "' . $def->doctype->dtdPublic .
'" ';
265 if (!empty($def->doctype->dtdSystem)) {
266 $ret .=
'"' . $def->doctype->dtdSystem .
'" ';
271 $ret .=
'<html><head>';
272 $ret .=
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
274 $ret .=
'</head><body>' .
$html .
'</body></html>';
tokenizeDOM($node, &$tokens)
Iterative function that tokenizes a node, putting it into an accumulator.
wrapHTML($html, $config, $context)
Wraps an HTML fragment in the necessary HTML.
A simple array-backed queue, based off of the classic Okasaki persistent amortized queue.
Forgivingly lexes HTML (SGML-style) markup into tokens.
createEndNode($node, &$tokens)
transformAttrToAssoc($node_map)
Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
Concrete start token class.
Factory for token generation.
Parser that uses PHP 5's DOM extension (part of the core).
parseData($string)
Parses special entities into the proper characters.
muteErrorHandler($errno, $errstr)
An error handler that mutes all errors.
callbackUndoCommentSubst($matches)
Callback function for undoing escaping of stray angled brackets in comments.
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits, and other good stuff.
createStartNode($node, &$tokens, $collect)
$factory
HTMLPurifier_TokenFactory
Create styles array
The data for the language used.
callbackArmorCommentEntities($matches)
Callback function that entity-izes ampersands in comments so that callbackUndoCommentSubst doesn't clobber them.
tokenizeHTML($html, $config, $context)