38        parent::__construct();
 
   54        if (
$config->get(
'Core.AggressivelyFixLt')) {
 
   57            $html = preg_replace_callback(
$comment, array($this, 
'callbackArmorCommentEntities'), 
$html);
 
   60                $html = preg_replace(
"/<($char)/i", 
'<\\1', 
$html);
 
   62            $html = preg_replace_callback(
$comment, array($this, 
'callbackUndoCommentSubst'), 
$html); 
 
   68        $doc = 
new DOMDocument();
 
   69        $doc->encoding = 
'UTF-8'; 
 
   71        set_error_handler(array($this, 
'muteErrorHandler'));
 
   72        $doc->loadHTML(
$html);
 
   73        restore_error_handler();
 
   75        $body = $doc->getElementsByTagName(
'html')->item(0)-> 
 
   76                      getElementsByTagName(
'body')->item(0);  
 
   78        $div = $body->getElementsByTagName(
'div')->item(0); 
 
   85        if ($div->nextSibling) {
 
   86            $body->removeChild($div);
 
  103        $closingNodes = array();
 
  105            while (!$nodes[$level]->isEmpty()) {
 
  106                $node = $nodes[$level]->shift(); 
 
  107                $collect = $level > 0 ? true : 
false;
 
  109                if ($needEndingTag) {
 
  110                    $closingNodes[$level][] = $node;
 
  112                if ($node->childNodes && $node->childNodes->length) {
 
  115                    foreach ($node->childNodes as $childNode) {
 
  116                        $nodes[$level]->push($childNode);
 
  121            if ($level && isset($closingNodes[$level])) {
 
  122                while ($node = array_pop($closingNodes[$level])) {
 
  126        } 
while ($level > 0);
 
  143        if ($node->nodeType === XML_TEXT_NODE) {
 
  144            $tokens[] = $this->factory->createText($node->data);
 
  146        } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
 
  148            $last = end($tokens);
 
  152                $new_data = trim(
$data);
 
  153                if (substr($new_data, 0, 4) === 
'<!--') {
 
  154                    $data = substr($new_data, 4);
 
  155                    if (substr(
$data, -3) === 
'-->') {
 
  164        } elseif ($node->nodeType === XML_COMMENT_NODE) {
 
  168            $tokens[] = $this->factory->createComment($node->data);
 
  170        } elseif ($node->nodeType !== XML_ELEMENT_NODE) {
 
  178        if (!$node->childNodes->length) {
 
  180                $tokens[] = $this->factory->createEmpty($node->tagName, $attr);
 
  185                $tokens[] = $this->factory->createStart(
 
  186                    $tag_name = $node->tagName, 
 
  200        $tokens[] = $this->factory->createEnd($node->tagName);
 
  215        if ($node_map->length === 0) {
 
  219        foreach ($node_map as $attr) {
 
  220            $array[$attr->name] = $attr->value;
 
  242        return '<!--' . strtr($matches[1], array(
'&' => 
'&', 
'<' => 
'<')) . $matches[2];
 
  253        return '<!--' . str_replace(
'&', 
'&', $matches[1]) . $matches[2];
 
  268        if (!empty(
$def->doctype->dtdPublic) || !empty(
$def->doctype->dtdSystem)) {
 
  269            $ret .= 
'<!DOCTYPE html ';
 
  270            if (!empty(
$def->doctype->dtdPublic)) {
 
  271                $ret .= 
'PUBLIC "' . 
$def->doctype->dtdPublic . 
'" ';
 
  273            if (!empty(
$def->doctype->dtdSystem)) {
 
  274                $ret .= 
'"' . 
$def->doctype->dtdSystem . 
'" ';
 
  279        $ret .= 
'<html><head>';
 
  280        $ret .= 
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
 
  282        $ret .= 
'</head><body>';
 
  283        if ($use_div) 
$ret .= 
'<div>';
 
  285        if ($use_div) 
$ret .= 
'</div>';
 
  286        $ret .= 
'</body></html>';
 
An exception for terminating execution or to throw for unit testing.
Parser that uses PHP 5's DOM extension (part of the core).
callbackArmorCommentEntities($matches)
Callback function that entity-izes ampersands in comments so that callbackUndoCommentSubst doesn't cl...
callbackUndoCommentSubst($matches)
Callback function for undoing escaping of stray angled brackets in comments.
createEndNode($node, &$tokens)
tokenizeDOM($node, &$tokens, $config)
Iterative function that tokenizes a node, putting it into an accumulator.
wrapHTML($html, $config, $context, $use_div=true)
Wraps an HTML fragment in the necessary HTML.
tokenizeHTML($html, $config, $context)
createStartNode($node, &$tokens, $collect, $config)
transformAttrToAssoc($node_map)
Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
muteErrorHandler($errno, $errstr)
An error handler that mutes all errors.
$factory
@type HTMLPurifier_TokenFactory
Forgivingly lexes HTML (SGML-style) markup into tokens.
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits,...
parseText($string, $config)
A simple array-backed queue, based off of the classic Okasaki persistent amortized queue.
Factory for token generation.
Concrete start token class.