38        parent::__construct();
 
   54        if (
$config->get(
'Core.AggressivelyFixLt')) {
 
   57            $html = preg_replace_callback(
$comment, array($this, 
'callbackArmorCommentEntities'), 
$html);
 
   60                $html = preg_replace(
"/<($char)/i", 
'&lt;\\1', 
$html);
 
   62            $html = preg_replace_callback(
$comment, array($this, 
'callbackUndoCommentSubst'), 
$html); 
 
   68        $doc = 
new DOMDocument();
 
   69        $doc->encoding = 
'UTF-8'; 
 
   71        set_error_handler(array($this, 
'muteErrorHandler'));
 
   72        $doc->loadHTML(
$html);
 
   73        restore_error_handler();
 
   77            $doc->getElementsByTagName(
'html')->item(0)-> 
 
   78            getElementsByTagName(
'body')->item(0), 
 
   95        $closingNodes = array();
 
   97            while (!$nodes[$level]->isEmpty()) {
 
   98                $node = $nodes[$level]->shift(); 
 
   99                $collect = $level > 0 ? true : 
false;
 
  101                if ($needEndingTag) {
 
  102                    $closingNodes[$level][] = $node;
 
  104                if ($node->childNodes && $node->childNodes->length) {
 
  107                    foreach ($node->childNodes as $childNode) {
 
  108                        $nodes[$level]->push($childNode);
 
  113            if ($level && isset($closingNodes[$level])) {
 
  114                while ($node = array_pop($closingNodes[$level])) {
 
  118        } 
while ($level > 0);
 
  135        if ($node->nodeType === XML_TEXT_NODE) {
 
  136            $tokens[] = $this->factory->createText($node->data);
 
  138        } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
 
  140            $last = end($tokens);
 
  144                $new_data = trim(
$data);
 
  145                if (substr($new_data, 0, 4) === 
'<!--') {
 
  146                    $data = substr($new_data, 4);
 
  147                    if (substr(
$data, -3) === 
'-->') {
 
  156        } elseif ($node->nodeType === XML_COMMENT_NODE) {
 
  160            $tokens[] = $this->factory->createComment($node->data);
 
  162        } elseif ($node->nodeType !== XML_ELEMENT_NODE) {
 
  170        if (!$node->childNodes->length) {
 
  172                $tokens[] = $this->factory->createEmpty($node->tagName, $attr);
 
  177                $tokens[] = $this->factory->createStart(
 
  178                    $tag_name = $node->tagName, 
 
  192        $tokens[] = $this->factory->createEnd($node->tagName);
 
  207        if ($node_map->length === 0) {
 
  211        foreach ($node_map as $attr) {
 
  212            $array[$attr->name] = $attr->value;
 
  234        return '<!--' . strtr($matches[1], array(
'&amp;' => 
'&', 
'&lt;' => 
'<')) . $matches[2];
 
  245        return '<!--' . str_replace(
'&', 
'&amp;', $matches[1]) . $matches[2];
 
  257        $def = 
$config->getDefinition(
'HTML');
 
  260        if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) {
 
  261            $ret .= 
'<!DOCTYPE html ';
 
  262            if (!empty($def->doctype->dtdPublic)) {
 
  263                $ret .= 
'PUBLIC "' . $def->doctype->dtdPublic . 
'" ';
 
  265            if (!empty($def->doctype->dtdSystem)) {
 
  266                $ret .= 
'"' . $def->doctype->dtdSystem . 
'" ';
 
  271        $ret .= 
'<html><head>';
 
  272        $ret .= 
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
 
  274        $ret .= 
'</head><body>' . 
$html . 
'</body></html>';
 
An exception for terminating execution or for throwing in unit tests.
Parser that uses PHP 5's DOM extension (part of the core).
callbackArmorCommentEntities($matches)
Callback function that entity-izes ampersands in comments so that callbackUndoCommentSubst doesn't cl...
callbackUndoCommentSubst($matches)
Callback function for undoing escaping of stray angled brackets in comments.
createEndNode($node, &$tokens)
wrapHTML($html, $config, $context)
Wraps an HTML fragment in the necessary HTML.
tokenizeHTML($html, $config, $context)
transformAttrToAssoc($node_map)
Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
muteErrorHandler($errno, $errstr)
An error handler that mutes all errors.
tokenizeDOM($node, &$tokens)
Iterative function that tokenizes a node, putting it into an accumulator.
createStartNode($node, &$tokens, $collect)
$factory
@type HTMLPurifier_TokenFactory
Forgivingly lexes HTML (SGML-style) markup into tokens.
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits,...
parseData($string)
Parses special entities into the proper characters.
A simple array-backed queue, based off of the classic Okasaki persistent amortized queue.
Factory for token generation.
Concrete start token class.