24 $new_html = $this->
wrapHTML($new_html, $config, $context);
26 $parser =
new HTML5($new_html);
27 $doc = $parser->save();
28 }
catch (DOMException $e) {
31 $context->register(
'PH5PError', $e);
32 return $lexer->tokenizeHTML(
$html, $config, $context);
36 $doc->getElementsByTagName(
'html')->item(0)->
37 getElementsByTagName(
'body')->item(0)
469 $this->state =
'data';
471 while ($this->state !==
null) {
472 $this->{$this->state .
'State'}();
478 return $this->tree->save();
483 return ($this->char < $this->
EOF)
490 if ($s + $l < $this->
EOF) {
492 return $this->data[$s];
494 return substr($this->data, $s,
$l);
501 return preg_replace(
'#^([' . $char_class .
']+).*#s',
'\\1', substr($this->data, $start));
510 if (
$char ===
'&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
515 $this->state =
'entityData';
517 } elseif (
$char ===
'-') {
524 if (($this->content_model === self::RCDATA || $this->content_model ===
525 self::CDATA) && $this->escape ===
false &&
526 $this->
char >= 3 && $this->
character($this->
char - 4, 4) ===
'<!--'
528 $this->escape =
true;
535 'type' => self::CHARACTR,
541 } elseif (
$char ===
'<' && ($this->content_model === self::PCDATA ||
542 (($this->content_model === self::RCDATA ||
543 $this->content_model === self::CDATA) && $this->escape ===
false))
553 $this->state =
'tagOpen';
556 } elseif (
$char ===
'>') {
562 if (($this->content_model === self::RCDATA ||
563 $this->content_model === self::CDATA) && $this->escape ===
true &&
564 $this->
character($this->
char, 3) ===
'-->'
566 $this->escape =
false;
573 'type' => self::CHARACTR,
578 } elseif ($this->
char === $this->
EOF) {
583 } elseif ($this->content_model === self::PLAINTEXT) {
589 'type' => self::CHARACTR,
590 'data' => substr($this->data, $this->
char)
601 $len = strcspn($this->data,
'<&', $this->
char);
602 $char = substr($this->data, $this->
char, $len);
603 $this->
char += $len - 1;
607 'type' => self::CHARACTR,
612 $this->state =
'data';
619 $entity = $this->
entity();
623 $char = (!$entity) ?
'&' : $entity;
626 'type' => self::CHARACTR,
632 $this->state =
'data';
637 switch ($this->content_model) {
645 if ($this->
character($this->
char + 1) ===
'/') {
647 $this->state =
'closeTagOpen';
652 'type' => self::CHARACTR,
657 $this->state =
'data';
670 $this->state =
'markupDeclarationOpen';
672 } elseif (
$char ===
'/') {
675 $this->state =
'closeTagOpen';
677 } elseif (preg_match(
'/^[A-Za-z]$/',
$char)) {
683 $this->token = array(
684 'name' => strtolower(
$char),
685 'type' => self::STARTTAG,
689 $this->state =
'tagName';
691 } elseif (
$char ===
'>') {
697 'type' => self::CHARACTR,
702 $this->state =
'data';
704 } elseif (
$char ===
'?') {
707 $this->state =
'bogusComment';
715 'type' => self::CHARACTR,
721 $this->state =
'data';
729 $next_node = strtolower($this->
characters(
'A-Za-z', $this->
char + 1));
730 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
732 if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
733 (!$the_same || ($the_same && (!preg_match(
734 '/[\t\n\x0b\x0c >\/]/',
735 $this->
character($this->
char + 1 + strlen($next_node))
736 ) || $this->
EOF === $this->
char)))
755 'type' => self::CHARACTR,
760 $this->state =
'data';
769 if (preg_match(
'/^[A-Za-z]$/',
$char)) {
775 $this->token = array(
776 'name' => strtolower(
$char),
777 'type' => self::ENDTAG
780 $this->state =
'tagName';
782 } elseif (
$char ===
'>') {
785 $this->state =
'data';
787 } elseif ($this->
char === $this->
EOF) {
793 'type' => self::CHARACTR,
799 $this->state =
'data';
803 $this->state =
'bogusComment';
814 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
821 $this->state =
'beforeAttributeName';
823 } elseif (
$char ===
'>') {
827 $this->state =
'data';
829 } elseif ($this->
char === $this->
EOF) {
836 $this->state =
'data';
838 } elseif (
$char ===
'/') {
842 $this->state =
'beforeAttributeName';
848 $this->token[
'name'] .= strtolower(
$char);
849 $this->state =
'tagName';
859 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
866 $this->state =
'beforeAttributeName';
868 } elseif (
$char ===
'>') {
872 $this->state =
'data';
874 } elseif (
$char ===
'/') {
878 $this->state =
'beforeAttributeName';
880 } elseif ($this->
char === $this->
EOF) {
887 $this->state =
'data';
894 $this->token[
'attr'][] = array(
895 'name' => strtolower(
$char),
899 $this->state =
'attributeName';
909 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
916 $this->state =
'afterAttributeName';
918 } elseif (
$char ===
'=') {
921 $this->state =
'beforeAttributeValue';
923 } elseif (
$char ===
'>') {
927 $this->state =
'data';
929 } elseif (
$char ===
'/' && $this->
character($this->
char + 1) !==
'>') {
933 $this->state =
'beforeAttributeName';
935 } elseif ($this->
char === $this->
EOF) {
942 $this->state =
'data';
948 $last = count($this->token[
'attr']) - 1;
949 $this->token[
'attr'][$last][
'name'] .= strtolower(
$char);
951 $this->state =
'attributeName';
961 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
968 $this->state =
'afterAttributeName';
970 } elseif (
$char ===
'=') {
973 $this->state =
'beforeAttributeValue';
975 } elseif (
$char ===
'>') {
979 $this->state =
'data';
981 } elseif (
$char ===
'/' && $this->
character($this->
char + 1) !==
'>') {
985 $this->state =
'beforeAttributeName';
987 } elseif ($this->
char === $this->
EOF) {
994 $this->state =
'data';
1001 $this->token[
'attr'][] = array(
1002 'name' => strtolower(
$char),
1006 $this->state =
'attributeName';
1016 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1023 $this->state =
'beforeAttributeValue';
1025 } elseif (
$char ===
'"') {
1028 $this->state =
'attributeValueDoubleQuoted';
1030 } elseif (
$char ===
'&') {
1035 $this->state =
'attributeValueUnquoted';
1037 } elseif (
$char ===
'\'') {
1040 $this->state =
'attributeValueSingleQuoted';
1042 } elseif (
$char ===
'>') {
1046 $this->state =
'data';
1052 $last = count($this->token[
'attr']) - 1;
1053 $this->token[
'attr'][$last][
'value'] .=
$char;
1055 $this->state =
'attributeValueUnquoted';
1065 if (
$char ===
'"') {
1068 $this->state =
'beforeAttributeName';
1070 } elseif (
$char ===
'&') {
1075 } elseif ($this->
char === $this->
EOF) {
1082 $this->state =
'data';
1088 $last = count($this->token[
'attr']) - 1;
1089 $this->token[
'attr'][$last][
'value'] .=
$char;
1091 $this->state =
'attributeValueDoubleQuoted';
1101 if (
$char ===
'\'') {
1104 $this->state =
'beforeAttributeName';
1106 } elseif (
$char ===
'&') {
1111 } elseif ($this->
char === $this->
EOF) {
1118 $this->state =
'data';
1124 $last = count($this->token[
'attr']) - 1;
1125 $this->token[
'attr'][$last][
'value'] .=
$char;
1127 $this->state =
'attributeValueSingleQuoted';
1137 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1144 $this->state =
'beforeAttributeName';
1146 } elseif (
$char ===
'&') {
1151 } elseif (
$char ===
'>') {
1155 $this->state =
'data';
1161 $last = count($this->token[
'attr']) - 1;
1162 $this->token[
'attr'][$last][
'value'] .=
$char;
1164 $this->state =
'attributeValueUnquoted';
1171 $entity = $this->
entity();
1180 $last = count($this->token[
'attr']) - 1;
1181 $this->token[
'attr'][$last][
'value'] .=
$char;
1198 'type' => self::COMMENT
1202 $this->
char += strlen(
$data);
1205 $this->state =
'data';
1208 if ($this->
char === $this->
EOF) {
1209 $this->
char = $this->
EOF - 1;
1218 if ($this->
character($this->
char + 1, 2) ===
'--') {
1220 $this->state =
'comment';
1221 $this->token = array(
1223 'type' => self::COMMENT
1229 } elseif (strtolower($this->
character($this->
char + 1, 7)) ===
'doctype') {
1231 $this->state =
'doctype';
1238 $this->state =
'bogusComment';
1249 if (
$char ===
'-') {
1251 $this->state =
'commentDash';
1254 } elseif ($this->
char === $this->
EOF) {
1259 $this->state =
'data';
1265 $this->token[
'data'] .=
$char;
1276 if (
$char ===
'-') {
1278 $this->state =
'commentEnd';
1281 } elseif ($this->
char === $this->
EOF) {
1286 $this->state =
'data';
1292 $this->token[
'data'] .=
'-' .
$char;
1293 $this->state =
'comment';
1303 if (
$char ===
'>') {
1305 $this->state =
'data';
1307 } elseif (
$char ===
'-') {
1308 $this->token[
'data'] .=
'-';
1310 } elseif ($this->
char === $this->
EOF) {
1313 $this->state =
'data';
1316 $this->token[
'data'] .=
'--' .
$char;
1317 $this->state =
'comment';
1327 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1328 $this->state =
'beforeDoctypeName';
1332 $this->state =
'beforeDoctypeName';
1342 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1345 } elseif (preg_match(
'/^[a-z]$/',
$char)) {
1346 $this->token = array(
1347 'name' => strtoupper(
$char),
1348 'type' => self::DOCTYPE,
1352 $this->state =
'doctypeName';
1354 } elseif (
$char ===
'>') {
1358 'type' => self::DOCTYPE,
1363 $this->state =
'data';
1365 } elseif ($this->
char === $this->
EOF) {
1369 'type' => self::DOCTYPE,
1375 $this->state =
'data';
1378 $this->token = array(
1380 'type' => self::DOCTYPE,
1384 $this->state =
'doctypeName';
1394 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1395 $this->state =
'AfterDoctypeName';
1397 } elseif (
$char ===
'>') {
1399 $this->state =
'data';
1401 } elseif (preg_match(
'/^[a-z]$/',
$char)) {
1402 $this->token[
'name'] .= strtoupper(
$char);
1404 } elseif ($this->
char === $this->
EOF) {
1407 $this->state =
'data';
1410 $this->token[
'name'] .=
$char;
1413 $this->token[
'error'] = ($this->token[
'name'] ===
'HTML')
1424 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1427 } elseif (
$char ===
'>') {
1429 $this->state =
'data';
1431 } elseif ($this->
char === $this->
EOF) {
1434 $this->state =
'data';
1437 $this->token[
'error'] =
true;
1438 $this->state =
'bogusDoctype';
1448 if (
$char ===
'>') {
1450 $this->state =
'data';
1452 } elseif ($this->
char === $this->
EOF) {
1455 $this->state =
'data';
1472 switch ($this->
character($this->
char + 1)) {
1478 switch ($this->
character($this->
char + 1)) {
1490 $char_class =
'0-9A-Fa-f';
1499 $char_class =
'0-9';
1507 $entity = $this->
character($start, $this->
char);
1508 $cond = strlen($e_name) > 0;
1518 $e_name = $this->
characters(
'0-9A-Za-z;', $this->
char + 1);
1519 $len = strlen($e_name);
1521 for ($c = 1; $c <= $len; $c++) {
1522 $id = substr($e_name, 0, $c);
1525 if (in_array($id, $this->entities)) {
1526 if ($e_name[$c - 1] !==
';') {
1527 if ($c < $len && $e_name[$c] ==
';') {
1536 $cond = isset($entity);
1544 $this->
char = $start;
1550 return html_entity_decode(
'&' . $entity .
';', ENT_QUOTES,
'UTF-8');
1555 $emit = $this->tree->emitToken(
$token);
1557 if (is_int($emit)) {
1558 $this->content_model = $emit;
1560 } elseif (
$token[
'type'] === self::ENDTAG) {
1567 $this->state =
null;
1568 $this->tree->emitToken(
1589 private $scoping = array(
'button',
'caption',
'html',
'marquee',
'object',
'table',
'td',
'th');
1703 $this->dom =
new DOMDocument;
1705 $this->dom->encoding =
'UTF-8';
1706 $this->dom->preserveWhiteSpace =
true;
1707 $this->dom->substituteEntities =
true;
1708 $this->dom->strictErrorChecking =
false;
1714 switch ($this->phase) {
1743 if ((isset($token[
'error']) && $token[
'error']) ||
1749 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data']))
1760 } elseif (isset($token[
'error']) && !$token[
'error']) {
1765 $doctype =
new DOMDocumentType(
null,
null,
'HTML');
1774 } elseif (isset($token[
'data']) && preg_match(
1775 '/^[\t\n\x0b\x0c ]+$/',
1780 $text = $this->dom->createTextNode($token[
'data']);
1781 $this->dom->appendChild(
$text);
1798 $comment = $this->dom->createComment($token[
'data']);
1805 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
1808 $text = $this->dom->createTextNode($token[
'data']);
1809 $this->dom->appendChild(
$text);
1818 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
1826 $html = $this->dom->createElement(
'html');
1827 $this->dom->appendChild(
$html);
1828 $this->stack[] =
$html;
1844 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'html') {
1852 foreach ($token[
'attr'] as $attr) {
1853 if (!$this->stack[0]->hasAttribute($attr[
'name'])) {
1854 $this->stack[0]->setAttribute($attr[
'name'], $attr[
'value']);
1866 switch ($this->mode) {
1871 return $this->
inHead($token);
1877 return $this->
inBody($token);
1880 return $this->
inTable($token);
1892 return $this->
inRow($token);
1895 return $this->
inCell($token);
1924 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
1936 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') {
1942 $this->head_pointer = $element;
1953 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') ||
1955 '/^[\t\n\x0b\x0c ]$/',
1969 return $this->
inHead($token);
1989 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) || (
1991 end($this->stack)->nodeName,
1992 array(
'title',
'style',
'script')
2005 in_array($token[
'name'], array(
'title',
'style',
'script'))
2007 array_pop($this->stack);
2011 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'title') {
2015 if ($this->head_pointer !==
null) {
2017 $this->head_pointer->appendChild($element);
2027 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'style') {
2031 if ($this->head_pointer !==
null) {
2033 $this->head_pointer->appendChild($element);
2043 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'script') {
2046 $this->head_pointer->appendChild($element);
2054 array(
'base',
'link',
'meta')
2060 if ($this->head_pointer !==
null) {
2062 $this->head_pointer->appendChild($element);
2063 array_pop($this->stack);
2070 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'head') {
2073 if ($this->head_pointer->isSameNode(end($this->stack))) {
2074 array_pop($this->stack);
2085 } elseif (($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') ||
2086 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] !==
'html')
2094 if ($this->head_pointer->isSameNode(end($this->stack))) {
2120 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
2132 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'body') {
2140 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'frameset') {
2151 array(
'base',
'link',
'meta',
'script',
'style',
'title')
2157 return $this->
inHead($token);
2171 return $this->
inBody($token);
2179 switch ($token[
'type']) {
2197 switch ($token[
'name']) {
2204 return $this->
inHead($token);
2215 return $this->
inHead($token);
2224 if (count($this->stack) === 1 || $this->stack[1]->nodeName !==
'body') {
2233 foreach ($token[
'attr'] as $attr) {
2234 if (!$this->stack[1]->hasAttribute($attr[
'name'])) {
2235 $this->stack[1]->setAttribute($attr[
'name'], $attr[
'value']);
2276 if ($this->form_pointer !==
null) {
2296 $this->form_pointer = $element;
2316 $stack_length = count($this->stack) - 1;
2318 for (
$n = $stack_length; 0 <=
$n;
$n--) {
2322 $node = $this->stack[
$n];
2328 if ($token[
'name'] === $node->tagName || ($token[
'name'] !==
'li'
2329 && ($node->tagName ===
'dd' || $node->tagName ===
'dt'))
2331 for (
$x = $stack_length;
$x >=
$n;
$x--) {
2332 array_pop($this->stack);
2341 if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
2342 $node->tagName !==
'address' && $node->tagName !==
'div'
2397 while ($this->
elementInScope(array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6'))) {
2398 array_pop($this->stack);
2416 $leng = count($this->a_formatting);
2418 for (
$n = $leng - 1;
$n >= 0;
$n--) {
2419 if ($this->a_formatting[
$n] === self::MARKER) {
2422 } elseif ($this->a_formatting[
$n]->nodeName ===
'a') {
2441 $this->a_formatting[] = $el;
2466 $this->a_formatting[] = $el;
2559 array_pop($this->stack);
2579 array_pop($this->stack);
2586 $token[
'name'] =
'img';
2587 return $this->
inBody($token);
2601 $this->form_pointer !==
null
2602 ? $this->form_pointer->appendChild($element)
2603 : end($this->stack)->appendChild($element);
2606 array_pop($this->stack);
2616 if ($this->form_pointer ===
null) {
2659 'This is a searchable index. ' .
2660 'Insert your search keywords here: '
2667 $attr = $token[
'attr'];
2668 $attr[] = array(
'name' =>
'name',
'value' =>
'isindex');
2681 'This is a searchable index. ' .
2682 'Insert your search keywords here: '
2778 case 'event-source':
2801 switch ($token[
'name']) {
2807 if (count($this->stack) < 2 || $this->stack[1]->nodeName !==
'body') {
2812 } elseif (end($this->stack)->nodeName !==
'body') {
2865 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2866 if ($this->stack[
$n]->nodeName === $token[
'name']) {
2870 array_pop($this->stack);
2885 if (end($this->stack)->nodeName !== $token[
'name']) {
2895 array_pop($this->stack);
2899 $this->form_pointer =
null;
2916 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2918 array_pop($this->stack);
2946 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2947 if ($this->stack[
$n]->nodeName === $token[
'name']) {
2951 array_pop($this->stack);
2964 $elements = array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6');
2981 array_pop($this->stack);
3009 for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
3010 if ($this->a_formatting[$a] === self::MARKER) {
3013 } elseif ($this->a_formatting[$a]->tagName === $token[
'name']) {
3014 $formatting_element = $this->a_formatting[$a];
3015 $in_stack = in_array($formatting_element, $this->stack,
true);
3025 if (!isset($formatting_element) || ($in_stack &&
3034 } elseif (isset($formatting_element) && !$in_stack) {
3035 unset($this->a_formatting[$fe_af_pos]);
3036 $this->a_formatting = array_merge($this->a_formatting);
3045 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3046 $length = count($this->stack);
3048 for ($s = $fe_s_pos + 1; $s < $length; $s++) {
3051 if ($category !== self::PHRASING && $category !== self::FORMATTING) {
3052 $furthest_block = $this->stack[$s];
3062 if (!isset($furthest_block)) {
3063 for (
$n = $length - 1;
$n >= $fe_s_pos;
$n--) {
3064 array_pop($this->stack);
3067 unset($this->a_formatting[$fe_af_pos]);
3068 $this->a_formatting = array_merge($this->a_formatting);
3075 $common_ancestor = $this->stack[$fe_s_pos - 1];
3079 if ($furthest_block->parentNode !==
null) {
3080 $furthest_block->parentNode->removeChild($furthest_block);
3087 $bookmark = $fe_af_pos;
3091 $node = $furthest_block;
3092 $last_node = $furthest_block;
3095 for (
$n = array_search($node, $this->stack,
true) - 1;
$n >= 0;
$n--) {
3098 $node = $this->stack[
$n];
3104 if (!in_array($node, $this->a_formatting,
true)) {
3105 unset($this->stack[
$n]);
3106 $this->stack = array_merge($this->stack);
3116 if ($node === $formatting_element) {
3123 } elseif ($last_node === $furthest_block) {
3124 $bookmark = array_search($node, $this->a_formatting,
true) + 1;
3133 if ($node->hasChildNodes()) {
3134 $clone = $node->cloneNode();
3135 $s_pos = array_search($node, $this->stack,
true);
3136 $a_pos = array_search($node, $this->a_formatting,
true);
3138 $this->stack[$s_pos] = $clone;
3139 $this->a_formatting[$a_pos] = $clone;
3145 if ($last_node->parentNode !==
null) {
3146 $last_node->parentNode->removeChild($last_node);
3149 $node->appendChild($last_node);
3159 if ($last_node->parentNode !==
null) {
3160 $last_node->parentNode->removeChild($last_node);
3163 $common_ancestor->appendChild($last_node);
3167 $clone = $formatting_element->cloneNode();
3172 while ($furthest_block->hasChildNodes()) {
3173 $child = $furthest_block->firstChild;
3174 $furthest_block->removeChild($child);
3175 $clone->appendChild($child);
3179 $furthest_block->appendChild($clone);
3185 $fe_af_pos = array_search($formatting_element, $this->a_formatting,
true);
3186 unset($this->a_formatting[$fe_af_pos]);
3187 $this->a_formatting = array_merge($this->a_formatting);
3189 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
3190 $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
3191 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
3198 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3199 $fb_s_pos = array_search($furthest_block, $this->stack,
true);
3200 unset($this->stack[$fe_s_pos]);
3202 $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
3203 $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
3204 $this->stack = array_merge($s_part1, array($clone), $s_part2);
3207 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
3231 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3232 if ($this->stack[
$n]->nodeName === $token[
'name']) {
3236 array_pop($this->stack);
3239 $marker = end(array_keys($this->a_formatting, self::MARKER,
true));
3241 for (
$n = count($this->a_formatting) - 1;
$n > $marker;
$n--) {
3242 array_pop($this->a_formatting);
3275 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3278 $node = end($this->stack);
3282 if ($token[
'name'] === $node->nodeName) {
3293 for (
$x = count($this->stack) -
$n;
$x >=
$n;
$x--) {
3294 array_pop($this->stack);
3300 if ($category !== self::SPECIAL && $category !== self::SCOPING) {
3317 $clear = array(
'html',
'table');
3323 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
3326 $text = $this->dom->createTextNode($token[
'data']);
3327 end($this->stack)->appendChild(
$text);
3333 $comment = $this->dom->createComment($token[
'data']);
3334 end($this->stack)->appendChild(
$comment);
3338 $token[
'name'] ===
'caption'
3354 $token[
'name'] ===
'colgroup'
3366 $token[
'name'] ===
'col'
3370 'name' =>
'colgroup',
3381 array(
'tbody',
'tfoot',
'thead')
3394 in_array($token[
'name'], array(
'td',
'th',
'tr'))
3410 $token[
'name'] ===
'table'
3426 $token[
'name'] ===
'table'
3446 $current = end($this->stack)->nodeName;
3447 array_pop($this->stack);
3449 if ($current ===
'table') {
3488 end($this->stack)->nodeName,
3489 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
3503 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3504 if ($this->stack[
$n]->nodeName ===
'table') {
3505 $table = $this->stack[
$n];
3510 if (isset($table) && $table->parentNode !==
null) {
3511 $this->foster_parent = $table->parentNode;
3513 } elseif (!isset($table)) {
3514 $this->foster_parent = $this->stack[0];
3516 } elseif (isset($table) && ($table->parentNode ===
null ||
3517 $table->parentNode->nodeType !== XML_ELEMENT_NODE)
3519 $this->foster_parent = $this->stack[
$n - 1];
3530 if ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'caption') {
3549 $node = end($this->stack)->nodeName;
3550 array_pop($this->stack);
3552 if ($node ===
'caption') {
3582 $token[
'name'] ===
'table')
3589 'name' =>
'caption',
3594 return $this->
inTable($token);
3628 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
3631 $text = $this->dom->createTextNode($token[
'data']);
3632 end($this->stack)->appendChild(
$text);
3638 $comment = $this->dom->createComment($token[
'data']);
3639 end($this->stack)->appendChild(
$comment);
3642 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'col') {
3646 array_pop($this->stack);
3650 $token[
'name'] ===
'colgroup'
3654 if (end($this->stack)->nodeName ===
'html') {
3661 array_pop($this->stack);
3666 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'col') {
3675 'name' =>
'colgroup',
3680 return $this->
inTable($token);
3686 $clear = array(
'tbody',
'tfoot',
'thead',
'html');
3700 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')
3712 return $this->
inRow($token);
3716 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))
3731 array_pop($this->stack);
3739 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoor',
'thead')
3746 if (!$this->
elementInScope(array(
'tbody',
'thead',
'tfoot'),
true)) {
3759 'name' => end($this->stack)->nodeName,
3771 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
3785 $clear = array(
'tr',
'html');
3789 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')
3804 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'tr') {
3819 array_pop($this->stack);
3827 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoot',
'thead',
'tr')
3839 return $this->
inCell($token);
3843 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))
3862 return $this->
inCell($token);
3869 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
3885 ($token[
'name'] ===
'td' || $token[
'name'] ===
'th')
3906 $node = end($this->stack)->nodeName;
3907 array_pop($this->stack);
3909 if ($node === $token[
'name']) {
3950 return $this->
inRow($token);
3980 return $this->
inRow($token);
3987 array(
'body',
'caption',
'col',
'colgroup',
'html')
3996 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
4010 return $this->
inRow($token);
4037 $token[
'name'] ===
'option'
4041 if (end($this->stack)->nodeName ===
'option') {
4055 $token[
'name'] ===
'optgroup'
4059 if (end($this->stack)->nodeName ===
'option') {
4070 if (end($this->stack)->nodeName ===
'optgroup') {
4073 'name' =>
'optgroup',
4084 $token[
'name'] ===
'optgroup'
4090 $elements_in_stack = count($this->stack);
4092 if ($this->stack[$elements_in_stack - 1]->nodeName ===
'option' &&
4093 $this->stack[$elements_in_stack - 2]->nodeName ===
'optgroup'
4106 if ($this->stack[$elements_in_stack - 1] ===
'optgroup') {
4107 array_pop($this->stack);
4112 $token[
'name'] ===
'option'
4117 if (end($this->stack)->nodeName ===
'option') {
4118 array_pop($this->stack);
4123 $token[
'name'] ===
'select'
4136 $current = end($this->stack)->nodeName;
4137 array_pop($this->stack);
4139 if ($current ===
'select') {
4149 } elseif ($token[
'name'] ===
'select' &&
4209 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4220 $comment = $this->dom->createComment($token[
'data']);
4221 $this->stack[0]->appendChild(
$comment);
4224 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') {
4238 return $this->
inBody($token);
4250 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4262 } elseif ($token[
'name'] ===
'frameset' &&
4268 } elseif ($token[
'name'] ===
'frameset' &&
4273 if (end($this->stack)->nodeName ===
'html') {
4279 array_pop($this->stack);
4289 } elseif ($token[
'name'] ===
'frame' &&
4296 array_pop($this->stack);
4299 } elseif ($token[
'name'] ===
'noframes' &&
4319 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4331 } elseif ($token[
'name'] ===
'html' &&
4338 } elseif ($token[
'name'] ===
'noframes' &&
4363 $comment = $this->dom->createComment($token[
'data']);
4370 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4379 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
4399 $token[
'name'] = preg_replace(
'/[^a-z0-9-]/i',
'', $token[
'name']);
4401 $token[
'name'] = ltrim($token[
'name'],
'-0..9');
4403 if ($token[
'name'] ===
'') {
4404 $token[
'name'] =
'span';
4408 $el = $this->dom->createElement($token[
'name']);
4410 foreach ($token[
'attr'] as $attr) {
4411 if (!$el->hasAttribute($attr[
'name'])) {
4412 $el->setAttribute($attr[
'name'], $attr[
'value']);
4417 $this->stack[] = $el;
4436 if ($this->foster_parent ===
null) {
4437 end($this->stack)->appendChild($node);
4439 } elseif ($this->foster_parent !==
null) {
4446 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
4447 if ($this->stack[
$n]->nodeName ===
'table' &&
4448 $this->stack[
$n]->parentNode !==
null
4450 $table = $this->stack[
$n];
4455 if (isset($table) && $this->foster_parent->isSameNode($table->parentNode)) {
4456 $this->foster_parent->insertBefore($node, $table);
4458 $this->foster_parent->appendChild($node);
4461 $this->foster_parent =
null;
4467 if (is_array($el)) {
4468 foreach ($el as $element) {
4477 $leng = count($this->stack);
4479 for (
$n = 0;
$n < $leng;
$n++) {
4482 $node = $this->stack[$leng - 1 -
$n];
4484 if ($node->tagName === $el) {
4488 } elseif ($node->tagName ===
'table') {
4493 } elseif ($table ===
true && in_array(
4510 } elseif ($node === $node->ownerDocument->documentElement) {
4529 $formatting_elements = count($this->a_formatting);
4531 if ($formatting_elements === 0) {
4537 $entry = end($this->a_formatting);
4543 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
4547 for ($a = $formatting_elements - 1; $a >= 0;
true) {
4551 $step_seven =
false;
4558 $entry = $this->a_formatting[$a];
4562 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
4570 if (isset($step_seven) && $step_seven ===
true) {
4572 $entry = $this->a_formatting[$a];
4576 $clone = $entry->cloneNode();
4580 end($this->stack)->appendChild($clone);
4581 $this->stack[] = $clone;
4585 $this->a_formatting[$a] = $clone;
4589 if (end($this->a_formatting) !== $clone) {
4606 $entry = end($this->a_formatting);
4609 array_pop($this->a_formatting);
4613 if ($entry === self::MARKER) {
4626 $node = end($this->stack);
4627 $elements = array_diff(array(
'dd',
'dt',
'li',
'p',
'td',
'th',
'tr'),
$exclude);
4629 while (in_array(end($this->stack)->nodeName, $elements)) {
4630 array_pop($this->stack);
4636 $name = $node->tagName;
4637 if (in_array($name, $this->special)) {
4639 } elseif (in_array($name, $this->scoping)) {
4641 } elseif (in_array($name, $this->formatting)) {
4656 $node = end($this->stack)->nodeName;
4658 if (in_array($node, $elements)) {
4661 array_pop($this->stack);
4670 $leng = count($this->stack);
4672 for (
$n = $leng - 1;
$n >= 0;
$n--) {
4674 $node = $this->stack[
$n];
4680 if ($this->stack[0]->isSameNode($node)) {
4686 if ($node->nodeName ===
'select') {
4692 } elseif ($node->nodeName ===
'td' || $node->nodeName ===
'th') {
4698 } elseif ($node->nodeName ===
'tr') {
4704 } elseif (in_array($node->nodeName, array(
'tbody',
'thead',
'tfoot'))) {
4710 } elseif ($node->nodeName ===
'caption') {
4716 } elseif ($node->nodeName ===
'colgroup') {
4722 } elseif ($node->nodeName ===
'table') {
4729 } elseif ($node->nodeName ===
'head') {
4735 } elseif ($node->nodeName ===
'body') {
4741 } elseif ($node->nodeName ===
'frameset') {
4749 } elseif ($node->nodeName ===
'html') {
4750 $this->mode = ($this->head_pointer ===
null)
4769 foreach (array(
'td',
'th') as $cell) {
const EOF
How fgetc() reports an End Of File.
getElementCategory($node)
elementInScope($el, $table=false)
clearStackToTableContext($elements)
appendToRealParent($node)
clearTheActiveFormattingElementsUpToTheLastMarker()
insertElement($token, $append=true, $check=false)
generateImpliedEndTags($exclude=array())
reconstructActiveFormattingElements()
beforeAttributeValueState()
beforeAttributeNameState()
attributeValueDoubleQuotedState()
markupDeclarationOpenState()
attributeValueSingleQuotedState()
attributeValueUnquotedState()
characters($char_class, $start)
afterAttributeNameState()
entityInAttributeValueState()
Parser that uses PHP 5's DOM extension (part of the core).
wrapHTML($html, $config, $context)
Wraps an HTML fragment in the necessary HTML.
tokenizeDOM($node, &$tokens)
Iterative function that tokenizes a node, putting it into an accumulator.
Our in-house implementation of a parser.
Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
tokenizeHTML($html, $config, $context)
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits,...