23 $new_html = $this->
normalize($html, $config, $context);
24 $new_html = $this->
wrapHTML($new_html, $config, $context);
26 $parser =
new HTML5($new_html);
27 $doc = $parser->save();
28 }
catch (DOMException $e) {
31 $context->register(
'PH5PError', $e);
32 return $lexer->tokenizeHTML($html, $config, $context);
36 $doc->getElementsByTagName(
'html')->item(0)->
37 getElementsByTagName(
'body')->item(0)->
38 getElementsByTagName(
'div')->item(0)
470 $this->state =
'data';
472 while ($this->state !== null) {
473 $this->{$this->state .
'State'}();
479 return $this->tree->save();
484 return ($this->char < $this->
EOF)
491 if ($s + $l < $this->
EOF) {
493 return $this->data[$s];
495 return substr($this->data, $s, $l);
502 return preg_replace(
'#^([' . $char_class .
']+).*#s',
'\\1', substr($this->data, $start));
511 if (
$char ===
'&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
516 $this->state =
'entityData';
518 } elseif (
$char ===
'-') {
525 if (($this->content_model === self::RCDATA || $this->content_model ===
526 self::CDATA) && $this->escape ===
false &&
527 $this->
char >= 3 && $this->
character($this->
char - 4, 4) ===
'<!--'
529 $this->escape =
true;
536 'type' => self::CHARACTR,
542 } elseif (
$char ===
'<' && ($this->content_model === self::PCDATA ||
543 (($this->content_model === self::RCDATA ||
544 $this->content_model === self::CDATA) && $this->escape ===
false))
554 $this->state =
'tagOpen';
557 } elseif (
$char ===
'>') {
563 if (($this->content_model === self::RCDATA ||
564 $this->content_model === self::CDATA) && $this->escape ===
true &&
565 $this->
character($this->
char, 3) ===
'-->'
567 $this->escape =
false;
574 'type' => self::CHARACTR,
579 } elseif ($this->
char === $this->
EOF) {
584 } elseif ($this->content_model === self::PLAINTEXT) {
590 'type' => self::CHARACTR,
591 'data' => substr($this->data, $this->
char)
602 $len = strcspn($this->data,
'<&', $this->
char);
603 $char = substr($this->data, $this->
char, $len);
604 $this->
char += $len - 1;
608 'type' => self::CHARACTR,
613 $this->state =
'data';
620 $entity = $this->
entity();
624 $char = (!$entity) ?
'&' : $entity;
627 'type' => self::CHARACTR,
633 $this->state =
'data';
638 switch ($this->content_model) {
646 if ($this->
character($this->
char + 1) ===
'/') {
648 $this->state =
'closeTagOpen';
653 'type' => self::CHARACTR,
658 $this->state =
'data';
671 $this->state =
'markupDeclarationOpen';
673 } elseif (
$char ===
'/') {
676 $this->state =
'closeTagOpen';
678 } elseif (preg_match(
'/^[A-Za-z]$/',
$char)) {
684 $this->token = array(
685 'name' => strtolower(
$char),
686 'type' => self::STARTTAG,
690 $this->state =
'tagName';
692 } elseif (
$char ===
'>') {
698 'type' => self::CHARACTR,
703 $this->state =
'data';
705 } elseif (
$char ===
'?') {
708 $this->state =
'bogusComment';
716 'type' => self::CHARACTR,
722 $this->state =
'data';
730 $next_node = strtolower($this->
characters(
'A-Za-z', $this->
char + 1));
731 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
733 if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
734 (!$the_same || ($the_same && (!preg_match(
735 '/[\t\n\x0b\x0c >\/]/',
736 $this->
character($this->
char + 1 + strlen($next_node))
737 ) || $this->
EOF === $this->
char)))
756 'type' => self::CHARACTR,
761 $this->state =
'data';
770 if (preg_match(
'/^[A-Za-z]$/',
$char)) {
776 $this->token = array(
777 'name' => strtolower(
$char),
778 'type' => self::ENDTAG
781 $this->state =
'tagName';
783 } elseif (
$char ===
'>') {
786 $this->state =
'data';
788 } elseif ($this->
char === $this->
EOF) {
794 'type' => self::CHARACTR,
800 $this->state =
'data';
804 $this->state =
'bogusComment';
815 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
822 $this->state =
'beforeAttributeName';
824 } elseif (
$char ===
'>') {
828 $this->state =
'data';
830 } elseif ($this->
char === $this->
EOF) {
837 $this->state =
'data';
839 } elseif (
$char ===
'/') {
843 $this->state =
'beforeAttributeName';
849 $this->token[
'name'] .= strtolower(
$char);
850 $this->state =
'tagName';
860 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
867 $this->state =
'beforeAttributeName';
869 } elseif (
$char ===
'>') {
873 $this->state =
'data';
875 } elseif (
$char ===
'/') {
879 $this->state =
'beforeAttributeName';
881 } elseif ($this->
char === $this->
EOF) {
888 $this->state =
'data';
895 $this->token[
'attr'][] = array(
896 'name' => strtolower(
$char),
900 $this->state =
'attributeName';
910 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
917 $this->state =
'afterAttributeName';
919 } elseif (
$char ===
'=') {
922 $this->state =
'beforeAttributeValue';
924 } elseif (
$char ===
'>') {
928 $this->state =
'data';
930 } elseif (
$char ===
'/' && $this->
character($this->
char + 1) !==
'>') {
934 $this->state =
'beforeAttributeName';
936 } elseif ($this->
char === $this->
EOF) {
943 $this->state =
'data';
949 $last = count($this->token[
'attr']) - 1;
950 $this->token[
'attr'][$last][
'name'] .= strtolower(
$char);
952 $this->state =
'attributeName';
962 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
969 $this->state =
'afterAttributeName';
971 } elseif (
$char ===
'=') {
974 $this->state =
'beforeAttributeValue';
976 } elseif (
$char ===
'>') {
980 $this->state =
'data';
982 } elseif (
$char ===
'/' && $this->
character($this->
char + 1) !==
'>') {
986 $this->state =
'beforeAttributeName';
988 } elseif ($this->
char === $this->
EOF) {
995 $this->state =
'data';
1002 $this->token[
'attr'][] = array(
1003 'name' => strtolower(
$char),
1007 $this->state =
'attributeName';
1017 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1024 $this->state =
'beforeAttributeValue';
1026 } elseif (
$char ===
'"') {
1029 $this->state =
'attributeValueDoubleQuoted';
1031 } elseif (
$char ===
'&') {
1036 $this->state =
'attributeValueUnquoted';
1038 } elseif (
$char ===
'\'') {
1041 $this->state =
'attributeValueSingleQuoted';
1043 } elseif (
$char ===
'>') {
1047 $this->state =
'data';
1053 $last = count($this->token[
'attr']) - 1;
1054 $this->token[
'attr'][$last][
'value'] .=
$char;
1056 $this->state =
'attributeValueUnquoted';
1066 if (
$char ===
'"') {
1069 $this->state =
'beforeAttributeName';
1071 } elseif (
$char ===
'&') {
1076 } elseif ($this->
char === $this->
EOF) {
1083 $this->state =
'data';
1089 $last = count($this->token[
'attr']) - 1;
1090 $this->token[
'attr'][$last][
'value'] .=
$char;
1092 $this->state =
'attributeValueDoubleQuoted';
1102 if (
$char ===
'\'') {
1105 $this->state =
'beforeAttributeName';
1107 } elseif (
$char ===
'&') {
1112 } elseif ($this->
char === $this->
EOF) {
1119 $this->state =
'data';
1125 $last = count($this->token[
'attr']) - 1;
1126 $this->token[
'attr'][$last][
'value'] .=
$char;
1128 $this->state =
'attributeValueSingleQuoted';
1138 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1145 $this->state =
'beforeAttributeName';
1147 } elseif (
$char ===
'&') {
1152 } elseif (
$char ===
'>') {
1156 $this->state =
'data';
1162 $last = count($this->token[
'attr']) - 1;
1163 $this->token[
'attr'][$last][
'value'] .=
$char;
1165 $this->state =
'attributeValueUnquoted';
1172 $entity = $this->
entity();
1181 $last = count($this->token[
'attr']) - 1;
1182 $this->token[
'attr'][$last][
'value'] .=
$char;
1199 'type' => self::COMMENT
1203 $this->
char += strlen(
$data);
1206 $this->state =
'data';
1209 if ($this->
char === $this->
EOF) {
1210 $this->
char = $this->
EOF - 1;
1219 if ($this->
character($this->
char + 1, 2) ===
'--') {
1221 $this->state =
'comment';
1222 $this->token = array(
1224 'type' => self::COMMENT
1230 } elseif (strtolower($this->
character($this->
char + 1, 7)) ===
'doctype') {
1232 $this->state =
'doctype';
1239 $this->state =
'bogusComment';
1250 if (
$char ===
'-') {
1252 $this->state =
'commentDash';
1255 } elseif ($this->
char === $this->
EOF) {
1260 $this->state =
'data';
1266 $this->token[
'data'] .=
$char;
1277 if (
$char ===
'-') {
1279 $this->state =
'commentEnd';
1282 } elseif ($this->
char === $this->
EOF) {
1287 $this->state =
'data';
1293 $this->token[
'data'] .=
'-' .
$char;
1294 $this->state =
'comment';
1304 if (
$char ===
'>') {
1306 $this->state =
'data';
1308 } elseif (
$char ===
'-') {
1309 $this->token[
'data'] .=
'-';
1311 } elseif ($this->
char === $this->
EOF) {
1314 $this->state =
'data';
1317 $this->token[
'data'] .=
'--' .
$char;
1318 $this->state =
'comment';
1328 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1329 $this->state =
'beforeDoctypeName';
1333 $this->state =
'beforeDoctypeName';
1343 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1346 } elseif (preg_match(
'/^[a-z]$/',
$char)) {
1347 $this->token = array(
1348 'name' => strtoupper(
$char),
1349 'type' => self::DOCTYPE,
1353 $this->state =
'doctypeName';
1355 } elseif (
$char ===
'>') {
1359 'type' => self::DOCTYPE,
1364 $this->state =
'data';
1366 } elseif ($this->
char === $this->
EOF) {
1370 'type' => self::DOCTYPE,
1376 $this->state =
'data';
1379 $this->token = array(
1381 'type' => self::DOCTYPE,
1385 $this->state =
'doctypeName';
1395 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1396 $this->state =
'AfterDoctypeName';
1398 } elseif (
$char ===
'>') {
1400 $this->state =
'data';
1402 } elseif (preg_match(
'/^[a-z]$/',
$char)) {
1403 $this->token[
'name'] .= strtoupper(
$char);
1405 } elseif ($this->
char === $this->
EOF) {
1408 $this->state =
'data';
1411 $this->token[
'name'] .=
$char;
1414 $this->token[
'error'] = ($this->token[
'name'] ===
'HTML')
1425 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1428 } elseif (
$char ===
'>') {
1430 $this->state =
'data';
1432 } elseif ($this->
char === $this->
EOF) {
1435 $this->state =
'data';
1438 $this->token[
'error'] =
true;
1439 $this->state =
'bogusDoctype';
1449 if (
$char ===
'>') {
1451 $this->state =
'data';
1453 } elseif ($this->
char === $this->
EOF) {
1456 $this->state =
'data';
1473 switch ($this->
character($this->
char + 1)) {
1479 switch ($this->
character($this->
char + 1)) {
1491 $char_class =
'0-9A-Fa-f';
1500 $char_class =
'0-9';
1508 $entity = $this->
character($start, $this->
char);
1509 $cond = strlen($e_name) > 0;
1519 $e_name = $this->
characters(
'0-9A-Za-z;', $this->
char + 1);
1520 $len = strlen($e_name);
1522 for ($c = 1; $c <= $len; $c++) {
1523 $id = substr($e_name, 0, $c);
1526 if (in_array($id, $this->entities)) {
1527 if ($e_name[$c - 1] !==
';') {
1528 if ($c < $len && $e_name[$c] ==
';') {
1537 $cond = isset($entity);
1545 $this->
char = $start;
1551 return html_entity_decode(
'&' . $entity .
';', ENT_QUOTES,
'UTF-8');
1556 $emit = $this->tree->emitToken(
$token);
1558 if (is_int($emit)) {
1559 $this->content_model = $emit;
1561 } elseif (
$token[
'type'] === self::ENDTAG) {
1568 $this->state = null;
1569 $this->tree->emitToken(
1590 private $scoping = array(
'button',
'caption',
'html',
'marquee',
'object',
'table',
'td',
'th');
1704 $this->dom =
new DOMDocument;
1706 $this->dom->encoding =
'UTF-8';
1707 $this->dom->preserveWhiteSpace =
true;
1708 $this->dom->substituteEntities =
true;
1709 $this->dom->strictErrorChecking =
false;
1715 switch ($this->phase) {
1716 case self::INIT_PHASE:
1719 case self::ROOT_PHASE:
1722 case self::MAIN_PHASE:
1725 case self::END_PHASE :
1744 if ((isset($token[
'error']) && $token[
'error']) ||
1750 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data']))
1761 } elseif (isset($token[
'error']) && !$token[
'error']) {
1766 $doctype =
new DOMDocumentType(null, null,
'HTML');
1775 } elseif (isset($token[
'data']) && preg_match(
1776 '/^[\t\n\x0b\x0c ]+$/',
1781 $text = $this->dom->createTextNode($token[
'data']);
1782 $this->dom->appendChild($text);
1799 $comment = $this->dom->createComment($token[
'data']);
1806 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
1809 $text = $this->dom->createTextNode($token[
'data']);
1810 $this->dom->appendChild($text);
1819 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
1827 $html = $this->dom->createElement(
'html');
1828 $this->dom->appendChild($html);
1829 $this->stack[] = $html;
1845 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'html') {
1853 foreach ($token[
'attr'] as $attr) {
1854 if (!$this->stack[0]->hasAttribute($attr[
'name'])) {
1855 $this->stack[0]->setAttribute($attr[
'name'], $attr[
'value']);
1867 switch ($this->mode) {
1868 case self::BEFOR_HEAD:
1872 return $this->
inHead($token);
1874 case self::AFTER_HEAD:
1878 return $this->
inBody($token);
1880 case self::IN_TABLE:
1881 return $this->
inTable($token);
1883 case self::IN_CAPTION:
1886 case self::IN_CGROUP:
1889 case self::IN_TBODY:
1893 return $this->
inRow($token);
1896 return $this->
inCell($token);
1898 case self::IN_SELECT:
1901 case self::AFTER_BODY:
1904 case self::IN_FRAME:
1907 case self::AFTR_FRAME:
1910 case self::END_PHASE:
1925 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
1937 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') {
1943 $this->head_pointer = $element;
1954 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') ||
1956 '/^[\t\n\x0b\x0c ]$/',
1970 return $this->
inHead($token);
1990 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) || (
1992 end($this->stack)->nodeName,
1993 array(
'title',
'style',
'script')
2006 in_array($token[
'name'], array(
'title',
'style',
'script'))
2008 array_pop($this->stack);
2012 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'title') {
2016 if ($this->head_pointer !== null) {
2018 $this->head_pointer->appendChild($element);
2028 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'style') {
2032 if ($this->head_pointer !== null) {
2034 $this->head_pointer->appendChild($element);
2044 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'script') {
2047 $this->head_pointer->appendChild($element);
2055 array(
'base',
'link',
'meta')
2061 if ($this->head_pointer !== null) {
2063 $this->head_pointer->appendChild($element);
2064 array_pop($this->stack);
2071 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'head') {
2074 if ($this->head_pointer->isSameNode(end($this->stack))) {
2075 array_pop($this->stack);
2086 } elseif (($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') ||
2087 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] !==
'html')
2095 if ($this->head_pointer->isSameNode(end($this->stack))) {
2121 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
2133 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'body') {
2141 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'frameset') {
2152 array(
'base',
'link',
'meta',
'script',
'style',
'title')
2158 return $this->
inHead($token);
2172 return $this->
inBody($token);
2180 switch ($token[
'type']) {
2198 switch ($token[
'name']) {
2205 return $this->
inHead($token);
2216 return $this->
inHead($token);
2225 if (count($this->stack) === 1 || $this->stack[1]->nodeName !==
'body') {
2234 foreach ($token[
'attr'] as $attr) {
2235 if (!$this->stack[1]->hasAttribute($attr[
'name'])) {
2236 $this->stack[1]->setAttribute($attr[
'name'], $attr[
'value']);
2277 if ($this->form_pointer !== null) {
2297 $this->form_pointer = $element;
2317 $stack_length = count($this->stack) - 1;
2319 for (
$n = $stack_length; 0 <=
$n;
$n--) {
2323 $node = $this->stack[
$n];
2329 if ($token[
'name'] === $node->tagName || ($token[
'name'] !==
'li'
2330 && ($node->tagName ===
'dd' || $node->tagName ===
'dt'))
2332 for ($x = $stack_length; $x >=
$n; $x--) {
2333 array_pop($this->stack);
2342 if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
2343 $node->tagName !==
'address' && $node->tagName !==
'div'
2398 while ($this->
elementInScope(array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6'))) {
2399 array_pop($this->stack);
2417 $leng = count($this->a_formatting);
2419 for (
$n = $leng - 1;
$n >= 0;
$n--) {
2420 if ($this->a_formatting[
$n] === self::MARKER) {
2423 } elseif ($this->a_formatting[
$n]->nodeName ===
'a') {
2442 $this->a_formatting[] = $el;
2467 $this->a_formatting[] = $el;
2560 array_pop($this->stack);
2580 array_pop($this->stack);
2587 $token[
'name'] =
'img';
2588 return $this->
inBody($token);
2602 $this->form_pointer !== null
2603 ? $this->form_pointer->appendChild($element)
2604 : end($this->stack)->appendChild($element);
2607 array_pop($this->stack);
2617 if ($this->form_pointer === null) {
2660 'This is a searchable index. ' .
2661 'Insert your search keywords here: '
2668 $attr = $token[
'attr'];
2669 $attr[] = array(
'name' =>
'name',
'value' =>
'isindex');
2682 'This is a searchable index. ' .
2683 'Insert your search keywords here: '
2779 case 'event-source':
2802 switch ($token[
'name']) {
2808 if (count($this->stack) < 2 || $this->stack[1]->nodeName !==
'body') {
2813 } elseif (end($this->stack)->nodeName !==
'body') {
2866 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2867 if ($this->stack[
$n]->nodeName === $token[
'name']) {
2871 array_pop($this->stack);
2886 if (end($this->stack)->nodeName !== $token[
'name']) {
2896 array_pop($this->stack);
2900 $this->form_pointer = null;
2917 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2919 array_pop($this->stack);
2947 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2948 if ($this->stack[
$n]->nodeName === $token[
'name']) {
2952 array_pop($this->stack);
2965 $elements = array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6');
2982 array_pop($this->stack);
3010 for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
3011 if ($this->a_formatting[$a] === self::MARKER) {
3014 } elseif ($this->a_formatting[$a]->tagName === $token[
'name']) {
3015 $formatting_element = $this->a_formatting[$a];
3016 $in_stack = in_array($formatting_element, $this->stack,
true);
3026 if (!isset($formatting_element) || ($in_stack &&
3035 } elseif (isset($formatting_element) && !$in_stack) {
3036 unset($this->a_formatting[$fe_af_pos]);
3037 $this->a_formatting = array_merge($this->a_formatting);
3046 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3047 $length = count($this->stack);
3049 for ($s = $fe_s_pos + 1; $s < $length; $s++) {
3052 if ($category !== self::PHRASING && $category !== self::FORMATTING) {
3053 $furthest_block = $this->stack[$s];
3063 if (!isset($furthest_block)) {
3064 for (
$n = $length - 1;
$n >= $fe_s_pos;
$n--) {
3065 array_pop($this->stack);
3068 unset($this->a_formatting[$fe_af_pos]);
3069 $this->a_formatting = array_merge($this->a_formatting);
3076 $common_ancestor = $this->stack[$fe_s_pos - 1];
3080 if ($furthest_block->parentNode !== null) {
3081 $furthest_block->parentNode->removeChild($furthest_block);
3088 $bookmark = $fe_af_pos;
3092 $node = $furthest_block;
3093 $last_node = $furthest_block;
3096 for (
$n = array_search($node, $this->stack,
true) - 1;
$n >= 0;
$n--) {
3099 $node = $this->stack[
$n];
3105 if (!in_array($node, $this->a_formatting,
true)) {
3106 unset($this->stack[
$n]);
3107 $this->stack = array_merge($this->stack);
3117 if ($node === $formatting_element) {
3124 } elseif ($last_node === $furthest_block) {
3125 $bookmark = array_search($node, $this->a_formatting,
true) + 1;
3134 if ($node->hasChildNodes()) {
3135 $clone = $node->cloneNode();
3136 $s_pos = array_search($node, $this->stack,
true);
3137 $a_pos = array_search($node, $this->a_formatting,
true);
3139 $this->stack[$s_pos] = $clone;
3140 $this->a_formatting[$a_pos] = $clone;
3146 if ($last_node->parentNode !== null) {
3147 $last_node->parentNode->removeChild($last_node);
3150 $node->appendChild($last_node);
3160 if ($last_node->parentNode !== null) {
3161 $last_node->parentNode->removeChild($last_node);
3164 $common_ancestor->appendChild($last_node);
3168 $clone = $formatting_element->cloneNode();
3173 while ($furthest_block->hasChildNodes()) {
3174 $child = $furthest_block->firstChild;
3175 $furthest_block->removeChild($child);
3176 $clone->appendChild($child);
3180 $furthest_block->appendChild($clone);
3186 $fe_af_pos = array_search($formatting_element, $this->a_formatting,
true);
3187 unset($this->a_formatting[$fe_af_pos]);
3188 $this->a_formatting = array_merge($this->a_formatting);
3190 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
3191 $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
3192 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
3199 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3200 $fb_s_pos = array_search($furthest_block, $this->stack,
true);
3201 unset($this->stack[$fe_s_pos]);
3203 $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
3204 $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
3205 $this->stack = array_merge($s_part1, array($clone), $s_part2);
3208 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
3232 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3233 if ($this->stack[
$n]->nodeName === $token[
'name']) {
3237 array_pop($this->stack);
3240 $marker = end(array_keys($this->a_formatting, self::MARKER,
true));
3242 for (
$n = count($this->a_formatting) - 1;
$n > $marker;
$n--) {
3243 array_pop($this->a_formatting);
3276 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3279 $node = end($this->stack);
3283 if ($token[
'name'] === $node->nodeName) {
3294 for ($x = count($this->stack) -
$n; $x >=
$n; $x--) {
3295 array_pop($this->stack);
3301 if ($category !== self::SPECIAL && $category !== self::SCOPING) {
3318 $clear = array(
'html',
'table');
3324 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
3327 $text = $this->dom->createTextNode($token[
'data']);
3328 end($this->stack)->appendChild($text);
3334 $comment = $this->dom->createComment($token[
'data']);
3335 end($this->stack)->appendChild(
$comment);
3339 $token[
'name'] ===
'caption'
3355 $token[
'name'] ===
'colgroup'
3367 $token[
'name'] ===
'col'
3371 'name' =>
'colgroup',
3382 array(
'tbody',
'tfoot',
'thead')
3395 in_array($token[
'name'], array(
'td',
'th',
'tr'))
3411 $token[
'name'] ===
'table'
3427 $token[
'name'] ===
'table'
3447 $current = end($this->stack)->nodeName;
3448 array_pop($this->stack);
3450 if ($current ===
'table') {
3489 end($this->stack)->nodeName,
3490 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
3504 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3505 if ($this->stack[
$n]->nodeName ===
'table') {
3506 $table = $this->stack[
$n];
3511 if (isset($table) && $table->parentNode !== null) {
3512 $this->foster_parent = $table->parentNode;
3514 } elseif (!isset($table)) {
3515 $this->foster_parent = $this->stack[0];
3517 } elseif (isset($table) && ($table->parentNode === null ||
3518 $table->parentNode->nodeType !== XML_ELEMENT_NODE)
3520 $this->foster_parent = $this->stack[
$n - 1];
3531 if ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'caption') {
3550 $node = end($this->stack)->nodeName;
3551 array_pop($this->stack);
3553 if ($node ===
'caption') {
3583 $token[
'name'] ===
'table')
3590 'name' =>
'caption',
3595 return $this->
inTable($token);
3629 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
3632 $text = $this->dom->createTextNode($token[
'data']);
3633 end($this->stack)->appendChild($text);
3639 $comment = $this->dom->createComment($token[
'data']);
3640 end($this->stack)->appendChild(
$comment);
3643 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'col') {
3647 array_pop($this->stack);
3651 $token[
'name'] ===
'colgroup'
3655 if (end($this->stack)->nodeName ===
'html') {
3662 array_pop($this->stack);
3667 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'col') {
3676 'name' =>
'colgroup',
3681 return $this->
inTable($token);
3687 $clear = array(
'tbody',
'tfoot',
'thead',
'html');
3701 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')
3713 return $this->
inRow($token);
3717 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))
3732 array_pop($this->stack);
3740 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoor',
'thead')
3747 if (!$this->
elementInScope(array(
'tbody',
'thead',
'tfoot'),
true)) {
3760 'name' => end($this->stack)->nodeName,
3772 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
3786 $clear = array(
'tr',
'html');
3790 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')
3805 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'tr') {
3820 array_pop($this->stack);
3828 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoot',
'thead',
'tr')
3840 return $this->
inCell($token);
3844 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))
3863 return $this->
inCell($token);
3870 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
3886 ($token[
'name'] ===
'td' || $token[
'name'] ===
'th')
3907 $node = end($this->stack)->nodeName;
3908 array_pop($this->stack);
3910 if ($node === $token[
'name']) {
3951 return $this->
inRow($token);
3981 return $this->
inRow($token);
3988 array(
'body',
'caption',
'col',
'colgroup',
'html')
3997 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
4011 return $this->
inRow($token);
4038 $token[
'name'] ===
'option'
4042 if (end($this->stack)->nodeName ===
'option') {
4056 $token[
'name'] ===
'optgroup'
4060 if (end($this->stack)->nodeName ===
'option') {
4071 if (end($this->stack)->nodeName ===
'optgroup') {
4074 'name' =>
'optgroup',
4085 $token[
'name'] ===
'optgroup'
4091 $elements_in_stack = count($this->stack);
4093 if ($this->stack[$elements_in_stack - 1]->nodeName ===
'option' &&
4094 $this->stack[$elements_in_stack - 2]->nodeName ===
'optgroup'
4107 if ($this->stack[$elements_in_stack - 1] ===
'optgroup') {
4108 array_pop($this->stack);
4113 $token[
'name'] ===
'option'
4118 if (end($this->stack)->nodeName ===
'option') {
4119 array_pop($this->stack);
4124 $token[
'name'] ===
'select'
4137 $current = end($this->stack)->nodeName;
4138 array_pop($this->stack);
4140 if ($current ===
'select') {
4150 } elseif ($token[
'name'] ===
'select' &&
4210 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4221 $comment = $this->dom->createComment($token[
'data']);
4222 $this->stack[0]->appendChild(
$comment);
4225 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') {
4239 return $this->
inBody($token);
4251 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4263 } elseif ($token[
'name'] ===
'frameset' &&
4269 } elseif ($token[
'name'] ===
'frameset' &&
4274 if (end($this->stack)->nodeName ===
'html') {
4280 array_pop($this->stack);
4290 } elseif ($token[
'name'] ===
'frame' &&
4297 array_pop($this->stack);
4300 } elseif ($token[
'name'] ===
'noframes' &&
4320 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4332 } elseif ($token[
'name'] ===
'html' &&
4339 } elseif ($token[
'name'] ===
'noframes' &&
4364 $comment = $this->dom->createComment($token[
'data']);
4371 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4380 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
4400 $token[
'name'] = preg_replace(
'/[^a-z0-9-]/i',
'', $token[
'name']);
4402 $token[
'name'] = ltrim($token[
'name'],
'-0..9');
4404 if ($token[
'name'] ===
'') {
4405 $token[
'name'] =
'span';
4409 $el = $this->dom->createElement($token[
'name']);
4411 foreach ($token[
'attr'] as $attr) {
4412 if (!$el->hasAttribute($attr[
'name'])) {
4413 $el->setAttribute($attr[
'name'], $attr[
'value']);
4418 $this->stack[] = $el;
4425 $text = $this->dom->createTextNode($data);
4431 $comment = $this->dom->createComment($data);
4437 if ($this->foster_parent === null) {
4438 end($this->stack)->appendChild($node);
4440 } elseif ($this->foster_parent !== null) {
4447 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
4448 if ($this->stack[
$n]->nodeName ===
'table' &&
4449 $this->stack[
$n]->parentNode !== null
4451 $table = $this->stack[
$n];
4456 if (isset($table) && $this->foster_parent->isSameNode($table->parentNode)) {
4457 $this->foster_parent->insertBefore($node, $table);
4459 $this->foster_parent->appendChild($node);
4462 $this->foster_parent = null;
4468 if (is_array($el)) {
4469 foreach ($el as $element) {
4478 $leng = count($this->stack);
4480 for (
$n = 0;
$n < $leng;
$n++) {
4483 $node = $this->stack[$leng - 1 -
$n];
4485 if ($node->tagName === $el) {
4489 } elseif ($node->tagName ===
'table') {
4494 } elseif ($table ===
true && in_array(
4511 } elseif ($node === $node->ownerDocument->documentElement) {
4530 $formatting_elements = count($this->a_formatting);
4532 if ($formatting_elements === 0) {
4538 $entry = end($this->a_formatting);
4544 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
4548 for ($a = $formatting_elements - 1; $a >= 0;
true) {
4552 $step_seven =
false;
4559 $entry = $this->a_formatting[$a];
4563 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
4571 if (isset($step_seven) && $step_seven ===
true) {
4573 $entry = $this->a_formatting[$a];
4577 $clone = $entry->cloneNode();
4581 end($this->stack)->appendChild($clone);
4582 $this->stack[] = $clone;
4586 $this->a_formatting[$a] = $clone;
4590 if (end($this->a_formatting) !== $clone) {
4607 $entry = end($this->a_formatting);
4610 array_pop($this->a_formatting);
4614 if ($entry === self::MARKER) {
4627 $node = end($this->stack);
4628 $elements = array_diff(array(
'dd',
'dt',
'li',
'p',
'td',
'th',
'tr'),
$exclude);
4630 while (in_array(end($this->stack)->nodeName, $elements)) {
4631 array_pop($this->stack);
4637 $name = $node->tagName;
4638 if (in_array($name, $this->special)) {
4640 } elseif (in_array($name, $this->scoping)) {
4642 } elseif (in_array($name, $this->formatting)) {
4657 $node = end($this->stack)->nodeName;
4659 if (in_array($node, $elements)) {
4662 array_pop($this->stack);
4671 $leng = count($this->stack);
4673 for (
$n = $leng - 1;
$n >= 0;
$n--) {
4675 $node = $this->stack[
$n];
4681 if ($this->stack[0]->isSameNode($node)) {
4687 if ($node->nodeName ===
'select') {
4693 } elseif ($node->nodeName ===
'td' || $node->nodeName ===
'th') {
4699 } elseif ($node->nodeName ===
'tr') {
4705 } elseif (in_array($node->nodeName, array(
'tbody',
'thead',
'tfoot'))) {
4711 } elseif ($node->nodeName ===
'caption') {
4717 } elseif ($node->nodeName ===
'colgroup') {
4723 } elseif ($node->nodeName ===
'table') {
4730 } elseif ($node->nodeName ===
'head') {
4736 } elseif ($node->nodeName ===
'body') {
4742 } elseif ($node->nodeName ===
'frameset') {
4750 } elseif ($node->nodeName ===
'html') {
4751 $this->mode = ($this->head_pointer === null)
4770 foreach (array(
'td',
'th') as $cell) {