28 }
catch (DOMException $e) {
31 $context->register(
'PH5PError', $e);
36 $doc->getElementsByTagName(
'html')->item(0)->
37 getElementsByTagName(
'body')->item(0)
469 $this->state =
'data';
471 while ($this->state !==
null) {
472 $this->{$this->state .
'State'}();
478 return $this->tree->save();
483 return ($this->char < $this->
EOF)
490 if (
$s + $l < $this->
EOF) {
501 return preg_replace(
'#^([' . $char_class .
']+).*#s',
'\\1', substr($this->
data, $start));
510 if (
$char ===
'&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
515 $this->state =
'entityData';
517 } elseif (
$char ===
'-') {
524 if (($this->content_model === self::RCDATA || $this->content_model ===
525 self::CDATA) && $this->escape ===
false &&
526 $this->
char >= 3 && $this->
character($this->
char - 4, 4) ===
'<!--'
528 $this->escape =
true;
535 'type' => self::CHARACTR,
541 } elseif (
$char ===
'<' && ($this->content_model === self::PCDATA ||
542 (($this->content_model === self::RCDATA ||
543 $this->content_model === self::CDATA) && $this->escape ===
false))
553 $this->state =
'tagOpen';
556 } elseif (
$char ===
'>') {
562 if (($this->content_model === self::RCDATA ||
563 $this->content_model === self::CDATA) && $this->escape ===
true &&
564 $this->
character($this->
char, 3) ===
'-->'
566 $this->escape =
false;
573 'type' => self::CHARACTR,
578 } elseif ($this->
char === $this->
EOF) {
583 } elseif ($this->content_model === self::PLAINTEXT) {
589 'type' => self::CHARACTR,
590 'data' => substr($this->
data, $this->
char)
601 $len = strcspn($this->
data,
'<&', $this->
char);
602 $char = substr($this->
data, $this->
char, $len);
603 $this->
char += $len - 1;
607 'type' => self::CHARACTR,
612 $this->state =
'data';
619 $entity = $this->
entity();
623 $char = (!$entity) ?
'&' : $entity;
626 'type' => self::CHARACTR,
632 $this->state =
'data';
637 switch ($this->content_model) {
645 if ($this->
character($this->
char + 1) ===
'/') {
647 $this->state =
'closeTagOpen';
652 'type' => self::CHARACTR,
657 $this->state =
'data';
670 $this->state =
'markupDeclarationOpen';
672 } elseif (
$char ===
'/') {
675 $this->state =
'closeTagOpen';
677 } elseif (preg_match(
'/^[A-Za-z]$/',
$char)) {
683 $this->token = array(
684 'name' => strtolower(
$char),
685 'type' => self::STARTTAG,
689 $this->state =
'tagName';
691 } elseif (
$char ===
'>') {
697 'type' => self::CHARACTR,
702 $this->state =
'data';
704 } elseif (
$char ===
'?') {
707 $this->state =
'bogusComment';
715 'type' => self::CHARACTR,
721 $this->state =
'data';
729 $next_node = strtolower($this->
characters(
'A-Za-z', $this->
char + 1));
730 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
732 if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
733 (!$the_same || ($the_same && (!preg_match(
734 '/[\t\n\x0b\x0c >\/]/',
735 $this->
character($this->
char + 1 + strlen($next_node))
736 ) || $this->
EOF === $this->
char)))
755 'type' => self::CHARACTR,
760 $this->state =
'data';
769 if (preg_match(
'/^[A-Za-z]$/',
$char)) {
775 $this->token = array(
776 'name' => strtolower(
$char),
777 'type' => self::ENDTAG
780 $this->state =
'tagName';
782 } elseif (
$char ===
'>') {
785 $this->state =
'data';
787 } elseif ($this->
char === $this->
EOF) {
793 'type' => self::CHARACTR,
799 $this->state =
'data';
803 $this->state =
'bogusComment';
814 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
821 $this->state =
'beforeAttributeName';
823 } elseif (
$char ===
'>') {
827 $this->state =
'data';
829 } elseif ($this->
char === $this->
EOF) {
836 $this->state =
'data';
838 } elseif (
$char ===
'/') {
842 $this->state =
'beforeAttributeName';
848 $this->token[
'name'] .= strtolower(
$char);
849 $this->state =
'tagName';
859 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
866 $this->state =
'beforeAttributeName';
868 } elseif (
$char ===
'>') {
872 $this->state =
'data';
874 } elseif (
$char ===
'/') {
878 $this->state =
'beforeAttributeName';
880 } elseif ($this->
char === $this->
EOF) {
887 $this->state =
'data';
894 $this->token[
'attr'][] = array(
895 'name' => strtolower(
$char),
899 $this->state =
'attributeName';
909 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
916 $this->state =
'afterAttributeName';
918 } elseif (
$char ===
'=') {
921 $this->state =
'beforeAttributeValue';
923 } elseif (
$char ===
'>') {
927 $this->state =
'data';
929 } elseif (
$char ===
'/' && $this->
character($this->
char + 1) !==
'>') {
933 $this->state =
'beforeAttributeName';
935 } elseif ($this->
char === $this->
EOF) {
942 $this->state =
'data';
948 $last = count($this->token[
'attr']) - 1;
949 $this->token[
'attr'][$last][
'name'] .= strtolower(
$char);
951 $this->state =
'attributeName';
961 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
968 $this->state =
'afterAttributeName';
970 } elseif (
$char ===
'=') {
973 $this->state =
'beforeAttributeValue';
975 } elseif (
$char ===
'>') {
979 $this->state =
'data';
981 } elseif (
$char ===
'/' && $this->
character($this->
char + 1) !==
'>') {
985 $this->state =
'beforeAttributeName';
987 } elseif ($this->
char === $this->
EOF) {
994 $this->state =
'data';
1001 $this->token[
'attr'][] = array(
1002 'name' => strtolower(
$char),
1006 $this->state =
'attributeName';
1016 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1023 $this->state =
'beforeAttributeValue';
1025 } elseif (
$char ===
'"') {
1028 $this->state =
'attributeValueDoubleQuoted';
1030 } elseif (
$char ===
'&') {
1035 $this->state =
'attributeValueUnquoted';
1037 } elseif (
$char ===
'\'') {
1040 $this->state =
'attributeValueSingleQuoted';
1042 } elseif (
$char ===
'>') {
1046 $this->state =
'data';
1052 $last = count($this->token[
'attr']) - 1;
1053 $this->token[
'attr'][$last][
'value'] .=
$char;
1055 $this->state =
'attributeValueUnquoted';
1065 if (
$char ===
'"') {
1068 $this->state =
'beforeAttributeName';
1070 } elseif (
$char ===
'&') {
1075 } elseif ($this->
char === $this->
EOF) {
1082 $this->state =
'data';
1088 $last = count($this->token[
'attr']) - 1;
1089 $this->token[
'attr'][$last][
'value'] .=
$char;
1091 $this->state =
'attributeValueDoubleQuoted';
1101 if (
$char ===
'\'') {
1104 $this->state =
'beforeAttributeName';
1106 } elseif (
$char ===
'&') {
1111 } elseif ($this->
char === $this->
EOF) {
1118 $this->state =
'data';
1124 $last = count($this->token[
'attr']) - 1;
1125 $this->token[
'attr'][$last][
'value'] .=
$char;
1127 $this->state =
'attributeValueSingleQuoted';
1137 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1144 $this->state =
'beforeAttributeName';
1146 } elseif (
$char ===
'&') {
1151 } elseif (
$char ===
'>') {
1155 $this->state =
'data';
1161 $last = count($this->token[
'attr']) - 1;
1162 $this->token[
'attr'][$last][
'value'] .=
$char;
1164 $this->state =
'attributeValueUnquoted';
1171 $entity = $this->
entity();
1180 $last = count($this->token[
'attr']) - 1;
1181 $this->token[
'attr'][$last][
'value'] .=
$char;
1198 'type' => self::COMMENT
1202 $this->
char += strlen(
$data);
1205 $this->state =
'data';
1208 if ($this->
char === $this->
EOF) {
1209 $this->
char = $this->
EOF - 1;
1218 if ($this->
character($this->
char + 1, 2) ===
'--') {
1220 $this->state =
'comment';
1221 $this->token = array(
1223 'type' => self::COMMENT
1229 } elseif (strtolower($this->
character($this->
char + 1, 7)) ===
'doctype') {
1231 $this->state =
'doctype';
1238 $this->state =
'bogusComment';
1249 if (
$char ===
'-') {
1251 $this->state =
'commentDash';
1254 } elseif ($this->
char === $this->
EOF) {
1259 $this->state =
'data';
1265 $this->token[
'data'] .=
$char;
1276 if (
$char ===
'-') {
1278 $this->state =
'commentEnd';
1281 } elseif ($this->
char === $this->
EOF) {
1286 $this->state =
'data';
1292 $this->token[
'data'] .=
'-' .
$char;
1293 $this->state =
'comment';
1303 if (
$char ===
'>') {
1305 $this->state =
'data';
1307 } elseif (
$char ===
'-') {
1308 $this->token[
'data'] .=
'-';
1310 } elseif ($this->
char === $this->
EOF) {
1313 $this->state =
'data';
1316 $this->token[
'data'] .=
'--' .
$char;
1317 $this->state =
'comment';
1327 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1328 $this->state =
'beforeDoctypeName';
1332 $this->state =
'beforeDoctypeName';
1342 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1345 } elseif (preg_match(
'/^[a-z]$/',
$char)) {
1346 $this->token = array(
1347 'name' => strtoupper(
$char),
1348 'type' => self::DOCTYPE,
1352 $this->state =
'doctypeName';
1354 } elseif (
$char ===
'>') {
1358 'type' => self::DOCTYPE,
1363 $this->state =
'data';
1365 } elseif ($this->
char === $this->
EOF) {
1369 'type' => self::DOCTYPE,
1375 $this->state =
'data';
1378 $this->token = array(
1380 'type' => self::DOCTYPE,
1384 $this->state =
'doctypeName';
1394 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1395 $this->state =
'AfterDoctypeName';
1397 } elseif (
$char ===
'>') {
1399 $this->state =
'data';
1401 } elseif (preg_match(
'/^[a-z]$/',
$char)) {
1402 $this->token[
'name'] .= strtoupper(
$char);
1404 } elseif ($this->
char === $this->
EOF) {
1407 $this->state =
'data';
1410 $this->token[
'name'] .=
$char;
1413 $this->token[
'error'] = ($this->token[
'name'] ===
'HTML')
1424 if (preg_match(
'/^[\t\n\x0b\x0c ]$/',
$char)) {
1427 } elseif (
$char ===
'>') {
1429 $this->state =
'data';
1431 } elseif ($this->
char === $this->
EOF) {
1434 $this->state =
'data';
1437 $this->token[
'error'] =
true;
1438 $this->state =
'bogusDoctype';
1448 if (
$char ===
'>') {
1450 $this->state =
'data';
1452 } elseif ($this->
char === $this->
EOF) {
1455 $this->state =
'data';
1472 switch ($this->
character($this->
char + 1)) {
1478 switch ($this->
character($this->
char + 1)) {
1490 $char_class =
'0-9A-Fa-f';
1499 $char_class =
'0-9';
1507 $entity = $this->
character($start, $this->
char);
1508 $cond = strlen($e_name) > 0;
1519 $e_name = $this->
characters(
'0-9A-Za-z;', $this->
char + 1);
1520 $len = strlen($e_name);
1522 for ($c = 1; $c <= $len; $c++) {
1523 $id = substr($e_name, 0, $c);
1526 if (in_array(
$id, $this->entities)) {
1527 if ($e_name[$c - 1] !==
';') {
1528 if ($c < $len && $e_name[$c] ==
';') {
1537 $cond = isset($entity);
1545 $this->
char = $start;
1551 return html_entity_decode(
'&' . rtrim($entity,
';') .
';', ENT_QUOTES,
'UTF-8');
1556 $emit = $this->tree->emitToken(
$token);
1558 if (is_int($emit)) {
1559 $this->content_model = $emit;
1561 } elseif (
$token[
'type'] === self::ENDTAG) {
1568 $this->state =
null;
1569 $this->tree->emitToken(
1590 private $scoping = array(
'button',
'caption',
'html',
'marquee',
'object',
'table',
'td',
'th');
1704 $this->dom =
new DOMDocument;
1706 $this->dom->encoding =
'UTF-8';
1707 $this->dom->preserveWhiteSpace =
true;
1708 $this->dom->substituteEntities =
true;
1709 $this->dom->strictErrorChecking =
false;
1715 switch ($this->phase) {
1744 if ((isset($token[
'error']) && $token[
'error']) ||
1750 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data']))
1761 } elseif (isset($token[
'error']) && !$token[
'error']) {
1766 $doctype =
new DOMDocumentType(
null,
null,
'HTML');
1775 } elseif (isset($token[
'data']) && preg_match(
1776 '/^[\t\n\x0b\x0c ]+$/',
1781 $text = $this->dom->createTextNode($token[
'data']);
1782 $this->dom->appendChild(
$text);
1799 $comment = $this->dom->createComment($token[
'data']);
1806 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
1809 $text = $this->dom->createTextNode($token[
'data']);
1810 $this->dom->appendChild(
$text);
1819 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
1827 $html = $this->dom->createElement(
'html');
1828 $this->dom->appendChild(
$html);
1829 $this->stack[] =
$html;
1845 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'html') {
1853 foreach ($token[
'attr'] as $attr) {
1854 if (!$this->stack[0]->hasAttribute($attr[
'name'])) {
1855 $this->stack[0]->setAttribute($attr[
'name'], $attr[
'value']);
1867 switch ($this->mode) {
1872 return $this->
inHead($token);
1878 return $this->
inBody($token);
1881 return $this->
inTable($token);
1893 return $this->
inRow($token);
1896 return $this->
inCell($token);
1925 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
1937 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') {
1943 $this->head_pointer = $element;
1954 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') ||
1956 '/^[\t\n\x0b\x0c ]$/',
1970 return $this->
inHead($token);
1990 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) || (
1992 end($this->stack)->nodeName,
1993 array(
'title',
'style',
'script')
2006 in_array($token[
'name'], array(
'title',
'style',
'script'))
2008 array_pop($this->stack);
2012 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'title') {
2016 if ($this->head_pointer !==
null) {
2018 $this->head_pointer->appendChild($element);
2028 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'style') {
2032 if ($this->head_pointer !==
null) {
2034 $this->head_pointer->appendChild($element);
2044 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'script') {
2047 $this->head_pointer->appendChild($element);
2055 array(
'base',
'link',
'meta')
2061 if ($this->head_pointer !==
null) {
2063 $this->head_pointer->appendChild($element);
2064 array_pop($this->stack);
2071 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'head') {
2074 if ($this->head_pointer->isSameNode(end($this->stack))) {
2075 array_pop($this->stack);
2086 } elseif (($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') ||
2087 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] !==
'html')
2095 if ($this->head_pointer->isSameNode(end($this->stack))) {
2121 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
2133 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'body') {
2141 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'frameset') {
2152 array(
'base',
'link',
'meta',
'script',
'style',
'title')
2158 return $this->
inHead($token);
2172 return $this->
inBody($token);
2180 switch ($token[
'type']) {
2198 switch ($token[
'name']) {
2205 return $this->
inHead($token);
2216 return $this->
inHead($token);
2225 if (count($this->stack) === 1 || $this->stack[1]->nodeName !==
'body') {
2234 foreach ($token[
'attr'] as $attr) {
2235 if (!$this->stack[1]->hasAttribute($attr[
'name'])) {
2236 $this->stack[1]->setAttribute($attr[
'name'], $attr[
'value']);
2277 if ($this->form_pointer !==
null) {
2297 $this->form_pointer = $element;
2317 $stack_length = count($this->stack) - 1;
2319 for (
$n = $stack_length; 0 <=
$n;
$n--) {
2323 $node = $this->stack[
$n];
2329 if ($token[
'name'] === $node->tagName || ($token[
'name'] !==
'li'
2330 && ($node->tagName ===
'dd' || $node->tagName ===
'dt'))
2332 for (
$x = $stack_length;
$x >=
$n;
$x--) {
2333 array_pop($this->stack);
2342 if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
2343 $node->tagName !==
'address' && $node->tagName !==
'div'
2398 while ($this->
elementInScope(array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6'))) {
2399 array_pop($this->stack);
2417 $leng = count($this->a_formatting);
2419 for (
$n = $leng - 1;
$n >= 0;
$n--) {
2420 if ($this->a_formatting[
$n] === self::MARKER) {
2423 } elseif ($this->a_formatting[
$n]->nodeName ===
'a') {
2442 $this->a_formatting[] = $el;
2467 $this->a_formatting[] = $el;
2560 array_pop($this->stack);
2580 array_pop($this->stack);
2587 $token[
'name'] =
'img';
2588 return $this->
inBody($token);
2602 $this->form_pointer !==
null
2603 ? $this->form_pointer->appendChild($element)
2604 : end($this->stack)->appendChild($element);
2607 array_pop($this->stack);
2617 if ($this->form_pointer ===
null) {
2660 'This is a searchable index. ' .
2661 'Insert your search keywords here: '
2668 $attr = $token[
'attr'];
2669 $attr[] = array(
'name' =>
'name',
'value' =>
'isindex');
2682 'This is a searchable index. ' .
2683 'Insert your search keywords here: '
2779 case 'event-source':
2802 switch ($token[
'name']) {
2808 if (count($this->stack) < 2 || $this->stack[1]->nodeName !==
'body') {
2813 } elseif (end($this->stack)->nodeName !==
'body') {
2866 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2867 if ($this->stack[
$n]->nodeName === $token[
'name']) {
2871 array_pop($this->stack);
2886 if (end($this->stack)->nodeName !== $token[
'name']) {
2896 array_pop($this->stack);
2900 $this->form_pointer =
null;
2917 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2919 array_pop($this->stack);
2947 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2948 if ($this->stack[
$n]->nodeName === $token[
'name']) {
2952 array_pop($this->stack);
2965 $elements = array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6');
2982 array_pop($this->stack);
3010 for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
3011 if ($this->a_formatting[$a] === self::MARKER) {
3014 } elseif ($this->a_formatting[$a]->tagName === $token[
'name']) {
3015 $formatting_element = $this->a_formatting[$a];
3016 $in_stack = in_array($formatting_element, $this->stack,
true);
3026 if (!isset($formatting_element) || ($in_stack &&
3035 } elseif (isset($formatting_element) && !$in_stack) {
3036 unset($this->a_formatting[$fe_af_pos]);
3037 $this->a_formatting = array_merge($this->a_formatting);
3046 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3047 $length = count($this->stack);
3049 for (
$s = $fe_s_pos + 1;
$s < $length;
$s++) {
3052 if ($category !== self::PHRASING && $category !== self::FORMATTING) {
3053 $furthest_block = $this->stack[
$s];
3063 if (!isset($furthest_block)) {
3064 for (
$n = $length - 1;
$n >= $fe_s_pos;
$n--) {
3065 array_pop($this->stack);
3068 unset($this->a_formatting[$fe_af_pos]);
3069 $this->a_formatting = array_merge($this->a_formatting);
3076 $common_ancestor = $this->stack[$fe_s_pos - 1];
3080 if ($furthest_block->parentNode !==
null) {
3081 $furthest_block->parentNode->removeChild($furthest_block);
3088 $bookmark = $fe_af_pos;
3092 $node = $furthest_block;
3093 $last_node = $furthest_block;
3096 for (
$n = array_search($node, $this->stack,
true) - 1;
$n >= 0;
$n--) {
3099 $node = $this->stack[
$n];
3105 if (!in_array($node, $this->a_formatting,
true)) {
3106 unset($this->stack[
$n]);
3107 $this->stack = array_merge($this->stack);
3117 if ($node === $formatting_element) {
3124 } elseif ($last_node === $furthest_block) {
3125 $bookmark = array_search($node, $this->a_formatting,
true) + 1;
3134 if ($node->hasChildNodes()) {
3135 $clone = $node->cloneNode();
3136 $s_pos = array_search($node, $this->stack,
true);
3137 $a_pos = array_search($node, $this->a_formatting,
true);
3139 $this->stack[$s_pos] = $clone;
3140 $this->a_formatting[$a_pos] = $clone;
3146 if ($last_node->parentNode !==
null) {
3147 $last_node->parentNode->removeChild($last_node);
3150 $node->appendChild($last_node);
3160 if ($last_node->parentNode !==
null) {
3161 $last_node->parentNode->removeChild($last_node);
3164 $common_ancestor->appendChild($last_node);
3168 $clone = $formatting_element->cloneNode();
3173 while ($furthest_block->hasChildNodes()) {
3174 $child = $furthest_block->firstChild;
3175 $furthest_block->removeChild($child);
3176 $clone->appendChild($child);
3180 $furthest_block->appendChild($clone);
3186 $fe_af_pos = array_search($formatting_element, $this->a_formatting,
true);
3187 unset($this->a_formatting[$fe_af_pos]);
3188 $this->a_formatting = array_merge($this->a_formatting);
3190 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
3191 $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
3192 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
3199 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3200 $fb_s_pos = array_search($furthest_block, $this->stack,
true);
3201 unset($this->stack[$fe_s_pos]);
3203 $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
3204 $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
3205 $this->stack = array_merge($s_part1, array($clone), $s_part2);
3208 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
3232 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3233 if ($this->stack[
$n]->nodeName === $token[
'name']) {
3237 array_pop($this->stack);
3240 $marker = end(array_keys($this->a_formatting, self::MARKER,
true));
3242 for (
$n = count($this->a_formatting) - 1;
$n > $marker;
$n--) {
3243 array_pop($this->a_formatting);
3276 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3279 $node = end($this->stack);
3283 if ($token[
'name'] === $node->nodeName) {
3294 for (
$x = count($this->stack) -
$n;
$x >=
$n;
$x--) {
3295 array_pop($this->stack);
3301 if ($category !== self::SPECIAL && $category !== self::SCOPING) {
3318 $clear = array(
'html',
'table');
3324 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
3327 $text = $this->dom->createTextNode($token[
'data']);
3328 end($this->stack)->appendChild(
$text);
3334 $comment = $this->dom->createComment($token[
'data']);
3335 end($this->stack)->appendChild(
$comment);
3339 $token[
'name'] ===
'caption'
3355 $token[
'name'] ===
'colgroup'
3367 $token[
'name'] ===
'col'
3371 'name' =>
'colgroup',
3382 array(
'tbody',
'tfoot',
'thead')
3395 in_array($token[
'name'], array(
'td',
'th',
'tr'))
3411 $token[
'name'] ===
'table'
3427 $token[
'name'] ===
'table'
3447 $current = end($this->stack)->nodeName;
3448 array_pop($this->stack);
3489 end($this->stack)->nodeName,
3490 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
3504 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3505 if ($this->stack[
$n]->nodeName ===
'table') {
3512 $this->foster_parent =
$table->parentNode;
3514 } elseif (!isset(
$table)) {
3515 $this->foster_parent = $this->stack[0];
3517 } elseif (isset(
$table) && (
$table->parentNode ===
null ||
3518 $table->parentNode->nodeType !== XML_ELEMENT_NODE)
3520 $this->foster_parent = $this->stack[
$n - 1];
3531 if ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'caption') {
3550 $node = end($this->stack)->nodeName;
3551 array_pop($this->stack);
3553 if ($node ===
'caption') {
3583 $token[
'name'] ===
'table')
3590 'name' =>
'caption',
3595 return $this->
inTable($token);
3629 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
3632 $text = $this->dom->createTextNode($token[
'data']);
3633 end($this->stack)->appendChild(
$text);
3639 $comment = $this->dom->createComment($token[
'data']);
3640 end($this->stack)->appendChild(
$comment);
3643 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'col') {
3647 array_pop($this->stack);
3651 $token[
'name'] ===
'colgroup'
3655 if (end($this->stack)->nodeName ===
'html') {
3662 array_pop($this->stack);
3667 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'col') {
3676 'name' =>
'colgroup',
3681 return $this->
inTable($token);
3687 $clear = array(
'tbody',
'tfoot',
'thead',
'html');
3701 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')
3713 return $this->
inRow($token);
3717 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))
3732 array_pop($this->stack);
3740 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoor',
'thead')
3747 if (!$this->
elementInScope(array(
'tbody',
'thead',
'tfoot'),
true)) {
3760 'name' => end($this->stack)->nodeName,
3772 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
3786 $clear = array(
'tr',
'html');
3790 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')
3805 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'tr') {
3820 array_pop($this->stack);
3828 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoot',
'thead',
'tr')
3840 return $this->
inCell($token);
3844 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))
3863 return $this->
inCell($token);
3870 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
3886 ($token[
'name'] ===
'td' || $token[
'name'] ===
'th')
3907 $node = end($this->stack)->nodeName;
3908 array_pop($this->stack);
3910 if ($node === $token[
'name']) {
3951 return $this->
inRow($token);
3981 return $this->
inRow($token);
3988 array(
'body',
'caption',
'col',
'colgroup',
'html')
3997 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
4011 return $this->
inRow($token);
4038 $token[
'name'] ===
'option'
4042 if (end($this->stack)->nodeName ===
'option') {
4056 $token[
'name'] ===
'optgroup'
4060 if (end($this->stack)->nodeName ===
'option') {
4071 if (end($this->stack)->nodeName ===
'optgroup') {
4074 'name' =>
'optgroup',
4085 $token[
'name'] ===
'optgroup'
4091 $elements_in_stack = count($this->stack);
4093 if ($this->stack[$elements_in_stack - 1]->nodeName ===
'option' &&
4094 $this->stack[$elements_in_stack - 2]->nodeName ===
'optgroup'
4107 if ($this->stack[$elements_in_stack - 1] ===
'optgroup') {
4108 array_pop($this->stack);
4113 $token[
'name'] ===
'option'
4118 if (end($this->stack)->nodeName ===
'option') {
4119 array_pop($this->stack);
4124 $token[
'name'] ===
'select'
4137 $current = end($this->stack)->nodeName;
4138 array_pop($this->stack);
4150 } elseif ($token[
'name'] ===
'select' &&
4210 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4221 $comment = $this->dom->createComment($token[
'data']);
4222 $this->stack[0]->appendChild(
$comment);
4225 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') {
4239 return $this->
inBody($token);
4251 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4263 } elseif ($token[
'name'] ===
'frameset' &&
4269 } elseif ($token[
'name'] ===
'frameset' &&
4274 if (end($this->stack)->nodeName ===
'html') {
4280 array_pop($this->stack);
4290 } elseif ($token[
'name'] ===
'frame' &&
4297 array_pop($this->stack);
4300 } elseif ($token[
'name'] ===
'noframes' &&
4320 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4332 } elseif ($token[
'name'] ===
'html' &&
4339 } elseif ($token[
'name'] ===
'noframes' &&
4364 $comment = $this->dom->createComment($token[
'data']);
4371 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4380 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
4400 $token[
'name'] = preg_replace(
'/[^a-z0-9-]/i',
'', $token[
'name']);
4402 $token[
'name'] = ltrim($token[
'name'],
'-0..9');
4404 if ($token[
'name'] ===
'') {
4405 $token[
'name'] =
'span';
4409 $el = $this->dom->createElement($token[
'name']);
4411 foreach ($token[
'attr'] as $attr) {
4412 if (!$el->hasAttribute($attr[
'name'])) {
4413 $el->setAttribute($attr[
'name'], $attr[
'value']);
4418 $this->stack[] = $el;
4437 if ($this->foster_parent ===
null) {
4438 end($this->stack)->appendChild($node);
4440 } elseif ($this->foster_parent !==
null) {
4447 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
4448 if ($this->stack[
$n]->nodeName ===
'table' &&
4449 $this->stack[
$n]->parentNode !==
null
4456 if (isset(
$table) && $this->foster_parent->isSameNode(
$table->parentNode)) {
4457 $this->foster_parent->insertBefore($node,
$table);
4459 $this->foster_parent->appendChild($node);
4462 $this->foster_parent =
null;
4468 if (is_array($el)) {
4469 foreach ($el as $element) {
4478 $leng = count($this->stack);
4480 for (
$n = 0;
$n < $leng;
$n++) {
4483 $node = $this->stack[$leng - 1 -
$n];
4485 if ($node->tagName === $el) {
4489 } elseif ($node->tagName ===
'table') {
4494 } elseif (
$table ===
true && in_array(
4511 } elseif ($node === $node->ownerDocument->documentElement) {
4530 $formatting_elements = count($this->a_formatting);
4532 if ($formatting_elements === 0) {
4538 $entry = end($this->a_formatting);
4544 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
4548 for ($a = $formatting_elements - 1; $a >= 0;
true) {
4552 $step_seven =
false;
4559 $entry = $this->a_formatting[$a];
4563 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
4571 if (isset($step_seven) && $step_seven ===
true) {
4573 $entry = $this->a_formatting[$a];
4577 $clone = $entry->cloneNode();
4581 end($this->stack)->appendChild($clone);
4582 $this->stack[] = $clone;
4586 $this->a_formatting[$a] = $clone;
4590 if (end($this->a_formatting) !== $clone) {
4607 $entry = end($this->a_formatting);
4610 array_pop($this->a_formatting);
4614 if ($entry === self::MARKER) {
4627 $node = end($this->stack);
4628 $elements = array_diff(array(
'dd',
'dt',
'li',
'p',
'td',
'th',
'tr'),
$exclude);
4630 while (in_array(end($this->stack)->nodeName, $elements)) {
4631 array_pop($this->stack);
4637 $name = $node->tagName;
4638 if (in_array(
$name, $this->special)) {
4640 } elseif (in_array(
$name, $this->scoping)) {
4642 } elseif (in_array(
$name, $this->formatting)) {
4657 $node = end($this->stack)->nodeName;
4659 if (in_array($node, $elements)) {
4662 array_pop($this->stack);
4671 $leng = count($this->stack);
4673 for (
$n = $leng - 1;
$n >= 0;
$n--) {
4675 $node = $this->stack[
$n];
4681 if ($this->stack[0]->isSameNode($node)) {
4687 if ($node->nodeName ===
'select') {
4693 } elseif ($node->nodeName ===
'td' || $node->nodeName ===
'th') {
4699 } elseif ($node->nodeName ===
'tr') {
4705 } elseif (in_array($node->nodeName, array(
'tbody',
'thead',
'tfoot'))) {
4711 } elseif ($node->nodeName ===
'caption') {
4717 } elseif ($node->nodeName ===
'colgroup') {
4723 } elseif ($node->nodeName ===
'table') {
4730 } elseif ($node->nodeName ===
'head') {
4736 } elseif ($node->nodeName ===
'body') {
4742 } elseif ($node->nodeName ===
'frameset') {
4750 } elseif ($node->nodeName ===
'html') {
4751 $this->mode = ($this->head_pointer ===
null)
4770 foreach (array(
'td',
'th') as $cell) {
const EOF
How fgetc() reports an End Of File.
An exception for terminatinating execution or to throw for unit testing.
getElementCategory($node)
elementInScope($el, $table=false)
clearStackToTableContext($elements)
appendToRealParent($node)
clearTheActiveFormattingElementsUpToTheLastMarker()
insertElement($token, $append=true, $check=false)
generateImpliedEndTags($exclude=array())
reconstructActiveFormattingElements()
beforeAttributeValueState()
beforeAttributeNameState()
attributeValueDoubleQuotedState()
markupDeclarationOpenState()
attributeValueSingleQuotedState()
attributeValueUnquotedState()
characters($char_class, $start)
afterAttributeNameState()
entityInAttributeValueState()
Parser that uses PHP 5's DOM extension (part of the core).
tokenizeDOM($node, &$tokens, $config)
Iterative function that tokenizes a node, putting it into an accumulator.
wrapHTML($html, $config, $context, $use_div=true)
Wraps an HTML fragment in the necessary HTML.
Our in-house implementation of a parser.
Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
tokenizeHTML($html, $config, $context)
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits,...
if(!array_key_exists('StateId', $_REQUEST)) $id
if(empty($password)) $table