28 }
catch (DOMException $e) {
36 $doc->getElementsByTagName(
'html')->item(0)->
37 getElementsByTagName(
'body')->item(0)
79 private $escape =
false;
80 private $entities = array(
467 $this->content_model = self::PCDATA;
469 $this->state =
'data';
471 while ($this->state !== null) {
472 $this->{$this->state .
'State'}();
478 return $this->tree->save();
483 return ($this->char < $this->
EOF)
484 ? $this->
data[$this->char]
490 if (
$s + $l < $this->
EOF) {
501 return preg_replace(
'#^([' . $char_class .
']+).*#s',
'\\1', substr($this->
data,
$start));
508 $char = $this->char();
510 if ($char ===
'&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
515 $this->state =
'entityData';
517 } elseif ($char ===
'-') {
524 if (($this->content_model === self::RCDATA || $this->content_model ===
525 self::CDATA) && $this->escape ===
false &&
526 $this->
char >= 3 && $this->character($this->
char - 4, 4) ===
'<!--' 528 $this->escape =
true;
535 'type' => self::CHARACTR,
541 } elseif ($char ===
'<' && ($this->content_model === self::PCDATA ||
542 (($this->content_model === self::RCDATA ||
543 $this->content_model === self::CDATA) && $this->escape ===
false))
553 $this->state =
'tagOpen';
556 } elseif ($char ===
'>') {
562 if (($this->content_model === self::RCDATA ||
563 $this->content_model === self::CDATA) && $this->escape ===
true &&
564 $this->character($this->
char, 3) ===
'-->' 566 $this->escape =
false;
573 'type' => self::CHARACTR,
578 } elseif ($this->
char === $this->
EOF) {
583 } elseif ($this->content_model === self::PLAINTEXT) {
589 'type' => self::CHARACTR,
590 'data' => substr($this->
data, $this->
char)
601 $len = strcspn($this->
data,
'<&', $this->
char);
602 $char = substr($this->
data, $this->
char, $len);
603 $this->
char += $len - 1;
607 'type' => self::CHARACTR,
612 $this->state =
'data';
619 $entity = $this->entity();
623 $char = (!$entity) ?
'&' : $entity;
626 'type' => self::CHARACTR,
632 $this->state =
'data';
637 switch ($this->content_model) {
645 if ($this->character($this->
char + 1) ===
'/') {
647 $this->state =
'closeTagOpen';
652 'type' => self::CHARACTR,
657 $this->state =
'data';
665 $char = $this->char();
670 $this->state =
'markupDeclarationOpen';
672 } elseif ($char ===
'/') {
675 $this->state =
'closeTagOpen';
677 } elseif (preg_match(
'/^[A-Za-z]$/', $char)) {
683 $this->token = array(
684 'name' => strtolower($char),
685 'type' => self::STARTTAG,
689 $this->state =
'tagName';
691 } elseif ($char ===
'>') {
697 'type' => self::CHARACTR,
702 $this->state =
'data';
704 } elseif ($char ===
'?') {
707 $this->state =
'bogusComment';
715 'type' => self::CHARACTR,
721 $this->state =
'data';
729 $next_node = strtolower($this->characters(
'A-Za-z', $this->
char + 1));
730 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
732 if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
733 (!$the_same || ($the_same && (!preg_match(
734 '/[\t\n\x0b\x0c >\/]/',
735 $this->character($this->
char + 1 + strlen($next_node))
736 ) || $this->
EOF === $this->
char)))
755 'type' => self::CHARACTR,
760 $this->state =
'data';
767 $char = $this->char();
769 if (preg_match(
'/^[A-Za-z]$/', $char)) {
775 $this->token = array(
776 'name' => strtolower($char),
777 'type' => self::ENDTAG
780 $this->state =
'tagName';
782 } elseif ($char ===
'>') {
785 $this->state =
'data';
787 } elseif ($this->
char === $this->
EOF) {
793 'type' => self::CHARACTR,
799 $this->state =
'data';
803 $this->state =
'bogusComment';
812 $char = $this->character($this->
char);
814 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
821 $this->state =
'beforeAttributeName';
823 } elseif ($char ===
'>') {
826 $this->emitToken($this->token);
827 $this->state =
'data';
829 } elseif ($this->
char === $this->
EOF) {
833 $this->emitToken($this->token);
836 $this->state =
'data';
838 } elseif ($char ===
'/') {
842 $this->state =
'beforeAttributeName';
848 $this->token[
'name'] .= strtolower($char);
849 $this->state =
'tagName';
857 $char = $this->character($this->
char);
859 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
866 $this->state =
'beforeAttributeName';
868 } elseif ($char ===
'>') {
871 $this->emitToken($this->token);
872 $this->state =
'data';
874 } elseif ($char ===
'/') {
878 $this->state =
'beforeAttributeName';
880 } elseif ($this->
char === $this->
EOF) {
884 $this->emitToken($this->token);
887 $this->state =
'data';
894 $this->token[
'attr'][] = array(
895 'name' => strtolower($char),
899 $this->state =
'attributeName';
907 $char = $this->character($this->
char);
909 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
916 $this->state =
'afterAttributeName';
918 } elseif ($char ===
'=') {
921 $this->state =
'beforeAttributeValue';
923 } elseif ($char ===
'>') {
926 $this->emitToken($this->token);
927 $this->state =
'data';
929 } elseif ($char ===
'/' && $this->character($this->
char + 1) !==
'>') {
933 $this->state =
'beforeAttributeName';
935 } elseif ($this->
char === $this->
EOF) {
939 $this->emitToken($this->token);
942 $this->state =
'data';
948 $last = count($this->token[
'attr']) - 1;
949 $this->token[
'attr'][$last][
'name'] .= strtolower($char);
951 $this->state =
'attributeName';
959 $char = $this->character($this->
char);
961 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
968 $this->state =
'afterAttributeName';
970 } elseif ($char ===
'=') {
973 $this->state =
'beforeAttributeValue';
975 } elseif ($char ===
'>') {
978 $this->emitToken($this->token);
979 $this->state =
'data';
981 } elseif ($char ===
'/' && $this->character($this->
char + 1) !==
'>') {
985 $this->state =
'beforeAttributeName';
987 } elseif ($this->
char === $this->
EOF) {
991 $this->emitToken($this->token);
994 $this->state =
'data';
1001 $this->token[
'attr'][] = array(
1002 'name' => strtolower($char),
1006 $this->state =
'attributeName';
1014 $char = $this->character($this->
char);
1016 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1023 $this->state =
'beforeAttributeValue';
1025 } elseif ($char ===
'"') {
1028 $this->state =
'attributeValueDoubleQuoted';
1030 } elseif ($char ===
'&') {
1035 $this->state =
'attributeValueUnquoted';
1037 } elseif ($char ===
'\'') {
1040 $this->state =
'attributeValueSingleQuoted';
1042 } elseif ($char ===
'>') {
1045 $this->emitToken($this->token);
1046 $this->state =
'data';
1052 $last = count($this->token[
'attr']) - 1;
1053 $this->token[
'attr'][$last][
'value'] .= $char;
1055 $this->state =
'attributeValueUnquoted';
1063 $char = $this->character($this->
char);
1065 if ($char ===
'"') {
1068 $this->state =
'beforeAttributeName';
1070 } elseif ($char ===
'&') {
1073 $this->entityInAttributeValueState(
'double');
1075 } elseif ($this->
char === $this->
EOF) {
1079 $this->emitToken($this->token);
1082 $this->state =
'data';
1088 $last = count($this->token[
'attr']) - 1;
1089 $this->token[
'attr'][$last][
'value'] .= $char;
1091 $this->state =
'attributeValueDoubleQuoted';
1099 $char = $this->character($this->
char);
1101 if ($char ===
'\'') {
1104 $this->state =
'beforeAttributeName';
1106 } elseif ($char ===
'&') {
1109 $this->entityInAttributeValueState(
'single');
1111 } elseif ($this->
char === $this->
EOF) {
1115 $this->emitToken($this->token);
1118 $this->state =
'data';
1124 $last = count($this->token[
'attr']) - 1;
1125 $this->token[
'attr'][$last][
'value'] .= $char;
1127 $this->state =
'attributeValueSingleQuoted';
1135 $char = $this->character($this->
char);
1137 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1144 $this->state =
'beforeAttributeName';
1146 } elseif ($char ===
'&') {
1149 $this->entityInAttributeValueState();
1151 } elseif ($char ===
'>') {
1154 $this->emitToken($this->token);
1155 $this->state =
'data';
1161 $last = count($this->token[
'attr']) - 1;
1162 $this->token[
'attr'][$last][
'value'] .= $char;
1164 $this->state =
'attributeValueUnquoted';
1171 $entity = $this->entity();
1180 $last = count($this->token[
'attr']) - 1;
1181 $this->token[
'attr'][$last][
'value'] .= $char;
1194 $data = $this->characters(
'^>', $this->
char);
1198 'type' => self::COMMENT
1202 $this->
char += strlen(
$data);
1205 $this->state =
'data';
1208 if ($this->
char === $this->
EOF) {
1209 $this->
char = $this->
EOF - 1;
1218 if ($this->character($this->
char + 1, 2) ===
'--') {
1220 $this->state =
'comment';
1221 $this->token = array(
1223 'type' => self::COMMENT
1229 } elseif (strtolower($this->character($this->
char + 1, 7)) ===
'doctype') {
1231 $this->state =
'doctype';
1238 $this->state =
'bogusComment';
1246 $char = $this->char();
1249 if ($char ===
'-') {
1251 $this->state =
'commentDash';
1254 } elseif ($this->
char === $this->
EOF) {
1257 $this->emitToken($this->token);
1259 $this->state =
'data';
1265 $this->token[
'data'] .= $char;
1273 $char = $this->char();
1276 if ($char ===
'-') {
1278 $this->state =
'commentEnd';
1281 } elseif ($this->
char === $this->
EOF) {
1284 $this->emitToken($this->token);
1286 $this->state =
'data';
1292 $this->token[
'data'] .=
'-' . $char;
1293 $this->state =
'comment';
1301 $char = $this->char();
1303 if ($char ===
'>') {
1304 $this->emitToken($this->token);
1305 $this->state =
'data';
1307 } elseif ($char ===
'-') {
1308 $this->token[
'data'] .=
'-';
1310 } elseif ($this->
char === $this->
EOF) {
1311 $this->emitToken($this->token);
1313 $this->state =
'data';
1316 $this->token[
'data'] .=
'--' . $char;
1317 $this->state =
'comment';
1325 $char = $this->char();
1327 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1328 $this->state =
'beforeDoctypeName';
1332 $this->state =
'beforeDoctypeName';
1340 $char = $this->char();
1342 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1345 } elseif (preg_match(
'/^[a-z]$/', $char)) {
1346 $this->token = array(
1347 'name' => strtoupper($char),
1348 'type' => self::DOCTYPE,
1352 $this->state =
'doctypeName';
1354 } elseif ($char ===
'>') {
1358 'type' => self::DOCTYPE,
1363 $this->state =
'data';
1365 } elseif ($this->
char === $this->
EOF) {
1369 'type' => self::DOCTYPE,
1375 $this->state =
'data';
1378 $this->token = array(
1380 'type' => self::DOCTYPE,
1384 $this->state =
'doctypeName';
1392 $char = $this->char();
1394 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1395 $this->state =
'AfterDoctypeName';
1397 } elseif ($char ===
'>') {
1398 $this->emitToken($this->token);
1399 $this->state =
'data';
1401 } elseif (preg_match(
'/^[a-z]$/', $char)) {
1402 $this->token[
'name'] .= strtoupper($char);
1404 } elseif ($this->
char === $this->
EOF) {
1405 $this->emitToken($this->token);
1407 $this->state =
'data';
1410 $this->token[
'name'] .= $char;
1413 $this->token[
'error'] = ($this->token[
'name'] ===
'HTML')
1422 $char = $this->char();
1424 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1427 } elseif ($char ===
'>') {
1428 $this->emitToken($this->token);
1429 $this->state =
'data';
1431 } elseif ($this->
char === $this->
EOF) {
1432 $this->emitToken($this->token);
1434 $this->state =
'data';
1437 $this->token[
'error'] =
true;
1438 $this->state =
'bogusDoctype';
1446 $char = $this->char();
1448 if ($char ===
'>') {
1449 $this->emitToken($this->token);
1450 $this->state =
'data';
1452 } elseif ($this->
char === $this->
EOF) {
1453 $this->emitToken($this->token);
1455 $this->state =
'data';
1472 switch ($this->character($this->
char + 1)) {
1478 switch ($this->character($this->
char + 1)) {
1490 $char_class =
'0-9A-Fa-f';
1499 $char_class =
'0-9';
1506 $e_name = $this->characters($char_class, $this->
char + $char + 1);
1507 $entity = $this->character(
$start, $this->
char);
1508 $cond = strlen($e_name) > 0;
1519 $e_name = $this->characters(
'0-9A-Za-z;', $this->
char + 1);
1520 $len = strlen($e_name);
1522 for (
$c = 1;
$c <= $len;
$c++) {
1523 $id = substr($e_name, 0,
$c);
1526 if (in_array(
$id, $this->entities)) {
1527 if ($e_name[
$c - 1] !==
';') {
1528 if (
$c < $len && $e_name[
$c] ==
';') {
1537 $cond = isset($entity);
1551 return html_entity_decode(
'&' . rtrim($entity,
';') .
';', ENT_QUOTES,
'UTF-8');
1556 $emit = $this->tree->emitToken(
$token);
1558 if (is_int($emit)) {
1559 $this->content_model = $emit;
1561 } elseif (
$token[
'type'] === self::ENDTAG) {
1562 $this->content_model = self::PCDATA;
1568 $this->state = null;
1569 $this->tree->emitToken(
1579 public $stack = array();
1584 private $foster_parent = null;
1585 private $a_formatting = array();
1587 private $head_pointer = null;
1588 private $form_pointer = null;
1590 private $scoping = array(
'button',
'caption',
'html',
'marquee',
'object',
'table',
'td',
'th');
1591 private $formatting = array(
1606 private $special = array(
1671 const INIT_PHASE = 0;
1672 const ROOT_PHASE = 1;
1673 const MAIN_PHASE = 2;
1674 const END_PHASE = 3;
1677 const BEFOR_HEAD = 0;
1679 const AFTER_HEAD = 2;
1682 const IN_CAPTION = 5;
1683 const IN_CGROUP = 6;
1687 const IN_SELECT = 10;
1688 const AFTER_BODY = 11;
1689 const IN_FRAME = 12;
1690 const AFTR_FRAME = 13;
1695 const FORMATTING = 2;
1702 $this->phase = self::INIT_PHASE;
1703 $this->mode = self::BEFOR_HEAD;
1706 $this->dom->encoding =
'UTF-8';
1707 $this->dom->preserveWhiteSpace =
true;
1708 $this->dom->substituteEntities =
true;
1709 $this->dom->strictErrorChecking =
false;
1715 switch ($this->phase) {
1716 case self::INIT_PHASE:
1717 return $this->initPhase(
$token);
1719 case self::ROOT_PHASE:
1720 return $this->rootElementPhase(
$token);
1722 case self::MAIN_PHASE:
1723 return $this->mainPhase(
$token);
1725 case self::END_PHASE :
1726 return $this->trailingEndPhase(
$token);
1750 !preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data']))
1757 $this->phase = self::ROOT_PHASE;
1758 return $this->rootElementPhase(
$token);
1761 } elseif (isset(
$token[
'error']) && !
$token[
'error']) {
1766 $doctype =
new DOMDocumentType(null, null,
'HTML');
1770 $this->phase = self::ROOT_PHASE;
1775 } elseif (isset(
$token[
'data']) && preg_match(
1776 '/^[\t\n\x0b\x0c ]+$/',
1781 $text = $this->dom->createTextNode(
$token[
'data']);
1782 $this->dom->appendChild(
$text);
1806 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])
1809 $text = $this->dom->createTextNode(
$token[
'data']);
1810 $this->dom->appendChild(
$text);
1819 !preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])) ||
1827 $html = $this->dom->createElement(
'html');
1828 $this->dom->appendChild(
$html);
1829 $this->stack[] =
$html;
1831 $this->phase = self::MAIN_PHASE;
1832 return $this->mainPhase(
$token);
1853 foreach (
$token[
'attr'] as $attr) {
1854 if (!$this->stack[0]->hasAttribute($attr[
'name'])) {
1855 $this->stack[0]->setAttribute($attr[
'name'], $attr[
'value']);
1862 $this->generateImpliedEndTags();
1867 switch ($this->mode) {
1868 case self::BEFOR_HEAD:
1869 return $this->beforeHead(
$token);
1872 return $this->inHead(
$token);
1874 case self::AFTER_HEAD:
1875 return $this->afterHead(
$token);
1878 return $this->inBody(
$token);
1880 case self::IN_TABLE:
1881 return $this->inTable(
$token);
1883 case self::IN_CAPTION:
1884 return $this->inCaption(
$token);
1886 case self::IN_CGROUP:
1887 return $this->inColumnGroup(
$token);
1889 case self::IN_TBODY:
1890 return $this->inTableBody(
$token);
1893 return $this->inRow(
$token);
1896 return $this->inCell(
$token);
1898 case self::IN_SELECT:
1899 return $this->inSelect(
$token);
1901 case self::AFTER_BODY:
1902 return $this->afterBody(
$token);
1904 case self::IN_FRAME:
1905 return $this->inFrameset(
$token);
1907 case self::AFTR_FRAME:
1908 return $this->afterFrameset(
$token);
1910 case self::END_PHASE:
1911 return $this->trailingEndPhase(
$token);
1925 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])
1928 $this->insertText(
$token[
'data']);
1934 $this->insertComment(
$token[
'data']);
1940 $element = $this->insertElement(
$token);
1943 $this->head_pointer = $element;
1946 $this->mode = self::IN_HEAD;
1956 '/^[\t\n\x0b\x0c ]$/',
1970 return $this->inHead(
$token);
1990 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])) || (
1992 end($this->stack)->nodeName,
1993 array(
'title',
'style',
'script')
1997 $this->insertText(
$token[
'data']);
2003 $this->insertComment(
$token[
'data']);
2006 in_array(
$token[
'name'], array(
'title',
'style',
'script'))
2008 array_pop($this->stack);
2016 if ($this->head_pointer !== null) {
2017 $element = $this->insertElement(
$token,
false);
2018 $this->head_pointer->appendChild($element);
2021 $element = $this->insertElement(
$token);
2032 if ($this->head_pointer !== null) {
2033 $element = $this->insertElement(
$token,
false);
2034 $this->head_pointer->appendChild($element);
2037 $this->insertElement(
$token);
2046 $element = $this->insertElement(
$token,
false);
2047 $this->head_pointer->appendChild($element);
2055 array(
'base',
'link',
'meta')
2061 if ($this->head_pointer !== null) {
2062 $element = $this->insertElement(
$token,
false);
2063 $this->head_pointer->appendChild($element);
2064 array_pop($this->stack);
2067 $this->insertElement(
$token);
2074 if ($this->head_pointer->isSameNode(end($this->stack))) {
2075 array_pop($this->stack);
2083 $this->mode = self::AFTER_HEAD;
2095 if ($this->head_pointer->isSameNode(end($this->stack))) {
2105 $this->mode = self::AFTER_HEAD;
2109 return $this->afterHead(
$token);
2121 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])
2124 $this->insertText(
$token[
'data']);
2130 $this->insertComment(
$token[
'data']);
2135 $this->insertElement(
$token);
2138 $this->mode = self::IN_BODY;
2143 $this->insertElement(
$token);
2146 $this->mode = self::IN_FRAME;
2152 array(
'base',
'link',
'meta',
'script',
'style',
'title')
2157 $this->mode = self::IN_HEAD;
2158 return $this->inHead(
$token);
2172 return $this->inBody(
$token);
2180 switch (
$token[
'type']) {
2184 $this->reconstructActiveFormattingElements();
2187 $this->insertText(
$token[
'data']);
2194 $this->insertComment(
$token[
'data']);
2198 switch (
$token[
'name']) {
2205 return $this->inHead(
$token);
2216 return $this->inHead(
$token);
2225 if (count($this->stack) === 1 || $this->stack[1]->nodeName !==
'body') {
2234 foreach (
$token[
'attr'] as $attr) {
2235 if (!$this->stack[1]->hasAttribute($attr[
'name'])) {
2236 $this->stack[1]->setAttribute($attr[
'name'], $attr[
'value']);
2260 if ($this->elementInScope(
'p')) {
2270 $this->insertElement(
$token);
2277 if ($this->form_pointer !== null) {
2285 if ($this->elementInScope(
'p')) {
2296 $element = $this->insertElement(
$token);
2297 $this->form_pointer = $element;
2308 if ($this->elementInScope(
'p')) {
2317 $stack_length = count($this->stack) - 1;
2319 for (
$n = $stack_length; 0 <=
$n;
$n--) {
2323 $node = $this->stack[
$n];
2324 $cat = $this->getElementCategory($node->tagName);
2329 if (
$token[
'name'] === $node->tagName || (
$token[
'name'] !==
'li' 2330 && ($node->tagName ===
'dd' || $node->tagName ===
'dt'))
2332 for (
$x = $stack_length;
$x >=
$n;
$x--) {
2333 array_pop($this->stack);
2342 if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
2343 $node->tagName !==
'address' && $node->tagName !==
'div' 2351 $this->insertElement(
$token);
2359 if ($this->elementInScope(
'p')) {
2369 $this->insertElement(
$token);
2384 if ($this->elementInScope(
'p')) {
2398 while ($this->elementInScope(array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6'))) {
2399 array_pop($this->stack);
2403 $this->insertElement(
$token);
2417 $leng = count($this->a_formatting);
2419 for (
$n = $leng - 1;
$n >= 0;
$n--) {
2420 if ($this->a_formatting[
$n] === self::MARKER) {
2423 } elseif ($this->a_formatting[
$n]->nodeName ===
'a') {
2435 $this->reconstructActiveFormattingElements();
2438 $el = $this->insertElement(
$token);
2442 $this->a_formatting[] = $el;
2460 $this->reconstructActiveFormattingElements();
2463 $el = $this->insertElement(
$token);
2467 $this->a_formatting[] = $el;
2476 if ($this->elementInScope(
'button')) {
2486 $this->reconstructActiveFormattingElements();
2489 $this->insertElement(
$token);
2493 $this->a_formatting[] = self::MARKER;
2500 $this->reconstructActiveFormattingElements();
2503 $this->insertElement(
$token);
2507 $this->a_formatting[] = self::MARKER;
2513 $this->reconstructActiveFormattingElements();
2516 $this->insertElement(
$token);
2526 if ($this->elementInScope(
'p')) {
2536 $this->insertElement(
$token);
2539 $this->mode = self::IN_TABLE;
2554 $this->reconstructActiveFormattingElements();
2557 $this->insertElement(
$token);
2560 array_pop($this->stack);
2567 if ($this->elementInScope(
'p')) {
2577 $this->insertElement(
$token);
2580 array_pop($this->stack);
2588 return $this->inBody(
$token);
2594 $this->reconstructActiveFormattingElements();
2597 $element = $this->insertElement(
$token,
false);
2602 $this->form_pointer !== null
2603 ? $this->form_pointer->appendChild($element)
2604 : end($this->stack)->appendChild($element);
2607 array_pop($this->stack);
2617 if ($this->form_pointer === null) {
2660 'This is a searchable index. ' .
2661 'Insert your search keywords here: ' 2669 $attr[] = array(
'name' =>
'name',
'value' =>
'isindex');
2682 'This is a searchable index. ' .
2683 'Insert your search keywords here: ' 2726 $this->insertElement(
$token);
2738 $this->insertElement(
$token);
2747 $this->reconstructActiveFormattingElements();
2750 $this->insertElement(
$token);
2753 $this->mode = self::IN_SELECT;
2779 case 'event-source':
2794 $this->reconstructActiveFormattingElements();
2796 $this->insertElement(
$token,
true,
true);
2802 switch (
$token[
'name']) {
2808 if (count($this->stack) < 2 || $this->stack[1]->nodeName !==
'body') {
2813 } elseif (end($this->stack)->nodeName !==
'body') {
2818 $this->mode = self::AFTER_BODY;
2833 return $this->afterBody(
$token);
2854 if ($this->elementInScope(
$token[
'name'])) {
2855 $this->generateImpliedEndTags();
2866 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2867 if ($this->stack[
$n]->nodeName ===
$token[
'name']) {
2871 array_pop($this->stack);
2881 if ($this->elementInScope(
$token[
'name'])) {
2882 $this->generateImpliedEndTags();
2886 if (end($this->stack)->nodeName !==
$token[
'name']) {
2896 array_pop($this->stack);
2900 $this->form_pointer = null;
2907 if ($this->elementInScope(
'p')) {
2908 $this->generateImpliedEndTags(array(
'p'));
2917 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2918 if ($this->elementInScope(
'p')) {
2919 array_pop($this->stack);
2936 if ($this->elementInScope(
$token[
'name'])) {
2937 $this->generateImpliedEndTags(array(
$token[
'name']));
2947 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2948 if ($this->stack[
$n]->nodeName ===
$token[
'name']) {
2952 array_pop($this->stack);
2965 $elements = array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6');
2970 if ($this->elementInScope($elements)) {
2971 $this->generateImpliedEndTags();
2981 while ($this->elementInScope($elements)) {
2982 array_pop($this->stack);
3010 for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
3011 if ($this->a_formatting[$a] === self::MARKER) {
3014 } elseif ($this->a_formatting[$a]->tagName ===
$token[
'name']) {
3015 $formatting_element = $this->a_formatting[$a];
3016 $in_stack = in_array($formatting_element, $this->stack,
true);
3026 if (!isset($formatting_element) || ($in_stack &&
3027 !$this->elementInScope(
$token[
'name']))
3035 } elseif (isset($formatting_element) && !$in_stack) {
3036 unset($this->a_formatting[$fe_af_pos]);
3037 $this->a_formatting = array_merge($this->a_formatting);
3046 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3047 $length = count($this->stack);
3049 for (
$s = $fe_s_pos + 1;
$s < $length;
$s++) {
3050 $category = $this->getElementCategory($this->stack[
$s]->nodeName);
3052 if ($category !== self::PHRASING && $category !== self::FORMATTING) {
3053 $furthest_block = $this->stack[
$s];
3063 if (!isset($furthest_block)) {
3064 for (
$n = $length - 1;
$n >= $fe_s_pos;
$n--) {
3065 array_pop($this->stack);
3068 unset($this->a_formatting[$fe_af_pos]);
3069 $this->a_formatting = array_merge($this->a_formatting);
3076 $common_ancestor = $this->stack[$fe_s_pos - 1];
3080 if ($furthest_block->parentNode !== null) {
3081 $furthest_block->parentNode->removeChild($furthest_block);
3088 $bookmark = $fe_af_pos;
3092 $node = $furthest_block;
3093 $last_node = $furthest_block;
3096 for (
$n = array_search($node, $this->stack,
true) - 1;
$n >= 0;
$n--) {
3099 $node = $this->stack[
$n];
3105 if (!in_array($node, $this->a_formatting,
true)) {
3106 unset($this->stack[
$n]);
3107 $this->stack = array_merge($this->stack);
3117 if ($node === $formatting_element) {
3124 } elseif ($last_node === $furthest_block) {
3125 $bookmark = array_search($node, $this->a_formatting,
true) + 1;
3134 if ($node->hasChildNodes()) {
3135 $clone = $node->cloneNode();
3136 $s_pos = array_search($node, $this->stack,
true);
3137 $a_pos = array_search($node, $this->a_formatting,
true);
3139 $this->stack[$s_pos] = $clone;
3140 $this->a_formatting[$a_pos] = $clone;
3146 if ($last_node->parentNode !== null) {
3147 $last_node->parentNode->removeChild($last_node);
3150 $node->appendChild($last_node);
3160 if ($last_node->parentNode !== null) {
3161 $last_node->parentNode->removeChild($last_node);
3164 $common_ancestor->appendChild($last_node);
3168 $clone = $formatting_element->cloneNode();
3173 while ($furthest_block->hasChildNodes()) {
3174 $child = $furthest_block->firstChild;
3175 $furthest_block->removeChild($child);
3176 $clone->appendChild($child);
3180 $furthest_block->appendChild($clone);
3186 $fe_af_pos = array_search($formatting_element, $this->a_formatting,
true);
3187 unset($this->a_formatting[$fe_af_pos]);
3188 $this->a_formatting = array_merge($this->a_formatting);
3190 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
3191 $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
3192 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
3199 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3200 $fb_s_pos = array_search($furthest_block, $this->stack,
true);
3201 unset($this->stack[$fe_s_pos]);
3203 $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
3204 $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
3205 $this->stack = array_merge($s_part1, array($clone), $s_part2);
3208 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
3220 if ($this->elementInScope(
$token[
'name'])) {
3221 $this->generateImpliedEndTags();
3232 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3233 if ($this->stack[
$n]->nodeName ===
$token[
'name']) {
3237 array_pop($this->stack);
3240 $marker = end(array_keys($this->a_formatting, self::MARKER,
true));
3242 for (
$n = count($this->a_formatting) - 1;
$n > $marker;
$n--) {
3243 array_pop($this->a_formatting);
3276 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3279 $node = end($this->stack);
3283 if (
$token[
'name'] === $node->nodeName) {
3285 $this->generateImpliedEndTags();
3294 for (
$x = count($this->stack) -
$n;
$x >=
$n;
$x--) {
3295 array_pop($this->stack);
3299 $category = $this->getElementCategory($node);
3301 if ($category !== self::SPECIAL && $category !== self::SCOPING) {
3318 $clear = array(
'html',
'table');
3324 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])
3327 $text = $this->dom->createTextNode(
$token[
'data']);
3328 end($this->stack)->appendChild(
$text);
3335 end($this->stack)->appendChild(
$comment);
3339 $token[
'name'] ===
'caption' 3342 $this->clearStackToTableContext($clear);
3346 $this->a_formatting[] = self::MARKER;
3350 $this->insertElement(
$token);
3351 $this->mode = self::IN_CAPTION;
3355 $token[
'name'] ===
'colgroup' 3358 $this->clearStackToTableContext($clear);
3362 $this->insertElement(
$token);
3363 $this->mode = self::IN_CGROUP;
3371 'name' =>
'colgroup',
3377 $this->inColumnGroup(
$token);
3382 array(
'tbody',
'tfoot',
'thead')
3386 $this->clearStackToTableContext($clear);
3390 $this->insertElement(
$token);
3391 $this->mode = self::IN_TBODY;
3395 in_array(
$token[
'name'], array(
'td',
'th',
'tr'))
3407 return $this->inTableBody(
$token);
3411 $token[
'name'] ===
'table' 3423 return $this->mainPhase(
$token);
3427 $token[
'name'] ===
'table' 3432 if (!$this->elementInScope(
$token[
'name'],
true)) {
3438 $this->generateImpliedEndTags();
3447 $current = end($this->stack)->nodeName;
3448 array_pop($this->stack);
3456 $this->resetInsertionMode();
3489 end($this->stack)->nodeName,
3490 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
3504 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3505 if ($this->stack[
$n]->nodeName ===
'table') {
3512 $this->foster_parent =
$table->parentNode;
3514 } elseif (!isset(
$table)) {
3515 $this->foster_parent = $this->stack[0];
3517 } elseif (isset(
$table) && (
$table->parentNode === null ||
3518 $table->parentNode->nodeType !== XML_ELEMENT_NODE)
3520 $this->foster_parent = $this->stack[
$n - 1];
3535 if (!$this->elementInScope(
$token[
'name'],
true)) {
3541 $this->generateImpliedEndTags();
3550 $node = end($this->stack)->nodeName;
3551 array_pop($this->stack);
3553 if ($node ===
'caption') {
3560 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
3563 $this->mode = self::IN_TABLE;
3583 $token[
'name'] ===
'table')
3590 'name' =>
'caption',
3595 return $this->inTable(
$token);
3629 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])
3632 $text = $this->dom->createTextNode(
$token[
'data']);
3633 end($this->stack)->appendChild(
$text);
3640 end($this->stack)->appendChild(
$comment);
3646 $this->insertElement(
$token);
3647 array_pop($this->stack);
3651 $token[
'name'] ===
'colgroup' 3655 if (end($this->stack)->nodeName ===
'html') {
3662 array_pop($this->stack);
3663 $this->mode = self::IN_TABLE;
3674 $this->inColumnGroup(
3676 'name' =>
'colgroup',
3681 return $this->inTable(
$token);
3687 $clear = array(
'tbody',
'tfoot',
'thead',
'html');
3692 $this->clearStackToTableContext($clear);
3696 $this->insertElement(
$token);
3697 $this->mode = self::IN_ROW;
3713 return $this->inRow(
$token);
3717 in_array(
$token[
'name'], array(
'tbody',
'tfoot',
'thead'))
3722 if (!$this->elementInScope(
$token[
'name'],
true)) {
3728 $this->clearStackToTableContext($clear);
3732 array_pop($this->stack);
3733 $this->mode = self::IN_TABLE;
3740 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoor',
'thead')
3747 if (!$this->elementInScope(array(
'tbody',
'thead',
'tfoot'),
true)) {
3753 $this->clearStackToTableContext($clear);
3760 'name' => end($this->stack)->nodeName,
3765 return $this->mainPhase(
$token);
3772 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
3786 $clear = array(
'tr',
'html');
3793 $this->clearStackToTableContext($clear);
3797 $this->insertElement(
$token);
3798 $this->mode = self::IN_CELL;
3802 $this->a_formatting[] = self::MARKER;
3809 if (!$this->elementInScope(
$token[
'name'],
true)) {
3815 $this->clearStackToTableContext($clear);
3820 array_pop($this->stack);
3821 $this->mode = self::IN_TBODY;
3828 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoot',
'thead',
'tr')
3840 return $this->inCell(
$token);
3844 in_array(
$token[
'name'], array(
'tbody',
'tfoot',
'thead'))
3849 if (!$this->elementInScope(
$token[
'name'],
true)) {
3863 return $this->inCell(
$token);
3870 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
3891 if (!$this->elementInScope(
$token[
'name'],
true)) {
3898 $this->generateImpliedEndTags(array(
$token[
'name']));
3907 $node = end($this->stack)->nodeName;
3908 array_pop($this->stack);
3910 if ($node ===
$token[
'name']) {
3917 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
3921 $this->mode = self::IN_ROW;
3944 if (!$this->elementInScope(array(
'td',
'th'),
true)) {
3951 return $this->inRow(
$token);
3974 if (!$this->elementInScope(array(
'td',
'th'),
true)) {
3981 return $this->inRow(
$token);
3988 array(
'body',
'caption',
'col',
'colgroup',
'html')
3997 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
4004 if (!$this->elementInScope(
$token[
'name'],
true)) {
4011 return $this->inRow(
$token);
4028 $this->insertText(
$token[
'data']);
4034 $this->insertComment(
$token[
'data']);
4038 $token[
'name'] ===
'option' 4042 if (end($this->stack)->nodeName ===
'option') {
4052 $this->insertElement(
$token);
4056 $token[
'name'] ===
'optgroup' 4060 if (end($this->stack)->nodeName ===
'option') {
4071 if (end($this->stack)->nodeName ===
'optgroup') {
4074 'name' =>
'optgroup',
4081 $this->insertElement(
$token);
4085 $token[
'name'] ===
'optgroup' 4091 $elements_in_stack = count($this->stack);
4093 if ($this->stack[$elements_in_stack - 1]->nodeName ===
'option' &&
4094 $this->stack[$elements_in_stack - 2]->nodeName ===
'optgroup' 4107 if ($this->stack[$elements_in_stack - 1] ===
'optgroup') {
4108 array_pop($this->stack);
4113 $token[
'name'] ===
'option' 4118 if (end($this->stack)->nodeName ===
'option') {
4119 array_pop($this->stack);
4124 $token[
'name'] ===
'select' 4129 if (!$this->elementInScope(
$token[
'name'],
true)) {
4137 $current = end($this->stack)->nodeName;
4138 array_pop($this->stack);
4146 $this->resetInsertionMode();
4150 } elseif (
$token[
'name'] ===
'select' &&
4185 if ($this->elementInScope(
$token[
'name'],
true)) {
4193 $this->mainPhase(
$token);
4210 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])
4222 $this->stack[0]->appendChild(
$comment);
4232 $this->phase = self::END_PHASE;
4238 $this->mode = self::IN_BODY;
4239 return $this->inBody(
$token);
4251 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])
4254 $this->insertText(
$token[
'data']);
4260 $this->insertComment(
$token[
'data']);
4263 } elseif (
$token[
'name'] ===
'frameset' &&
4266 $this->insertElement(
$token);
4269 } elseif (
$token[
'name'] ===
'frameset' &&
4274 if (end($this->stack)->nodeName ===
'html') {
4280 array_pop($this->stack);
4286 $this->mode = self::AFTR_FRAME;
4290 } elseif (
$token[
'name'] ===
'frame' &&
4294 $this->insertElement(
$token);
4297 array_pop($this->stack);
4300 } elseif (
$token[
'name'] ===
'noframes' &&
4320 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])
4323 $this->insertText(
$token[
'data']);
4329 $this->insertComment(
$token[
'data']);
4332 } elseif (
$token[
'name'] ===
'html' &&
4336 $this->phase = self::END_PHASE;
4339 } elseif (
$token[
'name'] ===
'noframes' &&
4371 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])
4374 $this->mainPhase(
$token);
4380 preg_match(
'/^[\t\n\x0b\x0c ]+$/',
$token[
'data'])) ||
4385 $this->phase = self::MAIN_PHASE;
4386 return $this->mainPhase(
$token);
4400 $token[
'name'] = preg_replace(
'/[^a-z0-9-]/i',
'',
$token[
'name']);
4404 if (
$token[
'name'] ===
'') {
4409 $el = $this->dom->createElement(
$token[
'name']);
4411 foreach (
$token[
'attr'] as $attr) {
4412 if (!$el->hasAttribute($attr[
'name'])) {
4413 $el->setAttribute($attr[
'name'], $attr[
'value']);
4417 $this->appendToRealParent($el);
4418 $this->stack[] = $el;
4426 $this->appendToRealParent(
$text);
4432 $this->appendToRealParent(
$comment);
4437 if ($this->foster_parent === null) {
4438 end($this->stack)->appendChild($node);
4440 } elseif ($this->foster_parent !== null) {
4447 for (
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
4448 if ($this->stack[
$n]->nodeName ===
'table' &&
4449 $this->stack[
$n]->parentNode !== null
4456 if (isset(
$table) && $this->foster_parent->isSameNode(
$table->parentNode)) {
4457 $this->foster_parent->insertBefore($node,
$table);
4459 $this->foster_parent->appendChild($node);
4462 $this->foster_parent = null;
4468 if (is_array($el)) {
4469 foreach ($el as $element) {
4470 if ($this->elementInScope($element,
$table)) {
4478 $leng = count($this->stack);
4480 for (
$n = 0;
$n < $leng;
$n++) {
4483 $node = $this->stack[$leng - 1 -
$n];
4485 if ($node->tagName === $el) {
4489 } elseif ($node->tagName ===
'table') {
4494 } elseif (
$table ===
true && in_array(
4511 } elseif ($node === $node->ownerDocument->documentElement) {
4530 $formatting_elements = count($this->a_formatting);
4532 if ($formatting_elements === 0) {
4538 $entry = end($this->a_formatting);
4544 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
4548 for ($a = $formatting_elements - 1; $a >= 0;
true) {
4552 $step_seven =
false;
4559 $entry = $this->a_formatting[$a];
4563 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
4571 if (isset($step_seven) && $step_seven ===
true) {
4573 $entry = $this->a_formatting[$a];
4577 $clone = $entry->cloneNode();
4581 end($this->stack)->appendChild($clone);
4582 $this->stack[] = $clone;
4586 $this->a_formatting[$a] = $clone;
4590 if (end($this->a_formatting) !== $clone) {
4607 $entry = end($this->a_formatting);
4610 array_pop($this->a_formatting);
4614 if ($entry === self::MARKER) {
4627 $node = end($this->stack);
4628 $elements = array_diff(array(
'dd',
'dt',
'li',
'p',
'td',
'th',
'tr'),
$exclude);
4630 while (in_array(end($this->stack)->nodeName, $elements)) {
4631 array_pop($this->stack);
4637 $name = $node->tagName;
4638 if (in_array(
$name, $this->special)) {
4639 return self::SPECIAL;
4640 } elseif (in_array(
$name, $this->scoping)) {
4641 return self::SCOPING;
4642 } elseif (in_array(
$name, $this->formatting)) {
4643 return self::FORMATTING;
4645 return self::PHRASING;
4657 $node = end($this->stack)->nodeName;
4659 if (in_array($node, $elements)) {
4662 array_pop($this->stack);
4671 $leng = count($this->stack);
4673 for (
$n = $leng - 1;
$n >= 0;
$n--) {
4675 $node = $this->stack[
$n];
4681 if ($this->stack[0]->isSameNode($node)) {
4687 if ($node->nodeName ===
'select') {
4688 $this->mode = self::IN_SELECT;
4693 } elseif ($node->nodeName ===
'td' || $node->nodeName ===
'th') {
4694 $this->mode = self::IN_CELL;
4699 } elseif ($node->nodeName ===
'tr') {
4700 $this->mode = self::IN_ROW;
4705 } elseif (in_array($node->nodeName, array(
'tbody',
'thead',
'tfoot'))) {
4706 $this->mode = self::IN_TBODY;
4711 } elseif ($node->nodeName ===
'caption') {
4712 $this->mode = self::IN_CAPTION;
4717 } elseif ($node->nodeName ===
'colgroup') {
4718 $this->mode = self::IN_CGROUP;
4723 } elseif ($node->nodeName ===
'table') {
4724 $this->mode = self::IN_TABLE;
4730 } elseif ($node->nodeName ===
'head') {
4731 $this->mode = self::IN_BODY;
4736 } elseif ($node->nodeName ===
'body') {
4737 $this->mode = self::IN_BODY;
4742 } elseif ($node->nodeName ===
'frameset') {
4743 $this->mode = self::IN_FRAME;
4750 } elseif ($node->nodeName ===
'html') {
4751 $this->mode = ($this->head_pointer === null)
4760 $this->mode = self::IN_BODY;
4770 foreach (array(
'td',
'th') as $cell) {
4771 if ($this->elementInScope($cell,
true)) {
attributeValueUnquotedState()
attributeValueSingleQuotedState()
getElementCategory($node)
tokenizeDOM($node, &$tokens, $config)
Iterative function that tokenizes a node, putting it into an accumulator.
Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
wrapHTML($html, $config, $context, $use_div=true)
Wraps an HTML fragment in the necessary HTML.
if(!array_key_exists('StateId', $_REQUEST)) $id
clearTheActiveFormattingElementsUpToTheLastMarker()
characters($char_class, $start)
markupDeclarationOpenState()
Parser that uses PHP 5's DOM extension (part of the core).
tokenizeHTML($html, $config, $context)
beforeAttributeValueState()
Our in-house implementation of a parser.
generateImpliedEndTags($exclude=array())
entityInAttributeValueState()
clearStackToTableContext($elements)
elementInScope($el, $table=false)
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits...
insertElement($token, $append=true, $check=false)
appendToRealParent($node)
afterAttributeNameState()
attributeValueDoubleQuotedState()
if(empty($password)) $table
beforeAttributeNameState()
reconstructActiveFormattingElements()
const EOF
How fgetc() reports an End Of File.