16 $new_html = $this->
normalize($html, $config, $context);
17 $new_html = $this->
wrapHTML($new_html, $config, $context);
19 $parser =
new HTML5($new_html);
20 $doc = $parser->save();
21 }
catch (DOMException $e) {
24 $context->register(
'PH5PError', $e);
25 return $lexer->tokenizeHTML($html, $config, $context);
29 $doc->getElementsByTagName(
'html')->item(0)->
30 getElementsByTagName(
'body')->item(0)->
31 getElementsByTagName(
'div')->item(0)
// Escape flag used by the data state for RCDATA/CDATA content. It is set to
// true on a '-' when the four characters just read are '<!--', and reset to
// false on a '>' when '-->' is matched. While it is true, a '<' in
// RCDATA/CDATA content does not switch the tokenizer to the tagOpen state.
71 private $escape =
false;
// Named character reference names known to the tokenizer. Entity parsing
// consumes a run of [0-9A-Za-z;] and tests successive prefixes of it against
// this list with in_array(). Some names are listed both with and without the
// trailing ';' (e.g. 'amp;' and 'amp'); others only with it (e.g. 'Alpha;').
72 private $entities = array(
'AElig;',
'AElig',
'AMP;',
'AMP',
'Aacute;',
'Aacute',
73 'Acirc;',
'Acirc',
'Agrave;',
'Agrave',
'Alpha;',
'Aring;',
'Aring',
'Atilde;',
74 'Atilde',
'Auml;',
'Auml',
'Beta;',
'COPY;',
'COPY',
'Ccedil;',
'Ccedil',
'Chi;',
75 'Dagger;',
'Delta;',
'ETH;',
'ETH',
'Eacute;',
'Eacute',
'Ecirc;',
'Ecirc',
'Egrave;',
76 'Egrave',
'Epsilon;',
'Eta;',
'Euml;',
'Euml',
'GT;',
'GT',
'Gamma;',
'Iacute;',
77 'Iacute',
'Icirc;',
'Icirc',
'Igrave;',
'Igrave',
'Iota;',
'Iuml;',
'Iuml',
'Kappa;',
78 'LT;',
'LT',
'Lambda;',
'Mu;',
'Ntilde;',
'Ntilde',
'Nu;',
'OElig;',
'Oacute;',
79 'Oacute',
'Ocirc;',
'Ocirc',
'Ograve;',
'Ograve',
'Omega;',
'Omicron;',
'Oslash;',
80 'Oslash',
'Otilde;',
'Otilde',
'Ouml;',
'Ouml',
'Phi;',
'Pi;',
'Prime;',
'Psi;',
81 'QUOT;',
'QUOT',
'REG;',
'REG',
'Rho;',
'Scaron;',
'Sigma;',
'THORN;',
'THORN',
82 'TRADE;',
'Tau;',
'Theta;',
'Uacute;',
'Uacute',
'Ucirc;',
'Ucirc',
'Ugrave;',
83 'Ugrave',
'Upsilon;',
'Uuml;',
'Uuml',
'Xi;',
'Yacute;',
'Yacute',
'Yuml;',
'Zeta;',
84 'aacute;',
'aacute',
'acirc;',
'acirc',
'acute;',
'acute',
'aelig;',
'aelig',
85 'agrave;',
'agrave',
'alefsym;',
'alpha;',
'amp;',
'amp',
'and;',
'ang;',
'apos;',
86 'aring;',
'aring',
'asymp;',
'atilde;',
'atilde',
'auml;',
'auml',
'bdquo;',
'beta;',
87 'brvbar;',
'brvbar',
'bull;',
'cap;',
'ccedil;',
'ccedil',
'cedil;',
'cedil',
88 'cent;',
'cent',
'chi;',
'circ;',
'clubs;',
'cong;',
'copy;',
'copy',
'crarr;',
89 'cup;',
'curren;',
'curren',
'dArr;',
'dagger;',
'darr;',
'deg;',
'deg',
'delta;',
90 'diams;',
'divide;',
'divide',
'eacute;',
'eacute',
'ecirc;',
'ecirc',
'egrave;',
91 'egrave',
'empty;',
'emsp;',
'ensp;',
'epsilon;',
'equiv;',
'eta;',
'eth;',
'eth',
92 'euml;',
'euml',
'euro;',
'exist;',
'fnof;',
'forall;',
'frac12;',
'frac12',
93 'frac14;',
'frac14',
'frac34;',
'frac34',
'frasl;',
'gamma;',
'ge;',
'gt;',
'gt',
94 'hArr;',
'harr;',
'hearts;',
'hellip;',
'iacute;',
'iacute',
'icirc;',
'icirc',
95 'iexcl;',
'iexcl',
'igrave;',
'igrave',
'image;',
'infin;',
'int;',
'iota;',
96 'iquest;',
'iquest',
'isin;',
'iuml;',
'iuml',
'kappa;',
'lArr;',
'lambda;',
'lang;',
97 'laquo;',
'laquo',
'larr;',
'lceil;',
'ldquo;',
'le;',
'lfloor;',
'lowast;',
'loz;',
98 'lrm;',
'lsaquo;',
'lsquo;',
'lt;',
'lt',
'macr;',
'macr',
'mdash;',
'micro;',
'micro',
99 'middot;',
'middot',
'minus;',
'mu;',
'nabla;',
'nbsp;',
'nbsp',
'ndash;',
'ne;',
100 'ni;',
'not;',
'not',
'notin;',
'nsub;',
'ntilde;',
'ntilde',
'nu;',
'oacute;',
101 'oacute',
'ocirc;',
'ocirc',
'oelig;',
'ograve;',
'ograve',
'oline;',
'omega;',
102 'omicron;',
'oplus;',
'or;',
'ordf;',
'ordf',
'ordm;',
'ordm',
'oslash;',
'oslash',
103 'otilde;',
'otilde',
'otimes;',
'ouml;',
'ouml',
'para;',
'para',
'part;',
'permil;',
104 'perp;',
'phi;',
'pi;',
'piv;',
'plusmn;',
'plusmn',
'pound;',
'pound',
'prime;',
105 'prod;',
'prop;',
'psi;',
'quot;',
'quot',
'rArr;',
'radic;',
'rang;',
'raquo;',
106 'raquo',
'rarr;',
'rceil;',
'rdquo;',
'real;',
'reg;',
'reg',
'rfloor;',
'rho;',
107 'rlm;',
'rsaquo;',
'rsquo;',
'sbquo;',
'scaron;',
'sdot;',
'sect;',
'sect',
'shy;',
108 'shy',
'sigma;',
'sigmaf;',
'sim;',
'spades;',
'sub;',
'sube;',
'sum;',
'sup1;',
109 'sup1',
'sup2;',
'sup2',
'sup3;',
'sup3',
'sup;',
'supe;',
'szlig;',
'szlig',
'tau;',
110 'there4;',
'theta;',
'thetasym;',
'thinsp;',
'thorn;',
'thorn',
'tilde;',
'times;',
111 'times',
'trade;',
'uArr;',
'uacute;',
'uacute',
'uarr;',
'ucirc;',
'ucirc',
112 'ugrave;',
'ugrave',
'uml;',
'uml',
'upsih;',
'upsilon;',
'uuml;',
'uuml',
'weierp;',
113 'xi;',
'yacute;',
'yacute',
'yen;',
'yen',
'yuml;',
'yuml',
'zeta;',
'zwj;',
'zwnj;');
133 $this->content_model = self::PCDATA;
135 $this->state =
'data';
137 while($this->state !== null) {
138 $this->{$this->state.
'State'}();
143 return $this->tree->save();
147 return ($this->char < $this->
EOF)
148 ? $this->data[$this->char]
153 if($s + $l < $this->
EOF) {
155 return $this->data[$s];
157 return substr($this->data, $s, $l);
163 return preg_replace(
'#^(['.$char_class.
']+).*#s',
'\\1', substr($this->data, $start));
169 $char = $this->char();
171 if($char ===
'&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
176 $this->state =
'entityData';
178 } elseif($char ===
'-') {
185 if(($this->content_model === self::RCDATA || $this->content_model ===
186 self::CDATA) && $this->escape ===
false &&
187 $this->
char >= 3 && $this->character($this->
char - 4, 4) ===
'<!--') {
188 $this->escape =
true;
193 $this->emitToken(array(
194 'type' => self::CHARACTR,
199 } elseif($char ===
'<' && ($this->content_model === self::PCDATA ||
200 (($this->content_model === self::RCDATA ||
201 $this->content_model === self::CDATA) && $this->escape ===
false))) {
210 $this->state =
'tagOpen';
213 } elseif($char ===
'>') {
219 if(($this->content_model === self::RCDATA ||
220 $this->content_model === self::CDATA) && $this->escape ===
true &&
221 $this->character($this->
char, 3) ===
'-->') {
222 $this->escape =
false;
227 $this->emitToken(array(
228 'type' => self::CHARACTR,
232 } elseif($this->
char === $this->
EOF) {
237 } elseif($this->content_model === self::PLAINTEXT) {
241 $this->emitToken(array(
242 'type' => self::CHARACTR,
243 'data' => substr($this->data, $this->
char)
253 $len = strcspn($this->data,
'<&', $this->
char);
254 $char = substr($this->data, $this->
char, $len);
255 $this->
char += $len - 1;
257 $this->emitToken(array(
258 'type' => self::CHARACTR,
262 $this->state =
'data';
268 $entity = $this->entity();
272 $char = (!$entity) ?
'&' : $entity;
273 $this->emitToken(array(
274 'type' => self::CHARACTR,
279 $this->state =
'data';
283 switch($this->content_model) {
291 if($this->character($this->
char + 1) ===
'/') {
293 $this->state =
'closeTagOpen';
296 $this->emitToken(array(
297 'type' => self::CHARACTR,
301 $this->state =
'data';
309 $char = $this->char();
314 $this->state =
'markupDeclarationOpen';
316 } elseif($char ===
'/') {
319 $this->state =
'closeTagOpen';
321 } elseif(preg_match(
'/^[A-Za-z]$/', $char)) {
327 $this->token = array(
328 'name' => strtolower($char),
329 'type' => self::STARTTAG,
333 $this->state =
'tagName';
335 } elseif($char ===
'>') {
339 $this->emitToken(array(
340 'type' => self::CHARACTR,
344 $this->state =
'data';
346 } elseif($char ===
'?') {
349 $this->state =
'bogusComment';
355 $this->emitToken(array(
356 'type' => self::CHARACTR,
361 $this->state =
'data';
368 $next_node = strtolower($this->characters(
'A-Za-z', $this->
char + 1));
369 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
371 if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
372 (!$the_same || ($the_same && (!preg_match(
'/[\t\n\x0b\x0c >\/]/',
373 $this->character($this->
char + 1 + strlen($next_node))) || $this->
EOF === $this->
char)))) {
389 $this->emitToken(array(
390 'type' => self::CHARACTR,
394 $this->state =
'data';
401 $char = $this->char();
403 if(preg_match(
'/^[A-Za-z]$/', $char)) {
409 $this->token = array(
410 'name' => strtolower($char),
411 'type' => self::ENDTAG
414 $this->state =
'tagName';
416 } elseif($char ===
'>') {
419 $this->state =
'data';
421 } elseif($this->
char === $this->
EOF) {
425 $this->emitToken(array(
426 'type' => self::CHARACTR,
431 $this->state =
'data';
435 $this->state =
'bogusComment';
443 $char = $this->character($this->
char);
445 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
452 $this->state =
'beforeAttributeName';
454 } elseif($char ===
'>') {
457 $this->emitToken($this->token);
458 $this->state =
'data';
460 } elseif($this->
char === $this->
EOF) {
464 $this->emitToken($this->token);
467 $this->state =
'data';
469 } elseif($char ===
'/') {
473 $this->state =
'beforeAttributeName';
479 $this->token[
'name'] .= strtolower($char);
480 $this->state =
'tagName';
487 $char = $this->character($this->
char);
489 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
496 $this->state =
'beforeAttributeName';
498 } elseif($char ===
'>') {
501 $this->emitToken($this->token);
502 $this->state =
'data';
504 } elseif($char ===
'/') {
508 $this->state =
'beforeAttributeName';
510 } elseif($this->
char === $this->
EOF) {
514 $this->emitToken($this->token);
517 $this->state =
'data';
524 $this->token[
'attr'][] = array(
525 'name' => strtolower($char),
529 $this->state =
'attributeName';
536 $char = $this->character($this->
char);
538 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
545 $this->state =
'afterAttributeName';
547 } elseif($char ===
'=') {
550 $this->state =
'beforeAttributeValue';
552 } elseif($char ===
'>') {
555 $this->emitToken($this->token);
556 $this->state =
'data';
558 } elseif($char ===
'/' && $this->character($this->
char + 1) !==
'>') {
562 $this->state =
'beforeAttributeName';
564 } elseif($this->
char === $this->
EOF) {
568 $this->emitToken($this->token);
571 $this->state =
'data';
577 $last = count($this->token[
'attr']) - 1;
578 $this->token[
'attr'][$last][
'name'] .= strtolower($char);
580 $this->state =
'attributeName';
587 $char = $this->character($this->
char);
589 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
596 $this->state =
'afterAttributeName';
598 } elseif($char ===
'=') {
601 $this->state =
'beforeAttributeValue';
603 } elseif($char ===
'>') {
606 $this->emitToken($this->token);
607 $this->state =
'data';
609 } elseif($char ===
'/' && $this->character($this->
char + 1) !==
'>') {
613 $this->state =
'beforeAttributeName';
615 } elseif($this->
char === $this->
EOF) {
619 $this->emitToken($this->token);
622 $this->state =
'data';
629 $this->token[
'attr'][] = array(
630 'name' => strtolower($char),
634 $this->state =
'attributeName';
641 $char = $this->character($this->
char);
643 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
650 $this->state =
'beforeAttributeValue';
652 } elseif($char ===
'"') {
655 $this->state =
'attributeValueDoubleQuoted';
657 } elseif($char ===
'&') {
662 $this->state =
'attributeValueUnquoted';
664 } elseif($char ===
'\'') {
667 $this->state =
'attributeValueSingleQuoted';
669 } elseif($char ===
'>') {
672 $this->emitToken($this->token);
673 $this->state =
'data';
679 $last = count($this->token[
'attr']) - 1;
680 $this->token[
'attr'][$last][
'value'] .= $char;
682 $this->state =
'attributeValueUnquoted';
689 $char = $this->character($this->
char);
694 $this->state =
'beforeAttributeName';
696 } elseif($char ===
'&') {
699 $this->entityInAttributeValueState(
'double');
701 } elseif($this->
char === $this->
EOF) {
705 $this->emitToken($this->token);
708 $this->state =
'data';
714 $last = count($this->token[
'attr']) - 1;
715 $this->token[
'attr'][$last][
'value'] .= $char;
717 $this->state =
'attributeValueDoubleQuoted';
724 $char = $this->character($this->
char);
729 $this->state =
'beforeAttributeName';
731 } elseif($char ===
'&') {
734 $this->entityInAttributeValueState(
'single');
736 } elseif($this->
char === $this->
EOF) {
740 $this->emitToken($this->token);
743 $this->state =
'data';
749 $last = count($this->token[
'attr']) - 1;
750 $this->token[
'attr'][$last][
'value'] .= $char;
752 $this->state =
'attributeValueSingleQuoted';
759 $char = $this->character($this->
char);
761 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
768 $this->state =
'beforeAttributeName';
770 } elseif($char ===
'&') {
773 $this->entityInAttributeValueState();
775 } elseif($char ===
'>') {
778 $this->emitToken($this->token);
779 $this->state =
'data';
785 $last = count($this->token[
'attr']) - 1;
786 $this->token[
'attr'][$last][
'value'] .= $char;
788 $this->state =
'attributeValueUnquoted';
794 $entity = $this->entity();
803 $last = count($this->token[
'attr']) - 1;
804 $this->token[
'attr'][$last][
'value'] .= $char;
816 $data = $this->characters(
'^>', $this->
char);
817 $this->emitToken(array(
819 'type' => self::COMMENT
822 $this->
char += strlen(
$data);
825 $this->state =
'data';
828 if($this->
char === $this->
EOF) {
829 $this->
char = $this->
EOF - 1;
837 if($this->character($this->
char + 1, 2) ===
'--') {
839 $this->state =
'comment';
840 $this->token = array(
842 'type' => self::COMMENT
848 } elseif(strtolower($this->character($this->
char + 1, 7)) ===
'doctype') {
850 $this->state =
'doctype';
857 $this->state =
'bogusComment';
864 $char = $this->char();
869 $this->state =
'commentDash';
872 } elseif($this->
char === $this->
EOF) {
875 $this->emitToken($this->token);
877 $this->state =
'data';
883 $this->token[
'data'] .= $char;
890 $char = $this->char();
895 $this->state =
'commentEnd';
898 } elseif($this->
char === $this->
EOF) {
901 $this->emitToken($this->token);
903 $this->state =
'data';
909 $this->token[
'data'] .=
'-'.$char;
910 $this->state =
'comment';
917 $char = $this->char();
920 $this->emitToken($this->token);
921 $this->state =
'data';
923 } elseif($char ===
'-') {
924 $this->token[
'data'] .=
'-';
926 } elseif($this->
char === $this->
EOF) {
927 $this->emitToken($this->token);
929 $this->state =
'data';
932 $this->token[
'data'] .=
'--'.$char;
933 $this->state =
'comment';
940 $char = $this->char();
942 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
943 $this->state =
'beforeDoctypeName';
947 $this->state =
'beforeDoctypeName';
954 $char = $this->char();
956 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
959 } elseif(preg_match(
'/^[a-z]$/', $char)) {
960 $this->token = array(
961 'name' => strtoupper($char),
962 'type' => self::DOCTYPE,
966 $this->state =
'doctypeName';
968 } elseif($char ===
'>') {
969 $this->emitToken(array(
971 'type' => self::DOCTYPE,
975 $this->state =
'data';
977 } elseif($this->
char === $this->
EOF) {
978 $this->emitToken(array(
980 'type' => self::DOCTYPE,
985 $this->state =
'data';
988 $this->token = array(
990 'type' => self::DOCTYPE,
994 $this->state =
'doctypeName';
1001 $char = $this->char();
1003 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1004 $this->state =
'AfterDoctypeName';
1006 } elseif($char ===
'>') {
1007 $this->emitToken($this->token);
1008 $this->state =
'data';
1010 } elseif(preg_match(
'/^[a-z]$/', $char)) {
1011 $this->token[
'name'] .= strtoupper($char);
1013 } elseif($this->
char === $this->
EOF) {
1014 $this->emitToken($this->token);
1016 $this->state =
'data';
1019 $this->token[
'name'] .= $char;
1022 $this->token[
'error'] = ($this->token[
'name'] ===
'HTML')
1030 $char = $this->char();
1032 if(preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1035 } elseif($char ===
'>') {
1036 $this->emitToken($this->token);
1037 $this->state =
'data';
1039 } elseif($this->
char === $this->
EOF) {
1040 $this->emitToken($this->token);
1042 $this->state =
'data';
1045 $this->token[
'error'] =
true;
1046 $this->state =
'bogusDoctype';
1053 $char = $this->char();
1056 $this->emitToken($this->token);
1057 $this->state =
'data';
1059 } elseif($this->
char === $this->
EOF) {
1060 $this->emitToken($this->token);
1062 $this->state =
'data';
1070 $start = $this->char;
1078 switch($this->character($this->
char + 1)) {
1084 switch($this->character($this->
char + 1)) {
1096 $char_class =
'0-9A-Fa-f';
1105 $char_class =
'0-9';
1112 $e_name = $this->characters($char_class, $this->
char + $char + 1);
1113 $entity = $this->character($start, $this->
char);
1114 $cond = strlen($e_name) > 0;
1124 $e_name = $this->characters(
'0-9A-Za-z;', $this->
char + 1);
1125 $len = strlen($e_name);
1127 for($c = 1; $c <= $len; $c++) {
1128 $id = substr($e_name, 0, $c);
1131 if(in_array($id, $this->entities)) {
1132 if ($e_name[$c-1] !==
';') {
1133 if ($c < $len && $e_name[$c] ==
';') {
1142 $cond = isset($entity);
1150 $this->
char = $start;
1156 return html_entity_decode(
'&'.$entity.
';', ENT_QUOTES,
'UTF-8');
1160 $emit = $this->tree->emitToken($token);
1163 $this->content_model = $emit;
1165 } elseif($token[
'type'] === self::ENDTAG) {
1166 $this->content_model = self::PCDATA;
1171 $this->state = null;
1172 $this->tree->emitToken(array(
// Stack of open DOM elements: the root <html> element is pushed first and
// handlers push/pop as elements open and close. Public because the tokenizer
// reads the top entry ($this->tree->stack) when examining a close tag in
// RCDATA/CDATA content.
1179 public $stack = array();
// Not referenced in the visible portion of this file — purpose unconfirmed.
1184 private $foster_parent = null;
// Active formatting entries: elements inserted for formatting start tags
// ('a', 'b', 'big', ...) plus self::MARKER entries appended by some handlers.
1185 private $a_formatting = array();
// Element inserted for a <head> start tag; null until one has been inserted.
1187 private $head_pointer = null;
// Assigned an element returned by insertElement() and later reset to null.
// When non-null, at least one handler appends new elements to it instead of
// to the current node. Presumably the open <form> element — the case labels
// for these handlers are not visible here; confirm.
1188 private $form_pointer = null;
// Tag names in the "scoping" group. Presumably these act as scope boundaries
// for elementInScope() — that method's body is not visible here; confirm.
1190 private $scoping = array(
'button',
'caption',
'html',
'marquee',
'object',
'table',
'td',
'th');
// Tag names in the "formatting" group. Presumably these are the names that
// getElementCategory() reports as self::FORMATTING — that method's body is
// not visible here; confirm.
1191 private $formatting = array(
'a',
'b',
'big',
'em',
'font',
'i',
'nobr',
's',
'small',
'strike',
'strong',
'tt',
'u');
// Tag names in the "special" group. Presumably consulted by
// getElementCategory() to classify elements that are neither formatting nor
// phrasing — that method's body is not visible here; confirm.
1192 private $special = array(
'address',
'area',
'base',
'basefont',
'bgsound',
1193 'blockquote',
'body',
'br',
'center',
'col',
'colgroup',
'dd',
'dir',
'div',
'dl',
1194 'dt',
'embed',
'fieldset',
'form',
'frame',
'frameset',
'h1',
'h2',
'h3',
'h4',
'h5',
1195 'h6',
'head',
'hr',
'iframe',
'image',
'img',
'input',
'isindex',
'li',
'link',
1196 'listing',
'menu',
'meta',
'noembed',
'noframes',
'noscript',
'ol',
'optgroup',
1197 'option',
'p',
'param',
'plaintext',
'pre',
'script',
'select',
'spacer',
'style',
1198 'tbody',
'textarea',
'tfoot',
'thead',
'title',
'tr',
'ul',
'wbr');
// Values of $this->phase. The phase switch dispatches them to initPhase(),
// rootElementPhase(), mainPhase() and trailingEndPhase() respectively.
// NOTE(review): END_PHASE is also used as a case label in the switch on
// $this->mode; not every mode constant is visible here, so confirm it cannot
// collide with a mode constant of the same value.
1201 const INIT_PHASE = 0;
1202 const ROOT_PHASE = 1;
1203 const MAIN_PHASE = 2;
1204 const END_PHASE = 3;
// Values of $this->mode (only some are visible in this excerpt). The mode
// switch dispatches each to its handler, e.g. BEFOR_HEAD -> beforeHead(),
// AFTER_HEAD -> afterHead(), IN_CGROUP -> inColumnGroup(),
// IN_FRAME -> inFrameset(), AFTR_FRAME -> afterFrameset().
1207 const BEFOR_HEAD = 0;
1209 const AFTER_HEAD = 2;
1212 const IN_CAPTION = 5;
1213 const IN_CGROUP = 6;
1217 const IN_SELECT = 10;
1218 const AFTER_BODY = 11;
1219 const IN_FRAME = 12;
1220 const AFTR_FRAME = 13;
// Element category value; compared against getElementCategory() results.
1225 const FORMATTING = 2;
1231 $this->phase = self::INIT_PHASE;
1232 $this->mode = self::BEFOR_HEAD;
1233 $this->dom =
new DOMDocument;
1235 $this->dom->encoding =
'UTF-8';
1236 $this->dom->preserveWhiteSpace =
true;
1237 $this->dom->substituteEntities =
true;
1238 $this->dom->strictErrorChecking =
false;
1243 switch($this->phase) {
1244 case self::INIT_PHASE:
return $this->initPhase($token);
break;
1245 case self::ROOT_PHASE:
return $this->rootElementPhase($token);
break;
1246 case self::MAIN_PHASE:
return $this->mainPhase($token);
break;
1247 case self::END_PHASE :
return $this->trailingEndPhase($token);
break;
1263 if((isset($token[
'error']) && $token[
'error']) ||
1269 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data']))) {
1275 $this->phase = self::ROOT_PHASE;
1276 return $this->rootElementPhase($token);
1279 } elseif(isset($token[
'error']) && !$token[
'error']) {
1284 $doctype =
new DOMDocumentType(null, null,
'HTML');
1288 $this->phase = self::ROOT_PHASE;
1293 } elseif(isset($token[
'data']) && preg_match(
'/^[\t\n\x0b\x0c ]+$/',
1296 $text = $this->dom->createTextNode($token[
'data']);
1297 $this->dom->appendChild($text);
1313 $comment = $this->dom->createComment($token[
'data']);
1320 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) {
1322 $text = $this->dom->createTextNode($token[
'data']);
1323 $this->dom->appendChild($text);
1332 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
1339 $html = $this->dom->createElement(
'html');
1340 $this->dom->appendChild($html);
1341 $this->stack[] = $html;
1343 $this->phase = self::MAIN_PHASE;
1344 return $this->mainPhase($token);
1356 } elseif($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'html') {
1364 foreach($token[
'attr'] as $attr) {
1365 if(!$this->stack[0]->hasAttribute($attr[
'name'])) {
1366 $this->stack[0]->setAttribute($attr[
'name'], $attr[
'value']);
1373 $this->generateImpliedEndTags();
1378 switch($this->mode) {
1379 case self::BEFOR_HEAD:
return $this->beforeHead($token);
break;
1380 case self::IN_HEAD:
return $this->inHead($token);
break;
1381 case self::AFTER_HEAD:
return $this->afterHead($token);
break;
1382 case self::IN_BODY:
return $this->inBody($token);
break;
1383 case self::IN_TABLE:
return $this->inTable($token);
break;
1384 case self::IN_CAPTION:
return $this->inCaption($token);
break;
1385 case self::IN_CGROUP:
return $this->inColumnGroup($token);
break;
1386 case self::IN_TBODY:
return $this->inTableBody($token);
break;
1387 case self::IN_ROW:
return $this->inRow($token);
break;
1388 case self::IN_CELL:
return $this->inCell($token);
break;
1389 case self::IN_SELECT:
return $this->inSelect($token);
break;
1390 case self::AFTER_BODY:
return $this->afterBody($token);
break;
1391 case self::IN_FRAME:
return $this->inFrameset($token);
break;
1392 case self::AFTR_FRAME:
return $this->afterFrameset($token);
break;
1393 case self::END_PHASE:
return $this->trailingEndPhase($token);
break;
1405 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) {
1407 $this->insertText($token[
'data']);
1413 $this->insertComment($token[
'data']);
1416 } elseif($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') {
1419 $element = $this->insertElement($token);
1422 $this->head_pointer = $element;
1425 $this->mode = self::IN_HEAD;
1433 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') ||
1434 ($token[
'type'] ===
HTML5::CHARACTR && !preg_match(
'/^[\t\n\x0b\x0c ]$/',
1438 $this->beforeHead(array(
1444 return $this->inHead($token);
1463 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) || (
1464 $token[
'type'] ===
HTML5::CHARACTR && in_array(end($this->stack)->nodeName,
1465 array(
'title',
'style',
'script')))) {
1467 $this->insertText($token[
'data']);
1473 $this->insertComment($token[
'data']);
1476 in_array($token[
'name'], array(
'title',
'style',
'script'))) {
1477 array_pop($this->stack);
1481 } elseif($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'title') {
1485 if($this->head_pointer !== null) {
1486 $element = $this->insertElement($token,
false);
1487 $this->head_pointer->appendChild($element);
1490 $element = $this->insertElement($token);
1497 } elseif($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'style') {
1501 if($this->head_pointer !== null) {
1502 $element = $this->insertElement($token,
false);
1503 $this->head_pointer->appendChild($element);
1506 $this->insertElement($token);
1513 } elseif($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'script') {
1515 $element = $this->insertElement($token,
false);
1516 $this->head_pointer->appendChild($element);
1522 } elseif($token[
'type'] ===
HTML5::STARTTAG && in_array($token[
'name'],
1523 array(
'base',
'link',
'meta'))) {
1527 if($this->head_pointer !== null) {
1528 $element = $this->insertElement($token,
false);
1529 $this->head_pointer->appendChild($element);
1530 array_pop($this->stack);
1533 $this->insertElement($token);
1537 } elseif($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'head') {
1540 if($this->head_pointer->isSameNode(end($this->stack))) {
1541 array_pop($this->stack);
1549 $this->mode = self::AFTER_HEAD;
1552 } elseif(($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') ||
1553 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] !==
'html')) {
1560 if($this->head_pointer->isSameNode(end($this->stack))) {
1561 $this->inHead(array(
1568 $this->mode = self::AFTER_HEAD;
1572 return $this->afterHead($token);
1583 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) {
1585 $this->insertText($token[
'data']);
1591 $this->insertComment($token[
'data']);
1594 } elseif($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'body') {
1596 $this->insertElement($token);
1599 $this->mode = self::IN_BODY;
1602 } elseif($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'frameset') {
1604 $this->insertElement($token);
1607 $this->mode = self::IN_FRAME;
1611 } elseif($token[
'type'] ===
HTML5::STARTTAG && in_array($token[
'name'],
1612 array(
'base',
'link',
'meta',
'script',
'style',
'title'))) {
1615 $this->mode = self::IN_HEAD;
1616 return $this->inHead($token);
1622 $this->afterHead(array(
1628 return $this->inBody($token);
1635 switch($token[
'type']) {
1639 $this->reconstructActiveFormattingElements();
1642 $this->insertText($token[
'data']);
1649 $this->insertComment($token[
'data']);
1653 switch($token[
'name']) {
1656 case 'script':
case 'style':
1659 return $this->inHead($token);
1664 case 'base':
case 'link':
case 'meta':
case 'title':
1667 return $this->inHead($token);
1676 if(count($this->stack) === 1 || $this->stack[1]->nodeName !==
'body') {
1685 foreach($token[
'attr'] as $attr) {
1686 if(!$this->stack[1]->hasAttribute($attr[
'name'])) {
1687 $this->stack[1]->setAttribute($attr[
'name'], $attr[
'value']);
1696 case 'address':
case 'blockquote':
case 'center':
case 'dir':
1697 case 'div':
case 'dl':
case 'fieldset':
case 'listing':
1698 case 'menu':
case 'ol':
case 'p':
case 'ul':
1702 if($this->elementInScope(
'p')) {
1703 $this->emitToken(array(
1710 $this->insertElement($token);
1717 if($this->form_pointer !== null) {
1725 if($this->elementInScope(
'p')) {
1726 $this->emitToken(array(
1734 $element = $this->insertElement($token);
1735 $this->form_pointer = $element;
1740 case 'li':
case 'dd':
case 'dt':
1744 if($this->elementInScope(
'p')) {
1745 $this->emitToken(array(
1751 $stack_length = count($this->stack) - 1;
1753 for(
$n = $stack_length; 0 <=
$n;
$n--) {
1757 $node = $this->stack[
$n];
1758 $cat = $this->getElementCategory($node->tagName);
1763 if($token[
'name'] === $node->tagName || ($token[
'name'] !==
'li' 1764 && ($node->tagName ===
'dd' || $node->tagName ===
'dt'))) {
1765 for($x = $stack_length; $x >=
$n ; $x--) {
1766 array_pop($this->stack);
1775 if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
1776 $node->tagName !==
'address' && $node->tagName !==
'div') {
1783 $this->insertElement($token);
1791 if($this->elementInScope(
'p')) {
1792 $this->emitToken(array(
1799 $this->insertElement($token);
1806 case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
1809 if($this->elementInScope(
'p')) {
1810 $this->emitToken(array(
1821 while($this->elementInScope(array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6'))) {
1822 array_pop($this->stack);
1826 $this->insertElement($token);
1840 $leng = count($this->a_formatting);
1842 for(
$n = $leng - 1;
$n >= 0;
$n--) {
1843 if($this->a_formatting[
$n] === self::MARKER) {
1846 } elseif($this->a_formatting[
$n]->nodeName ===
'a') {
1847 $this->emitToken(array(
1856 $this->reconstructActiveFormattingElements();
1859 $el = $this->insertElement($token);
1863 $this->a_formatting[] = $el;
1868 case 'b':
case 'big':
case 'em':
case 'font':
case 'i':
1869 case 'nobr':
case 's':
case 'small':
case 'strike':
1870 case 'strong':
case 'tt':
case 'u':
1872 $this->reconstructActiveFormattingElements();
1875 $el = $this->insertElement($token);
1879 $this->a_formatting[] = $el;
1888 if($this->elementInScope(
'button')) {
1889 $this->inBody(array(
1896 $this->reconstructActiveFormattingElements();
1899 $this->insertElement($token);
1903 $this->a_formatting[] = self::MARKER;
1907 case 'marquee':
case 'object':
1909 $this->reconstructActiveFormattingElements();
1912 $this->insertElement($token);
1916 $this->a_formatting[] = self::MARKER;
1922 $this->reconstructActiveFormattingElements();
1925 $this->insertElement($token);
1935 if($this->elementInScope(
'p')) {
1936 $this->emitToken(array(
1943 $this->insertElement($token);
1946 $this->mode = self::IN_TABLE;
1951 case 'area':
case 'basefont':
case 'bgsound':
case 'br':
1952 case 'embed':
case 'img':
case 'param':
case 'spacer':
1955 $this->reconstructActiveFormattingElements();
1958 $this->insertElement($token);
1961 array_pop($this->stack);
1968 if($this->elementInScope(
'p')) {
1969 $this->emitToken(array(
1976 $this->insertElement($token);
1979 array_pop($this->stack);
1986 $token[
'name'] =
'img';
1987 return $this->inBody($token);
1993 $this->reconstructActiveFormattingElements();
1996 $element = $this->insertElement($token,
false);
2001 $this->form_pointer !== null
2002 ? $this->form_pointer->appendChild($element)
2003 : end($this->stack)->appendChild($element);
2006 array_pop($this->stack);
2016 if($this->form_pointer === null) {
2019 $this->inBody(array(
2027 $this->inBody(array(
2035 $this->inBody(array(
2043 $this->inBody(array(
2050 $this->insertText(
'This is a searchable index. '.
2051 'Insert your search keywords here: ');
2057 $attr = $token[
'attr'];
2058 $attr[] = array(
'name' =>
'name',
'value' =>
'isindex');
2060 $this->inBody(array(
2068 $this->insertText(
'This is a searchable index. '.
2069 'Insert your search keywords here: ');
2073 $this->inBody(array(
2080 $this->inBody(array(
2087 $this->inBody(array(
2094 $this->inBody(array(
2103 $this->insertElement($token);
2112 case 'iframe':
case 'noembed':
case 'noframes':
2113 $this->insertElement($token);
2122 $this->reconstructActiveFormattingElements();
2125 $this->insertElement($token);
2128 $this->mode = self::IN_SELECT;
2134 case 'caption':
case 'col':
case 'colgroup':
case 'frame':
2135 case 'frameset':
case 'head':
case 'option':
case 'optgroup':
2136 case 'tbody':
case 'td':
case 'tfoot':
case 'th':
case 'thead':
2144 case 'event-source':
case 'section':
case 'nav':
case 'article':
2145 case 'aside':
case 'header':
case 'footer':
case 'datagrid':
2153 $this->reconstructActiveFormattingElements();
2155 $this->insertElement($token,
true,
true);
2161 switch($token[
'name']) {
2167 if(count($this->stack) < 2 || $this->stack[1]->nodeName !==
'body') {
2172 } elseif(end($this->stack)->nodeName !==
'body') {
2177 $this->mode = self::AFTER_BODY;
2185 $this->inBody(array(
2190 return $this->afterBody($token);
2196 case 'address':
case 'blockquote':
case 'center':
case 'dir':
2197 case 'div':
case 'dl':
case 'fieldset':
case 'listing':
2198 case 'menu':
case 'ol':
case 'pre':
case 'ul':
2202 if($this->elementInScope($token[
'name'])) {
2203 $this->generateImpliedEndTags();
2214 for(
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2215 if($this->stack[
$n]->nodeName === $token[
'name']) {
2219 array_pop($this->stack);
2229 if($this->elementInScope($token[
'name'])) {
2230 $this->generateImpliedEndTags();
2234 if(end($this->stack)->nodeName !== $token[
'name']) {
2244 array_pop($this->stack);
2248 $this->form_pointer = null;
2255 if($this->elementInScope(
'p')) {
2256 $this->generateImpliedEndTags(array(
'p'));
2265 for(
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2266 if($this->elementInScope(
'p')) {
2267 array_pop($this->stack);
2277 case 'dd':
case 'dt':
case 'li':
2282 if($this->elementInScope($token[
'name'])) {
2283 $this->generateImpliedEndTags(array($token[
'name']));
2293 for(
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2294 if($this->stack[
$n]->nodeName === $token[
'name']) {
2298 array_pop($this->stack);
2305 case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
2306 $elements = array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6');
2311 if($this->elementInScope($elements)) {
2312 $this->generateImpliedEndTags();
2322 while($this->elementInScope($elements)) {
2323 array_pop($this->stack);
2330 case 'a':
case 'b':
case 'big':
case 'em':
case 'font':
2331 case 'i':
case 'nobr':
case 's':
case 'small':
case 'strike':
2332 case 'strong':
case 'tt':
case 'u':
2341 for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
2342 if($this->a_formatting[$a] === self::MARKER) {
2345 } elseif($this->a_formatting[$a]->tagName === $token[
'name']) {
2346 $formatting_element = $this->a_formatting[$a];
2347 $in_stack = in_array($formatting_element, $this->stack,
true);
2357 if(!isset($formatting_element) || ($in_stack &&
2358 !$this->elementInScope($token[
'name']))) {
2365 } elseif(isset($formatting_element) && !$in_stack) {
2366 unset($this->a_formatting[$fe_af_pos]);
2367 $this->a_formatting = array_merge($this->a_formatting);
2376 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
2377 $length = count($this->stack);
2379 for($s = $fe_s_pos + 1; $s < $length; $s++) {
2380 $category = $this->getElementCategory($this->stack[$s]->nodeName);
2382 if($category !== self::PHRASING && $category !== self::FORMATTING) {
2383 $furthest_block = $this->stack[$s];
2393 if(!isset($furthest_block)) {
2394 for(
$n = $length - 1;
$n >= $fe_s_pos;
$n--) {
2395 array_pop($this->stack);
2398 unset($this->a_formatting[$fe_af_pos]);
2399 $this->a_formatting = array_merge($this->a_formatting);
2406 $common_ancestor = $this->stack[$fe_s_pos - 1];
2410 if($furthest_block->parentNode !== null) {
2411 $furthest_block->parentNode->removeChild($furthest_block);
2418 $bookmark = $fe_af_pos;
2422 $node = $furthest_block;
2423 $last_node = $furthest_block;
2426 for(
$n = array_search($node, $this->stack,
true) - 1;
$n >= 0;
$n--) {
2429 $node = $this->stack[
$n];
2435 if(!in_array($node, $this->a_formatting,
true)) {
2436 unset($this->stack[
$n]);
2437 $this->stack = array_merge($this->stack);
2447 if($node === $formatting_element) {
2454 } elseif($last_node === $furthest_block) {
2455 $bookmark = array_search($node, $this->a_formatting,
true) + 1;
2464 if($node->hasChildNodes()) {
2465 $clone = $node->cloneNode();
2466 $s_pos = array_search($node, $this->stack,
true);
2467 $a_pos = array_search($node, $this->a_formatting,
true);
2469 $this->stack[$s_pos] = $clone;
2470 $this->a_formatting[$a_pos] = $clone;
2476 if($last_node->parentNode !== null) {
2477 $last_node->parentNode->removeChild($last_node);
2480 $node->appendChild($last_node);
2490 if($last_node->parentNode !== null) {
2491 $last_node->parentNode->removeChild($last_node);
2494 $common_ancestor->appendChild($last_node);
2498 $clone = $formatting_element->cloneNode();
2503 while($furthest_block->hasChildNodes()) {
2504 $child = $furthest_block->firstChild;
2505 $furthest_block->removeChild($child);
2506 $clone->appendChild($child);
2510 $furthest_block->appendChild($clone);
2516 $fe_af_pos = array_search($formatting_element, $this->a_formatting,
true);
2517 unset($this->a_formatting[$fe_af_pos]);
2518 $this->a_formatting = array_merge($this->a_formatting);
2520 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
2521 $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
2522 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
2529 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
2530 $fb_s_pos = array_search($furthest_block, $this->stack,
true);
2531 unset($this->stack[$fe_s_pos]);
2533 $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
2534 $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
2535 $this->stack = array_merge($s_part1, array($clone), $s_part2);
2538 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
2544 case 'button':
case 'marquee':
case 'object':
2548 if($this->elementInScope($token[
'name'])) {
2549 $this->generateImpliedEndTags();
2560 for(
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2561 if($this->stack[
$n]->nodeName === $token[
'name']) {
2565 array_pop($this->stack);
2568 $marker = end(array_keys($this->a_formatting, self::MARKER,
true));
2570 for(
$n = count($this->a_formatting) - 1;
$n > $marker;
$n--) {
2571 array_pop($this->a_formatting);
2580 case 'area':
case 'basefont':
case 'bgsound':
case 'br':
2581 case 'embed':
case 'hr':
case 'iframe':
case 'image':
2582 case 'img':
case 'input':
case 'isindex':
case 'noembed':
2583 case 'noframes':
case 'param':
case 'select':
case 'spacer':
2584 case 'table':
case 'textarea':
case 'wbr':
2590 for(
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2593 $node = end($this->stack);
2597 if($token[
'name'] === $node->nodeName) {
2599 $this->generateImpliedEndTags();
2608 for($x = count($this->stack) -
$n; $x >=
$n; $x--) {
2609 array_pop($this->stack);
2613 $category = $this->getElementCategory($node);
2615 if($category !== self::SPECIAL && $category !== self::SCOPING) {
2631 $clear = array(
'html',
'table');
2637 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) {
2639 $text = $this->dom->createTextNode($token[
'data']);
2640 end($this->stack)->appendChild($text);
2646 $comment = $this->dom->createComment($token[
'data']);
2647 end($this->stack)->appendChild(
$comment);
2651 $token[
'name'] ===
'caption') {
2653 $this->clearStackToTableContext($clear);
2657 $this->a_formatting[] = self::MARKER;
2661 $this->insertElement($token);
2662 $this->mode = self::IN_CAPTION;
2666 $token[
'name'] ===
'colgroup') {
2668 $this->clearStackToTableContext($clear);
2672 $this->insertElement($token);
2673 $this->mode = self::IN_CGROUP;
2677 $token[
'name'] ===
'col') {
2678 $this->inTable(array(
2679 'name' =>
'colgroup',
2684 $this->inColumnGroup($token);
2687 } elseif($token[
'type'] ===
HTML5::STARTTAG && in_array($token[
'name'],
2688 array(
'tbody',
'tfoot',
'thead'))) {
2690 $this->clearStackToTableContext($clear);
2694 $this->insertElement($token);
2695 $this->mode = self::IN_TBODY;
2699 in_array($token[
'name'], array(
'td',
'th',
'tr'))) {
2702 $this->inTable(array(
2708 return $this->inTableBody($token);
2712 $token[
'name'] ===
'table') {
2716 $this->inTable(array(
2721 return $this->mainPhase($token);
2725 $token[
'name'] ===
'table') {
2729 if(!$this->elementInScope($token[
'name'],
true)) {
2735 $this->generateImpliedEndTags();
2744 $current = end($this->stack)->nodeName;
2745 array_pop($this->stack);
2747 if($current ===
'table') {
2753 $this->resetInsertionMode();
2758 } elseif($token[
'type'] ===
HTML5::ENDTAG && in_array($token[
'name'],
2759 array(
'body',
'caption',
'col',
'colgroup',
'html',
'tbody',
'td',
2760 'tfoot',
'th',
'thead',
'tr'))) {
2771 if(in_array(end($this->stack)->nodeName,
2772 array(
'table',
'tbody',
'tfoot',
'thead',
'tr'))) {
2784 for(
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
2785 if($this->stack[
$n]->nodeName ===
'table') {
2786 $table = $this->stack[
$n];
2791 if(isset($table) && $table->parentNode !== null) {
2792 $this->foster_parent = $table->parentNode;
2794 } elseif(!isset($table)) {
2795 $this->foster_parent = $this->stack[0];
2797 } elseif(isset($table) && ($table->parentNode === null ||
2798 $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
2799 $this->foster_parent = $this->stack[
$n - 1];
2803 $this->inBody($token);
2809 if($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'caption') {
2813 if(!$this->elementInScope($token[
'name'],
true)) {
2819 $this->generateImpliedEndTags();
2828 $node = end($this->stack)->nodeName;
2829 array_pop($this->stack);
2831 if($node ===
'caption') {
2838 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2841 $this->mode = self::IN_TABLE;
2847 } elseif(($token[
'type'] ===
HTML5::STARTTAG && in_array($token[
'name'],
2848 array(
'caption',
'col',
'colgroup',
'tbody',
'td',
'tfoot',
'th',
2850 $token[
'name'] ===
'table')) {
2854 $this->inCaption(array(
2855 'name' =>
'caption',
2859 return $this->inTable($token);
2863 } elseif($token[
'type'] ===
HTML5::ENDTAG && in_array($token[
'name'],
2864 array(
'body',
'col',
'colgroup',
'html',
'tbody',
'tfoot',
'th',
2871 $this->inBody($token);
2880 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) {
2882 $text = $this->dom->createTextNode($token[
'data']);
2883 end($this->stack)->appendChild($text);
2889 $comment = $this->dom->createComment($token[
'data']);
2890 end($this->stack)->appendChild(
$comment);
2893 } elseif($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'col') {
2896 $this->insertElement($token);
2897 array_pop($this->stack);
2901 $token[
'name'] ===
'colgroup') {
2904 if(end($this->stack)->nodeName ===
'html') {
2911 array_pop($this->stack);
2912 $this->mode = self::IN_TABLE;
2916 } elseif($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'col') {
2923 $this->inColumnGroup(array(
2924 'name' =>
'colgroup',
2928 return $this->inTable($token);
2933 $clear = array(
'tbody',
'tfoot',
'thead',
'html');
2938 $this->clearStackToTableContext($clear);
2942 $this->insertElement($token);
2943 $this->mode = self::IN_ROW;
2947 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')) {
2950 $this->inTableBody(array(
2956 return $this->inRow($token);
2960 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))) {
2964 if(!$this->elementInScope($token[
'name'],
true)) {
2970 $this->clearStackToTableContext($clear);
2974 array_pop($this->stack);
2975 $this->mode = self::IN_TABLE;
2980 } elseif(($token[
'type'] ===
HTML5::STARTTAG && in_array($token[
'name'],
2981 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoor',
'thead'))) ||
2986 if(!$this->elementInScope(array(
'tbody',
'thead',
'tfoot'),
true)) {
2992 $this->clearStackToTableContext($clear);
2997 $this->inTableBody(array(
2998 'name' => end($this->stack)->nodeName,
3002 return $this->mainPhase($token);
3007 } elseif($token[
'type'] ===
HTML5::ENDTAG && in_array($token[
'name'],
3008 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr'))) {
3014 $this->inTable($token);
3019 $clear = array(
'tr',
'html');
3023 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')) {
3025 $this->clearStackToTableContext($clear);
3029 $this->insertElement($token);
3030 $this->mode = self::IN_CELL;
3034 $this->a_formatting[] = self::MARKER;
3037 } elseif($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'tr') {
3041 if(!$this->elementInScope($token[
'name'],
true)) {
3047 $this->clearStackToTableContext($clear);
3052 array_pop($this->stack);
3053 $this->mode = self::IN_TBODY;
3058 } elseif($token[
'type'] ===
HTML5::STARTTAG && in_array($token[
'name'],
3059 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoot',
'thead',
'tr'))) {
3067 return $this->inCell($token);
3071 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))) {
3075 if(!$this->elementInScope($token[
'name'],
true)) {
3087 return $this->inCell($token);
3092 } elseif($token[
'type'] ===
HTML5::ENDTAG && in_array($token[
'name'],
3093 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr'))) {
3099 $this->inTable($token);
3106 ($token[
'name'] ===
'td' || $token[
'name'] ===
'th')) {
3110 if(!$this->elementInScope($token[
'name'],
true)) {
3117 $this->generateImpliedEndTags(array($token[
'name']));
3126 $node = end($this->stack)->nodeName;
3127 array_pop($this->stack);
3129 if($node === $token[
'name']) {
3136 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
3140 $this->mode = self::IN_ROW;
3145 } elseif($token[
'type'] ===
HTML5::STARTTAG && in_array($token[
'name'],
3146 array(
'caption',
'col',
'colgroup',
'tbody',
'td',
'tfoot',
'th',
3151 if(!$this->elementInScope(array(
'td',
'th'),
true)) {
3158 return $this->inRow($token);
3163 } elseif($token[
'type'] ===
HTML5::STARTTAG && in_array($token[
'name'],
3164 array(
'caption',
'col',
'colgroup',
'tbody',
'td',
'tfoot',
'th',
3169 if(!$this->elementInScope(array(
'td',
'th'),
true)) {
3176 return $this->inRow($token);
3181 } elseif($token[
'type'] ===
HTML5::ENDTAG && in_array($token[
'name'],
3182 array(
'body',
'caption',
'col',
'colgroup',
'html'))) {
3187 } elseif($token[
'type'] ===
HTML5::ENDTAG && in_array($token[
'name'],
3188 array(
'table',
'tbody',
'tfoot',
'thead',
'tr'))) {
3193 if(!$this->elementInScope($token[
'name'],
true)) {
3200 return $this->inRow($token);
3206 $this->inBody($token);
3216 $this->insertText($token[
'data']);
3222 $this->insertComment($token[
'data']);
3226 $token[
'name'] ===
'option') {
3229 if(end($this->stack)->nodeName ===
'option') {
3230 $this->inSelect(array(
3237 $this->insertElement($token);
3241 $token[
'name'] ===
'optgroup') {
3244 if(end($this->stack)->nodeName ===
'option') {
3245 $this->inSelect(array(
3253 if(end($this->stack)->nodeName ===
'optgroup') {
3254 $this->inSelect(array(
3255 'name' =>
'optgroup',
3261 $this->insertElement($token);
3265 $token[
'name'] ===
'optgroup') {
3270 $elements_in_stack = count($this->stack);
3272 if($this->stack[$elements_in_stack - 1]->nodeName ===
'option' &&
3273 $this->stack[$elements_in_stack - 2]->nodeName ===
'optgroup') {
3274 $this->inSelect(array(
3283 if($this->stack[$elements_in_stack - 1] ===
'optgroup') {
3284 array_pop($this->stack);
3289 $token[
'name'] ===
'option') {
3293 if(end($this->stack)->nodeName ===
'option') {
3294 array_pop($this->stack);
3299 $token[
'name'] ===
'select') {
3303 if(!$this->elementInScope($token[
'name'],
true)) {
3311 $current = end($this->stack)->nodeName;
3312 array_pop($this->stack);
3314 if($current ===
'select') {
3320 $this->resetInsertionMode();
3324 } elseif($token[
'name'] ===
'select' &&
3328 $this->inSelect(array(
3335 } elseif(in_array($token[
'name'], array(
'caption',
'table',
'tbody',
3336 'tfoot',
'thead',
'tr',
'td',
'th')) && $token[
'type'] ===
HTML5::ENDTAG) {
3344 if($this->elementInScope($token[
'name'],
true)) {
3345 $this->inSelect(array(
3350 $this->mainPhase($token);
3366 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) {
3369 $this->inBody($token);
3376 $comment = $this->dom->createComment($token[
'data']);
3377 $this->stack[0]->appendChild(
$comment);
3380 } elseif($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') {
3387 $this->phase = self::END_PHASE;
3393 $this->mode = self::IN_BODY;
3394 return $this->inBody($token);
3405 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) {
3407 $this->insertText($token[
'data']);
3413 $this->insertComment($token[
'data']);
3416 } elseif($token[
'name'] ===
'frameset' &&
3418 $this->insertElement($token);
3421 } elseif($token[
'name'] ===
'frameset' &&
3425 if(end($this->stack)->nodeName ===
'html') {
3431 array_pop($this->stack);
3437 $this->mode = self::AFTR_FRAME;
3441 } elseif($token[
'name'] ===
'frame' &&
3444 $this->insertElement($token);
3447 array_pop($this->stack);
3450 } elseif($token[
'name'] ===
'noframes' &&
3453 $this->inBody($token);
3468 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) {
3470 $this->insertText($token[
'data']);
3476 $this->insertComment($token[
'data']);
3479 } elseif($token[
'name'] ===
'html' &&
3482 $this->phase = self::END_PHASE;
3485 } elseif($token[
'name'] ===
'noframes' &&
3488 $this->inBody($token);
3508 $comment = $this->dom->createComment($token[
'data']);
3515 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) {
3517 $this->mainPhase($token);
3523 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
3527 $this->phase = self::MAIN_PHASE;
3528 return $this->mainPhase($token);
3541 $token[
'name'] = preg_replace(
'/[^a-z0-9-]/i',
'', $token[
'name']);
3543 $token[
'name'] = ltrim($token[
'name'],
'-0..9');
3545 if ($token[
'name'] ===
'') $token[
'name'] =
'span';
3548 $el = $this->dom->createElement($token[
'name']);
3550 foreach($token[
'attr'] as $attr) {
3551 if(!$el->hasAttribute($attr[
'name'])) {
3552 $el->setAttribute($attr[
'name'], $attr[
'value']);
3556 $this->appendToRealParent($el);
3557 $this->stack[] = $el;
3563 $text = $this->dom->createTextNode(
$data);
3564 $this->appendToRealParent($text);
3569 $this->appendToRealParent(
$comment);
3573 if($this->foster_parent === null) {
3574 end($this->stack)->appendChild($node);
3576 } elseif($this->foster_parent !== null) {
3583 for(
$n = count($this->stack) - 1;
$n >= 0;
$n--) {
3584 if($this->stack[
$n]->nodeName ===
'table' &&
3585 $this->stack[
$n]->parentNode !== null) {
3586 $table = $this->stack[
$n];
3591 if(isset($table) && $this->foster_parent->isSameNode($table->parentNode))
3592 $this->foster_parent->insertBefore($node, $table);
3594 $this->foster_parent->appendChild($node);
3596 $this->foster_parent = null;
3602 foreach($el as $element) {
3603 if($this->elementInScope($element, $table)) {
3611 $leng = count($this->stack);
3613 for(
$n = 0;
$n < $leng;
$n++) {
3616 $node = $this->stack[$leng - 1 -
$n];
3618 if($node->tagName === $el) {
3622 } elseif($node->tagName ===
'table') {
3627 } elseif($table ===
true && in_array($node->tagName, array(
'caption',
'td',
3628 'th',
'button',
'marquee',
'object'))) {
3634 } elseif($node === $node->ownerDocument->documentElement) {
3652 $formatting_elements = count($this->a_formatting);
3654 if($formatting_elements === 0) {
3660 $entry = end($this->a_formatting);
3666 if($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
3670 for($a = $formatting_elements - 1; $a >= 0;
true) {
3674 $step_seven =
false;
3681 $entry = $this->a_formatting[$a];
3685 if($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
3693 if(isset($step_seven) && $step_seven ===
true) {
3695 $entry = $this->a_formatting[$a];
3699 $clone = $entry->cloneNode();
3703 end($this->stack)->appendChild($clone);
3704 $this->stack[] = $clone;
3708 $this->a_formatting[$a] = $clone;
3712 if(end($this->a_formatting) !== $clone) {
3728 $entry = end($this->a_formatting);
3731 array_pop($this->a_formatting);
3735 if($entry === self::MARKER) {
3747 $node = end($this->stack);
3748 $elements = array_diff(array(
'dd',
'dt',
'li',
'p',
'td',
'th',
'tr'),
$exclude);
3750 while(in_array(end($this->stack)->nodeName, $elements)) {
3751 array_pop($this->stack);
3756 $name = $node->tagName;
3757 if(in_array($name, $this->special))
3758 return self::SPECIAL;
3760 elseif(in_array($name, $this->scoping))
3761 return self::SCOPING;
3763 elseif(in_array($name, $this->formatting))
3764 return self::FORMATTING;
3767 return self::PHRASING;
3777 $node = end($this->stack)->nodeName;
3779 if(in_array($node, $elements)) {
3782 array_pop($this->stack);
3790 $leng = count($this->stack);
3792 for(
$n = $leng - 1;
$n >= 0;
$n--) {
3794 $node = $this->stack[
$n];
3800 if($this->stack[0]->isSameNode($node)) {
3806 if($node->nodeName ===
'select') {
3807 $this->mode = self::IN_SELECT;
3812 } elseif($node->nodeName ===
'td' || $node->nodeName ===
'th') {
3813 $this->mode = self::IN_CELL;
3818 } elseif($node->nodeName ===
'tr') {
3819 $this->mode = self::IN_ROW;
3824 } elseif(in_array($node->nodeName, array(
'tbody',
'thead',
'tfoot'))) {
3825 $this->mode = self::IN_TBODY;
3830 } elseif($node->nodeName ===
'caption') {
3831 $this->mode = self::IN_CAPTION;
3836 } elseif($node->nodeName ===
'colgroup') {
3837 $this->mode = self::IN_CGROUP;
3842 } elseif($node->nodeName ===
'table') {
3843 $this->mode = self::IN_TABLE;
3849 } elseif($node->nodeName ===
'head') {
3850 $this->mode = self::IN_BODY;
3855 } elseif($node->nodeName ===
'body') {
3856 $this->mode = self::IN_BODY;
3861 } elseif($node->nodeName ===
'frameset') {
3862 $this->mode = self::IN_FRAME;
3869 } elseif($node->nodeName ===
'html') {
3870 $this->mode = ($this->head_pointer === null)
3879 $this->mode = self::IN_BODY;
3888 foreach(array(
'td',
'th') as $cell) {
3889 if($this->elementInScope($cell,
true)) {
3890 $this->inCell(array(
tokenizeDOM($node, &$tokens)
Iterative function that tokenizes a node, putting it into an accumulator.
attributeValueUnquotedState()
attributeValueSingleQuotedState()
getElementCategory($node)
wrapHTML($html, $config, $context)
Wraps an HTML fragment in the necessary HTML.
Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
clearTheActiveFormattingElementsUpToTheLastMarker()
characters($char_class, $start)
markupDeclarationOpenState()
Parser that uses PHP 5's DOM extension (part of the core).
tokenizeHTML($html, $config, $context)
beforeAttributeValueState()
Our in-house implementation of a parser.
generateImpliedEndTags($exclude=array())
entityInAttributeValueState()
clearStackToTableContext($elements)
elementInScope($el, $table=false)
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits...
insertElement($token, $append=true, $check=false)
appendToRealParent($node)
while($lm_rec=$ilDB->fetchAssoc($lm_set)) $data
afterAttributeNameState()
attributeValueDoubleQuotedState()
beforeAttributeNameState()
reconstructActiveFormattingElements()
const EOF
How fgetc() reports an End Of File.