ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
HTML5 Class Reference
+ Collaboration diagram for HTML5:

Public Member Functions

 __construct ($data)
 
 save ()
 
 __construct ($data)
 
 save ()
 

Data Fields

const PCDATA = 0
 
const RCDATA = 1
 
const CDATA = 2
 
const PLAINTEXT = 3
 
const DOCTYPE = 0
 
const STARTTAG = 1
 
const ENDTAG = 2
 
const COMMENT = 3
 
const CHARACTR = 4
 
const EOF = 5
 

Private Member Functions

 char ()
 
 character ($s, $l=0)
 
 characters ($char_class, $start)
 
 dataState ()
 
 entityDataState ()
 
 tagOpenState ()
 
 closeTagOpenState ()
 
 tagNameState ()
 
 beforeAttributeNameState ()
 
 attributeNameState ()
 
 afterAttributeNameState ()
 
 beforeAttributeValueState ()
 
 attributeValueDoubleQuotedState ()
 
 attributeValueSingleQuotedState ()
 
 attributeValueUnquotedState ()
 
 entityInAttributeValueState ()
 
 bogusCommentState ()
 
 markupDeclarationOpenState ()
 
 commentState ()
 
 commentDashState ()
 
 commentEndState ()
 
 doctypeState ()
 
 beforeDoctypeNameState ()
 
 doctypeNameState ()
 
 afterDoctypeNameState ()
 
 bogusDoctypeState ()
 
 entity ()
 
 emitToken ($token)
 
 EOF ()
 
 char ()
 
 character ($s, $l=0)
 
 characters ($char_class, $start)
 
 dataState ()
 
 entityDataState ()
 
 tagOpenState ()
 
 closeTagOpenState ()
 
 tagNameState ()
 
 beforeAttributeNameState ()
 
 attributeNameState ()
 
 afterAttributeNameState ()
 
 beforeAttributeValueState ()
 
 attributeValueDoubleQuotedState ()
 
 attributeValueSingleQuotedState ()
 
 attributeValueUnquotedState ()
 
 entityInAttributeValueState ()
 
 bogusCommentState ()
 
 markupDeclarationOpenState ()
 
 commentState ()
 
 commentDashState ()
 
 commentEndState ()
 
 doctypeState ()
 
 beforeDoctypeNameState ()
 
 doctypeNameState ()
 
 afterDoctypeNameState ()
 
 bogusDoctypeState ()
 
 entity ()
 
 emitToken ($token)
 
 EOF ()
 

Private Attributes

 $data
 
 $char
 
 $EOF
 
 $state
 
 $tree
 
 $token
 
 $content_model
 
 $escape = false
 
 $entities
 

Detailed Description

Definition at line 70 of file PH5P.php.

Constructor & Destructor Documentation

◆ __construct() [1/2]

HTML5::__construct (   $data)

Definition at line 461 of file PH5P.php.

462 {
463 $this->data = $data;
464 $this->char = -1;
465 $this->EOF = strlen($data);
466 $this->tree = new HTML5TreeConstructer;
467 $this->content_model = self::PCDATA;
468
469 $this->state = 'data';
470
471 while ($this->state !== null) {
472 $this->{$this->state . 'State'}();
473 }
474 }
const PCDATA
Definition: PH5P.php:449
const EOF
Definition: PH5P.php:459
$data
Definition: PH5P.php:72

References $data, EOF, and PCDATA.

◆ __construct() [2/2]

HTML5::__construct (   $data)

Definition at line 67 of file PH5P.php.

68 {
69 $data = str_replace("\r\n", "\n", $data);
70 $date = str_replace("\r", null, $data);
71
72 $this->data = $data;
73 $this->char = -1;
74 $this->EOF = strlen($data);
75 $this->tree = new HTML5TreeConstructer;
76 $this->content_model = self::PCDATA;
77
78 $this->state = 'data';
79
80 while($this->state !== null) {
81 $this->{$this->state.'State'}();
82 }
83 }

References $data, EOF, and PCDATA.

Member Function Documentation

◆ afterAttributeNameState() [1/2]

HTML5::afterAttributeNameState ( )
private

Definition at line 955 of file PH5P.php.

956 {
957 // Consume the next input character:
958 $this->char++;
959 $char = $this->character($this->char);
960
961 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
962 /* U+0009 CHARACTER TABULATION
963 U+000A LINE FEED (LF)
964 U+000B LINE TABULATION
965 U+000C FORM FEED (FF)
966 U+0020 SPACE
967 Stay in the after attribute name state. */
968 $this->state = 'afterAttributeName';
969
970 } elseif ($char === '=') {
971 /* U+003D EQUALS SIGN (=)
972 Switch to the before attribute value state. */
973 $this->state = 'beforeAttributeValue';
974
975 } elseif ($char === '>') {
976 /* U+003E GREATER-THAN SIGN (>)
977 Emit the current tag token. Switch to the data state. */
978 $this->emitToken($this->token);
979 $this->state = 'data';
980
981 } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
982 /* U+002F SOLIDUS (/)
983 Parse error unless this is a permitted slash. Switch to the
984 before attribute name state. */
985 $this->state = 'beforeAttributeName';
986
987 } elseif ($this->char === $this->EOF) {
988 /* EOF
989 Parse error. Emit the current tag token. Reconsume the EOF
990 character in the data state. */
991 $this->emitToken($this->token);
992
993 $this->char--;
994 $this->state = 'data';
995
996 } else {
997 /* Anything else
998 Start a new attribute in the current tag token. Set that attribute's
999 name to the current input character, and its value to the empty string.
1000 Switch to the attribute name state. */
1001 $this->token['attr'][] = array(
1002 'name' => strtolower($char),
1003 'value' => null
1004 );
1005
1006 $this->state = 'attributeName';
1007 }
1008 }
emitToken($token)
Definition: PH5P.php:1553
character($s, $l=0)
Definition: PH5P.php:488
$char
Definition: PH5P.php:73

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ afterAttributeNameState() [2/2]

HTML5::afterAttributeNameState ( )
private

Definition at line 535 of file PH5P.php.

536 {
537 // Consume the next input character:
538 $this->char++;
539 $char = $this->character($this->char);
540
541 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
542 /* U+0009 CHARACTER TABULATION
543 U+000A LINE FEED (LF)
544 U+000B LINE TABULATION
545 U+000C FORM FEED (FF)
546 U+0020 SPACE
547 Stay in the after attribute name state. */
548 $this->state = 'afterAttributeName';
549
550 } elseif($char === '=') {
551 /* U+003D EQUALS SIGN (=)
552 Switch to the before attribute value state. */
553 $this->state = 'beforeAttributeValue';
554
555 } elseif($char === '>') {
556 /* U+003E GREATER-THAN SIGN (>)
557 Emit the current tag token. Switch to the data state. */
558 $this->emitToken($this->token);
559 $this->state = 'data';
560
561 } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
562 /* U+002F SOLIDUS (/)
563 Parse error unless this is a permitted slash. Switch to the
564 before attribute name state. */
565 $this->state = 'beforeAttributeName';
566
567 } elseif($this->char === $this->EOF) {
568 /* EOF
569 Parse error. Emit the current tag token. Reconsume the EOF
570 character in the data state. */
571 $this->emitToken($this->token);
572
573 $this->char--;
574 $this->state = 'data';
575
576 } else {
577 /* Anything else
578 Start a new attribute in the current tag token. Set that attribute's
579 name to the current input character, and its value to the empty string.
580 Switch to the attribute name state. */
581 $this->token['attr'][] = array(
582 'name' => strtolower($char),
583 'value' => null
584 );
585
586 $this->state = 'attributeName';
587 }
588 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ afterDoctypeNameState() [1/2]

HTML5::afterDoctypeNameState ( )
private

Definition at line 1418 of file PH5P.php.

1419 {
1420 /* Consume the next input character: */
1421 $this->char++;
1422 $char = $this->char();
1423
1424 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1425 // Stay in the after DOCTYPE name state.
1426
1427 } elseif ($char === '>') {
1428 $this->emitToken($this->token);
1429 $this->state = 'data';
1430
1431 } elseif ($this->char === $this->EOF) {
1432 $this->emitToken($this->token);
1433 $this->char--;
1434 $this->state = 'data';
1435
1436 } else {
1437 $this->token['error'] = true;
1438 $this->state = 'bogusDoctype';
1439 }
1440 }
char()
Definition: PH5P.php:481

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ afterDoctypeNameState() [2/2]

HTML5::afterDoctypeNameState ( )
private

Definition at line 991 of file PH5P.php.

992 {
993 /* Consume the next input character: */
994 $this->char++;
995 $char = $this->char();
996
997 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
998 // Stay in the after DOCTYPE name state.
999
1000 } elseif($char === '>') {
1001 $this->emitToken($this->token);
1002 $this->state = 'data';
1003
1004 } elseif($this->char === $this->EOF) {
1005 $this->emitToken($this->token);
1006 $this->char--;
1007 $this->state = 'data';
1008
1009 } else {
1010 $this->token['error'] = true;
1011 $this->state = 'bogusDoctype';
1012 }
1013 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ attributeNameState() [1/2]

HTML5::attributeNameState ( )
private

Definition at line 903 of file PH5P.php.

904 {
905 // Consume the next input character:
906 $this->char++;
907 $char = $this->character($this->char);
908
909 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
910 /* U+0009 CHARACTER TABULATION
911 U+000A LINE FEED (LF)
912 U+000B LINE TABULATION
913 U+000C FORM FEED (FF)
914 U+0020 SPACE
915 Switch to the after attribute name state. */
916 $this->state = 'afterAttributeName';
917
918 } elseif ($char === '=') {
919 /* U+003D EQUALS SIGN (=)
920 Switch to the before attribute value state. */
921 $this->state = 'beforeAttributeValue';
922
923 } elseif ($char === '>') {
924 /* U+003E GREATER-THAN SIGN (>)
925 Emit the current tag token. Switch to the data state. */
926 $this->emitToken($this->token);
927 $this->state = 'data';
928
929 } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
930 /* U+002F SOLIDUS (/)
931 Parse error unless this is a permitted slash. Switch to the before
932 attribute name state. */
933 $this->state = 'beforeAttributeName';
934
935 } elseif ($this->char === $this->EOF) {
936 /* EOF
937 Parse error. Emit the current tag token. Reconsume the EOF
938 character in the data state. */
939 $this->emitToken($this->token);
940
941 $this->char--;
942 $this->state = 'data';
943
944 } else {
945 /* Anything else
946 Append the current input character to the current attribute's name.
947 Stay in the attribute name state. */
948 $last = count($this->token['attr']) - 1;
949 $this->token['attr'][$last]['name'] .= strtolower($char);
950
951 $this->state = 'attributeName';
952 }
953 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ attributeNameState() [2/2]

HTML5::attributeNameState ( )
private

Definition at line 483 of file PH5P.php.

484 {
485 // Consume the next input character:
486 $this->char++;
487 $char = $this->character($this->char);
488
489 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
490 /* U+0009 CHARACTER TABULATION
491 U+000A LINE FEED (LF)
492 U+000B LINE TABULATION
493 U+000C FORM FEED (FF)
494 U+0020 SPACE
495 Switch to the after attribute name state. */
496 $this->state = 'afterAttributeName';
497
498 } elseif($char === '=') {
499 /* U+003D EQUALS SIGN (=)
500 Switch to the before attribute value state. */
501 $this->state = 'beforeAttributeValue';
502
503 } elseif($char === '>') {
504 /* U+003E GREATER-THAN SIGN (>)
505 Emit the current tag token. Switch to the data state. */
506 $this->emitToken($this->token);
507 $this->state = 'data';
508
509 } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
510 /* U+002F SOLIDUS (/)
511 Parse error unless this is a permitted slash. Switch to the before
512 attribute name state. */
513 $this->state = 'beforeAttributeName';
514
515 } elseif($this->char === $this->EOF) {
516 /* EOF
517 Parse error. Emit the current tag token. Reconsume the EOF
518 character in the data state. */
519 $this->emitToken($this->token);
520
521 $this->char--;
522 $this->state = 'data';
523
524 } else {
525 /* Anything else
526 Append the current input character to the current attribute's name.
527 Stay in the attribute name state. */
528 $last = count($this->token['attr']) - 1;
529 $this->token['attr'][$last]['name'] .= strtolower($char);
530
531 $this->state = 'attributeName';
532 }
533 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ attributeValueDoubleQuotedState() [1/2]

HTML5::attributeValueDoubleQuotedState ( )
private

Definition at line 1059 of file PH5P.php.

1060 {
1061 // Consume the next input character:
1062 $this->char++;
1063 $char = $this->character($this->char);
1064
1065 if ($char === '"') {
1066 /* U+0022 QUOTATION MARK (")
1067 Switch to the before attribute name state. */
1068 $this->state = 'beforeAttributeName';
1069
1070 } elseif ($char === '&') {
1071 /* U+0026 AMPERSAND (&)
1072 Switch to the entity in attribute value state. */
1073 $this->entityInAttributeValueState('double');
1074
1075 } elseif ($this->char === $this->EOF) {
1076 /* EOF
1077 Parse error. Emit the current tag token. Reconsume the character
1078 in the data state. */
1079 $this->emitToken($this->token);
1080
1081 $this->char--;
1082 $this->state = 'data';
1083
1084 } else {
1085 /* Anything else
1086 Append the current input character to the current attribute's value.
1087 Stay in the attribute value (double-quoted) state. */
1088 $last = count($this->token['attr']) - 1;
1089 $this->token['attr'][$last]['value'] .= $char;
1090
1091 $this->state = 'attributeValueDoubleQuoted';
1092 }
1093 }
entityInAttributeValueState()
Definition: PH5P.php:1168

References $char, character(), emitToken(), entityInAttributeValueState(), and EOF.

+ Here is the call graph for this function:

◆ attributeValueDoubleQuotedState() [2/2]

HTML5::attributeValueDoubleQuotedState ( )
private

Definition at line 639 of file PH5P.php.

640 {
641 // Consume the next input character:
642 $this->char++;
643 $char = $this->character($this->char);
644
645 if($char === '"') {
646 /* U+0022 QUOTATION MARK (")
647 Switch to the before attribute name state. */
648 $this->state = 'beforeAttributeName';
649
650 } elseif($char === '&') {
651 /* U+0026 AMPERSAND (&)
652 Switch to the entity in attribute value state. */
653 $this->entityInAttributeValueState('double');
654
655 } elseif($this->char === $this->EOF) {
656 /* EOF
657 Parse error. Emit the current tag token. Reconsume the character
658 in the data state. */
659 $this->emitToken($this->token);
660
661 $this->char--;
662 $this->state = 'data';
663
664 } else {
665 /* Anything else
666 Append the current input character to the current attribute's value.
667 Stay in the attribute value (double-quoted) state. */
668 $last = count($this->token['attr']) - 1;
669 $this->token['attr'][$last]['value'] .= $char;
670
671 $this->state = 'attributeValueDoubleQuoted';
672 }
673 }

References $char, character(), emitToken(), entityInAttributeValueState(), and EOF.

+ Here is the call graph for this function:

◆ attributeValueSingleQuotedState() [1/2]

HTML5::attributeValueSingleQuotedState ( )
private

Definition at line 1095 of file PH5P.php.

1096 {
1097 // Consume the next input character:
1098 $this->char++;
1099 $char = $this->character($this->char);
1100
1101 if ($char === '\'') {
1102 /* U+0027 APOSTROPHE (')
1103 Switch to the before attribute name state. */
1104 $this->state = 'beforeAttributeName';
1105
1106 } elseif ($char === '&') {
1107 /* U+0026 AMPERSAND (&)
1108 Switch to the entity in attribute value state. */
1109 $this->entityInAttributeValueState('single');
1110
1111 } elseif ($this->char === $this->EOF) {
1112 /* EOF
1113 Parse error. Emit the current tag token. Reconsume the character
1114 in the data state. */
1115 $this->emitToken($this->token);
1116
1117 $this->char--;
1118 $this->state = 'data';
1119
1120 } else {
1121 /* Anything else
1122 Append the current input character to the current attribute's value.
1123 Stay in the attribute value (single-quoted) state. */
1124 $last = count($this->token['attr']) - 1;
1125 $this->token['attr'][$last]['value'] .= $char;
1126
1127 $this->state = 'attributeValueSingleQuoted';
1128 }
1129 }

References $char, character(), emitToken(), entityInAttributeValueState(), and EOF.

+ Here is the call graph for this function:

◆ attributeValueSingleQuotedState() [2/2]

HTML5::attributeValueSingleQuotedState ( )
private

Definition at line 675 of file PH5P.php.

676 {
677 // Consume the next input character:
678 $this->char++;
679 $char = $this->character($this->char);
680
681 if($char === '\'') {
682 /* U+0027 APOSTROPHE (')
683 Switch to the before attribute name state. */
684 $this->state = 'beforeAttributeName';
685
686 } elseif($char === '&') {
687 /* U+0026 AMPERSAND (&)
688 Switch to the entity in attribute value state. */
689 $this->entityInAttributeValueState('single');
690
691 } elseif($this->char === $this->EOF) {
692 /* EOF
693 Parse error. Emit the current tag token. Reconsume the character
694 in the data state. */
695 $this->emitToken($this->token);
696
697 $this->char--;
698 $this->state = 'data';
699
700 } else {
701 /* Anything else
702 Append the current input character to the current attribute's value.
703 Stay in the attribute value (single-quoted) state. */
704 $last = count($this->token['attr']) - 1;
705 $this->token['attr'][$last]['value'] .= $char;
706
707 $this->state = 'attributeValueSingleQuoted';
708 }
709 }

References $char, character(), emitToken(), entityInAttributeValueState(), and EOF.

+ Here is the call graph for this function:

◆ attributeValueUnquotedState() [1/2]

HTML5::attributeValueUnquotedState ( )
private

Definition at line 1131 of file PH5P.php.

1132 {
1133 // Consume the next input character:
1134 $this->char++;
1135 $char = $this->character($this->char);
1136
1137 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1138 /* U+0009 CHARACTER TABULATION
1139 U+000A LINE FEED (LF)
1140 U+000B LINE TABULATION
1141 U+000C FORM FEED (FF)
1142 U+0020 SPACE
1143 Switch to the before attribute name state. */
1144 $this->state = 'beforeAttributeName';
1145
1146 } elseif ($char === '&') {
1147 /* U+0026 AMPERSAND (&)
1148 Switch to the entity in attribute value state. */
1149 $this->entityInAttributeValueState('non');
1150
1151 } elseif ($char === '>') {
1152 /* U+003E GREATER-THAN SIGN (>)
1153 Emit the current tag token. Switch to the data state. */
1154 $this->emitToken($this->token);
1155 $this->state = 'data';
1156
1157 } else {
1158 /* Anything else
1159 Append the current input character to the current attribute's value.
1160 Stay in the attribute value (unquoted) state. */
1161 $last = count($this->token['attr']) - 1;
1162 $this->token['attr'][$last]['value'] .= $char;
1163
1164 $this->state = 'attributeValueUnquoted';
1165 }
1166 }

References $char, character(), emitToken(), and entityInAttributeValueState().

+ Here is the call graph for this function:

◆ attributeValueUnquotedState() [2/2]

HTML5::attributeValueUnquotedState ( )
private

Definition at line 711 of file PH5P.php.

712 {
713 // Consume the next input character:
714 $this->char++;
715 $char = $this->character($this->char);
716
717 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
718 /* U+0009 CHARACTER TABULATION
719 U+000A LINE FEED (LF)
720 U+000B LINE TABULATION
721 U+000C FORM FEED (FF)
722 U+0020 SPACE
723 Switch to the before attribute name state. */
724 $this->state = 'beforeAttributeName';
725
726 } elseif($char === '&') {
727 /* U+0026 AMPERSAND (&)
728 Switch to the entity in attribute value state. */
729 $this->entityInAttributeValueState('non');
730
731 } elseif($char === '>') {
732 /* U+003E GREATER-THAN SIGN (>)
733 Emit the current tag token. Switch to the data state. */
734 $this->emitToken($this->token);
735 $this->state = 'data';
736
737 } else {
738 /* Anything else
739 Append the current input character to the current attribute's value.
740 Stay in the attribute value (unquoted) state. */
741 $last = count($this->token['attr']) - 1;
742 $this->token['attr'][$last]['value'] .= $char;
743
744 $this->state = 'attributeValueUnquoted';
745 }
746 }

References $char, character(), emitToken(), and entityInAttributeValueState().

+ Here is the call graph for this function:

◆ beforeAttributeNameState() [1/2]

HTML5::beforeAttributeNameState ( )
private

Definition at line 853 of file PH5P.php.

854 {
855 // Consume the next input character:
856 $this->char++;
857 $char = $this->character($this->char);
858
859 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
860 /* U+0009 CHARACTER TABULATION
861 U+000A LINE FEED (LF)
862 U+000B LINE TABULATION
863 U+000C FORM FEED (FF)
864 U+0020 SPACE
865 Stay in the before attribute name state. */
866 $this->state = 'beforeAttributeName';
867
868 } elseif ($char === '>') {
869 /* U+003E GREATER-THAN SIGN (>)
870 Emit the current tag token. Switch to the data state. */
871 $this->emitToken($this->token);
872 $this->state = 'data';
873
874 } elseif ($char === '/') {
875 /* U+002F SOLIDUS (/)
876 Parse error unless this is a permitted slash. Stay in the before
877 attribute name state. */
878 $this->state = 'beforeAttributeName';
879
880 } elseif ($this->char === $this->EOF) {
881 /* EOF
882 Parse error. Emit the current tag token. Reconsume the EOF
883 character in the data state. */
884 $this->emitToken($this->token);
885
886 $this->char--;
887 $this->state = 'data';
888
889 } else {
890 /* Anything else
891 Start a new attribute in the current tag token. Set that attribute's
892 name to the current input character, and its value to the empty string.
893 Switch to the attribute name state. */
894 $this->token['attr'][] = array(
895 'name' => strtolower($char),
896 'value' => null
897 );
898
899 $this->state = 'attributeName';
900 }
901 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ beforeAttributeNameState() [2/2]

HTML5::beforeAttributeNameState ( )
private

Definition at line 433 of file PH5P.php.

434 {
435 // Consume the next input character:
436 $this->char++;
437 $char = $this->character($this->char);
438
439 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
440 /* U+0009 CHARACTER TABULATION
441 U+000A LINE FEED (LF)
442 U+000B LINE TABULATION
443 U+000C FORM FEED (FF)
444 U+0020 SPACE
445 Stay in the before attribute name state. */
446 $this->state = 'beforeAttributeName';
447
448 } elseif($char === '>') {
449 /* U+003E GREATER-THAN SIGN (>)
450 Emit the current tag token. Switch to the data state. */
451 $this->emitToken($this->token);
452 $this->state = 'data';
453
454 } elseif($char === '/') {
455 /* U+002F SOLIDUS (/)
456 Parse error unless this is a permitted slash. Stay in the before
457 attribute name state. */
458 $this->state = 'beforeAttributeName';
459
460 } elseif($this->char === $this->EOF) {
461 /* EOF
462 Parse error. Emit the current tag token. Reconsume the EOF
463 character in the data state. */
464 $this->emitToken($this->token);
465
466 $this->char--;
467 $this->state = 'data';
468
469 } else {
470 /* Anything else
471 Start a new attribute in the current tag token. Set that attribute's
472 name to the current input character, and its value to the empty string.
473 Switch to the attribute name state. */
474 $this->token['attr'][] = array(
475 'name' => strtolower($char),
476 'value' => null
477 );
478
479 $this->state = 'attributeName';
480 }
481 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ beforeAttributeValueState() [1/2]

HTML5::beforeAttributeValueState ( )
private

Definition at line 1010 of file PH5P.php.

1011 {
1012 // Consume the next input character:
1013 $this->char++;
1014 $char = $this->character($this->char);
1015
1016 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1017 /* U+0009 CHARACTER TABULATION
1018 U+000A LINE FEED (LF)
1019 U+000B LINE TABULATION
1020 U+000C FORM FEED (FF)
1021 U+0020 SPACE
1022 Stay in the before attribute value state. */
1023 $this->state = 'beforeAttributeValue';
1024
1025 } elseif ($char === '"') {
1026 /* U+0022 QUOTATION MARK (")
1027 Switch to the attribute value (double-quoted) state. */
1028 $this->state = 'attributeValueDoubleQuoted';
1029
1030 } elseif ($char === '&') {
1031 /* U+0026 AMPERSAND (&)
1032 Switch to the attribute value (unquoted) state and reconsume
1033 this input character. */
1034 $this->char--;
1035 $this->state = 'attributeValueUnquoted';
1036
1037 } elseif ($char === '\'') {
1038 /* U+0027 APOSTROPHE (')
1039 Switch to the attribute value (single-quoted) state. */
1040 $this->state = 'attributeValueSingleQuoted';
1041
1042 } elseif ($char === '>') {
1043 /* U+003E GREATER-THAN SIGN (>)
1044 Emit the current tag token. Switch to the data state. */
1045 $this->emitToken($this->token);
1046 $this->state = 'data';
1047
1048 } else {
1049 /* Anything else
1050 Append the current input character to the current attribute's value.
1051 Switch to the attribute value (unquoted) state. */
1052 $last = count($this->token['attr']) - 1;
1053 $this->token['attr'][$last]['value'] .= $char;
1054
1055 $this->state = 'attributeValueUnquoted';
1056 }
1057 }

References $char, character(), and emitToken().

+ Here is the call graph for this function:

◆ beforeAttributeValueState() [2/2]

HTML5::beforeAttributeValueState ( )
private

Definition at line 590 of file PH5P.php.

591 {
592 // Consume the next input character:
593 $this->char++;
594 $char = $this->character($this->char);
595
596 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
597 /* U+0009 CHARACTER TABULATION
598 U+000A LINE FEED (LF)
599 U+000B LINE TABULATION
600 U+000C FORM FEED (FF)
601 U+0020 SPACE
602 Stay in the before attribute value state. */
603 $this->state = 'beforeAttributeValue';
604
605 } elseif($char === '"') {
606 /* U+0022 QUOTATION MARK (")
607 Switch to the attribute value (double-quoted) state. */
608 $this->state = 'attributeValueDoubleQuoted';
609
610 } elseif($char === '&') {
611 /* U+0026 AMPERSAND (&)
612 Switch to the attribute value (unquoted) state and reconsume
613 this input character. */
614 $this->char--;
615 $this->state = 'attributeValueUnquoted';
616
617 } elseif($char === '\'') {
618 /* U+0027 APOSTROPHE (')
619 Switch to the attribute value (single-quoted) state. */
620 $this->state = 'attributeValueSingleQuoted';
621
622 } elseif($char === '>') {
623 /* U+003E GREATER-THAN SIGN (>)
624 Emit the current tag token. Switch to the data state. */
625 $this->emitToken($this->token);
626 $this->state = 'data';
627
628 } else {
629 /* Anything else
630 Append the current input character to the current attribute's value.
631 Switch to the attribute value (unquoted) state. */
632 $last = count($this->token['attr']) - 1;
633 $this->token['attr'][$last]['value'] .= $char;
634
635 $this->state = 'attributeValueUnquoted';
636 }
637 }

References $char, character(), and emitToken().

+ Here is the call graph for this function:

◆ beforeDoctypeNameState() [1/2]

HTML5::beforeDoctypeNameState ( )
private

Definition at line 1336 of file PH5P.php.

1337 {
1338 /* Consume the next input character: */
1339 $this->char++;
1340 $char = $this->char();
1341
1342 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1343 // Stay in the before DOCTYPE name state.
1344
1345 } elseif (preg_match('/^[a-z]$/', $char)) {
1346 $this->token = array(
1347 'name' => strtoupper($char),
1348 'type' => self::DOCTYPE,
1349 'error' => true
1350 );
1351
1352 $this->state = 'doctypeName';
1353
1354 } elseif ($char === '>') {
1355 $this->emitToken(
1356 array(
1357 'name' => null,
1358 'type' => self::DOCTYPE,
1359 'error' => true
1360 )
1361 );
1362
1363 $this->state = 'data';
1364
1365 } elseif ($this->char === $this->EOF) {
1366 $this->emitToken(
1367 array(
1368 'name' => null,
1369 'type' => self::DOCTYPE,
1370 'error' => true
1371 )
1372 );
1373
1374 $this->char--;
1375 $this->state = 'data';
1376
1377 } else {
1378 $this->token = array(
1379 'name' => $char,
1380 'type' => self::DOCTYPE,
1381 'error' => true
1382 );
1383
1384 $this->state = 'doctypeName';
1385 }
1386 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ beforeDoctypeNameState() [2/2]

HTML5::beforeDoctypeNameState ( )
private

Definition at line 913 of file PH5P.php.

914 {
915 /* Consume the next input character: */
916 $this->char++;
917 $char = $this->char();
918
919 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
920 // Stay in the before DOCTYPE name state.
921
922 } elseif(preg_match('/^[a-z]$/', $char)) {
923 $this->token = array(
924 'name' => strtoupper($char),
925 'type' => self::DOCTYPE,
926 'error' => true
927 );
928
929 $this->state = 'doctypeName';
930
931 } elseif($char === '>') {
932 $this->emitToken(array(
933 'name' => null,
934 'type' => self::DOCTYPE,
935 'error' => true
936 ));
937
938 $this->state = 'data';
939
940 } elseif($this->char === $this->EOF) {
941 $this->emitToken(array(
942 'name' => null,
943 'type' => self::DOCTYPE,
944 'error' => true
945 ));
946
947 $this->char--;
948 $this->state = 'data';
949
950 } else {
951 $this->token = array(
952 'name' => $char,
953 'type' => self::DOCTYPE,
954 'error' => true
955 );
956
957 $this->state = 'doctypeName';
958 }
959 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ bogusCommentState() [1/2]

HTML5::bogusCommentState ( )
private

Definition at line 1184 of file PH5P.php.

1185 {
1186 /* Consume every character up to the first U+003E GREATER-THAN SIGN
1187 character (>) or the end of the file (EOF), whichever comes first. Emit
1188 a comment token whose data is the concatenation of all the characters
1189 starting from and including the character that caused the state machine
1190 to switch into the bogus comment state, up to and including the last
1191 consumed character before the U+003E character, if any, or up to the
1192 end of the file otherwise. (If the comment was started by the end of
1193 the file (EOF), the token is empty.) */
1194 $data = $this->characters('^>', $this->char);
1195 $this->emitToken(
1196 array(
1197 'data' => $data,
1198 'type' => self::COMMENT
1199 )
1200 );
1201
1202 $this->char += strlen($data);
1203
1204 /* Switch to the data state. */
1205 $this->state = 'data';
1206
1207 /* If the end of the file was reached, reconsume the EOF character. */
1208 if ($this->char === $this->EOF) {
1209 $this->char = $this->EOF - 1;
1210 }
1211 }
characters($char_class, $start)
Definition: PH5P.php:499

References $data, characters(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ bogusCommentState() [2/2]

HTML5::bogusCommentState ( )
private

Definition at line 763 of file PH5P.php.

764 {
765 /* Consume every character up to the first U+003E GREATER-THAN SIGN
766 character (>) or the end of the file (EOF), whichever comes first. Emit
767 a comment token whose data is the concatenation of all the characters
768 starting from and including the character that caused the state machine
769 to switch into the bogus comment state, up to and including the last
770 consumed character before the U+003E character, if any, or up to the
771 end of the file otherwise. (If the comment was started by the end of
772 the file (EOF), the token is empty.) */
773 $data = $this->characters('^>', $this->char);
774 $this->emitToken(array(
775 'data' => $data,
776 'type' => self::COMMENT
777 ));
778
779 $this->char += strlen($data);
780
781 /* Switch to the data state. */
782 $this->state = 'data';
783
784 /* If the end of the file was reached, reconsume the EOF character. */
785 if($this->char === $this->EOF) {
786 $this->char = $this->EOF - 1;
787 }
788 }

References $data, characters(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ bogusDoctypeState() [1/2]

HTML5::bogusDoctypeState ( )
private

Definition at line 1442 of file PH5P.php.

1443 {
1444 /* Consume the next input character: */
1445 $this->char++;
1446 $char = $this->char();
1447
1448 if ($char === '>') {
1449 $this->emitToken($this->token);
1450 $this->state = 'data';
1451
1452 } elseif ($this->char === $this->EOF) {
1453 $this->emitToken($this->token);
1454 $this->char--;
1455 $this->state = 'data';
1456
1457 } else {
1458 // Stay in the bogus DOCTYPE state.
1459 }
1460 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ bogusDoctypeState() [2/2]

HTML5::bogusDoctypeState ( )
private

Definition at line 1015 of file PH5P.php.

1016 {
1017 /* Consume the next input character: */
1018 $this->char++;
1019 $char = $this->char();
1020
1021 if($char === '>') {
1022 $this->emitToken($this->token);
1023 $this->state = 'data';
1024
1025 } elseif($this->char === $this->EOF) {
1026 $this->emitToken($this->token);
1027 $this->char--;
1028 $this->state = 'data';
1029
1030 } else {
1031 // Stay in the bogus DOCTYPE state.
1032 }
1033 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ char() [1/2]

HTML5::char ( )
private

Definition at line 481 of file PH5P.php.

482 {
483 return ($this->char < $this->EOF)
484 ? $this->data[$this->char]
485 : false;
486 }

References $char, and EOF.

Referenced by afterDoctypeNameState(), beforeDoctypeNameState(), bogusDoctypeState(), closeTagOpenState(), commentDashState(), commentEndState(), commentState(), dataState(), doctypeNameState(), doctypeState(), and tagOpenState().

+ Here is the caller graph for this function:

◆ char() [2/2]

HTML5::char ( )
private

Definition at line 90 of file PH5P.php.

91 {
92 return ($this->char < $this->EOF)
93 ? $this->data[$this->char]
94 : false;
95 }

References $char, and EOF.

◆ character() [1/2]

HTML5::character (   $s,
  $l = 0 
)
private

Definition at line 488 of file PH5P.php.

489 {
490 if ($s + $l < $this->EOF) {
491 if ($l === 0) {
492 return $this->data[$s];
493 } else {
494 return substr($this->data, $s, $l);
495 }
496 }
497 }
global $l
Definition: afr.php:30

References $l, and EOF.

Referenced by afterAttributeNameState(), attributeNameState(), attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), attributeValueUnquotedState(), beforeAttributeNameState(), beforeAttributeValueState(), closeTagOpenState(), dataState(), entity(), markupDeclarationOpenState(), tagNameState(), and tagOpenState().

+ Here is the caller graph for this function:

◆ character() [2/2]

HTML5::character (   $s,
  $l = 0 
)
private

Definition at line 97 of file PH5P.php.

98 {
99 if($s + $l < $this->EOF) {
100 if($l === 0) {
101 return $this->data[$s];
102 } else {
103 return substr($this->data, $s, $l);
104 }
105 }
106 }

References $l, and EOF.

◆ characters() [1/2]

HTML5::characters (   $char_class,
  $start 
)
private

Definition at line 499 of file PH5P.php.

500 {
501 return preg_replace('#^([' . $char_class . ']+).*#s', '\\1', substr($this->data, $start));
502 }

References $start.

Referenced by bogusCommentState(), closeTagOpenState(), and entity().

+ Here is the caller graph for this function:

◆ characters() [2/2]

HTML5::characters (   $char_class,
  $start 
)
private

Definition at line 108 of file PH5P.php.

109 {
110 return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start));
111 }

References $start.

◆ closeTagOpenState() [1/2]

HTML5::closeTagOpenState ( )
private

Definition at line 727 of file PH5P.php.

728 {
729 $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
730 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
731
732 if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
733 (!$the_same || ($the_same && (!preg_match(
734 '/[\t\n\x0b\x0c >\/]/',
735 $this->character($this->char + 1 + strlen($next_node))
736 ) || $this->EOF === $this->char)))
737 ) {
738 /* If the content model flag is set to the RCDATA or CDATA states then
739 examine the next few characters. If they do not match the tag name of
740 the last start tag token emitted (case insensitively), or if they do but
741 they are not immediately followed by one of the following characters:
742 * U+0009 CHARACTER TABULATION
743 * U+000A LINE FEED (LF)
744 * U+000B LINE TABULATION
745 * U+000C FORM FEED (FF)
746 * U+0020 SPACE
747 * U+003E GREATER-THAN SIGN (>)
748 * U+002F SOLIDUS (/)
749 * EOF
750 ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
751 token, a U+002F SOLIDUS character token, and switch to the data state
752 to process the next input character. */
753 $this->emitToken(
754 array(
755 'type' => self::CHARACTR,
756 'data' => '</'
757 )
758 );
759
760 $this->state = 'data';
761
762 } else {
763 /* Otherwise, if the content model flag is set to the PCDATA state,
764 or if the next few characters do match that tag name, consume the
765 next input character: */
766 $this->char++;
767 $char = $this->char();
768
769 if (preg_match('/^[A-Za-z]$/', $char)) {
770 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
771 Create a new end tag token, set its tag name to the lowercase version
772 of the input character (add 0x0020 to the character's code point), then
773 switch to the tag name state. (Don't emit the token yet; further details
774 will be filled in before it is emitted.) */
775 $this->token = array(
776 'name' => strtolower($char),
777 'type' => self::ENDTAG
778 );
779
780 $this->state = 'tagName';
781
782 } elseif ($char === '>') {
783 /* U+003E GREATER-THAN SIGN (>)
784 Parse error. Switch to the data state. */
785 $this->state = 'data';
786
787 } elseif ($this->char === $this->EOF) {
788 /* EOF
789 Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
790 SOLIDUS character token. Reconsume the EOF character in the data state. */
791 $this->emitToken(
792 array(
793 'type' => self::CHARACTR,
794 'data' => '</'
795 )
796 );
797
798 $this->char--;
799 $this->state = 'data';
800
801 } else {
802 /* Parse error. Switch to the bogus comment state. */
803 $this->state = 'bogusComment';
804 }
805 }
806 }

References $char, char(), character(), characters(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ closeTagOpenState() [2/2]

HTML5::closeTagOpenState ( )
private

Definition at line 314 of file PH5P.php.

315 {
316 $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
317 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
318
319 if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
320 (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/',
321 $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) {
322 /* If the content model flag is set to the RCDATA or CDATA states then
323 examine the next few characters. If they do not match the tag name of
324 the last start tag token emitted (case insensitively), or if they do but
325 they are not immediately followed by one of the following characters:
326 * U+0009 CHARACTER TABULATION
327 * U+000A LINE FEED (LF)
328 * U+000B LINE TABULATION
329 * U+000C FORM FEED (FF)
330 * U+0020 SPACE
331 * U+003E GREATER-THAN SIGN (>)
332 * U+002F SOLIDUS (/)
333 * EOF
334 ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
335 token, a U+002F SOLIDUS character token, and switch to the data state
336 to process the next input character. */
337 $this->emitToken(array(
338 'type' => self::CHARACTR,
339 'data' => '</'
340 ));
341
342 $this->state = 'data';
343
344 } else {
345 /* Otherwise, if the content model flag is set to the PCDATA state,
346 or if the next few characters do match that tag name, consume the
347 next input character: */
348 $this->char++;
349 $char = $this->char();
350
351 if(preg_match('/^[A-Za-z]$/', $char)) {
352 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
353 Create a new end tag token, set its tag name to the lowercase version
354 of the input character (add 0x0020 to the character's code point), then
355 switch to the tag name state. (Don't emit the token yet; further details
356 will be filled in before it is emitted.) */
357 $this->token = array(
358 'name' => strtolower($char),
359 'type' => self::ENDTAG
360 );
361
362 $this->state = 'tagName';
363
364 } elseif($char === '>') {
365 /* U+003E GREATER-THAN SIGN (>)
366 Parse error. Switch to the data state. */
367 $this->state = 'data';
368
369 } elseif($this->char === $this->EOF) {
370 /* EOF
371 Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
372 SOLIDUS character token. Reconsume the EOF character in the data state. */
373 $this->emitToken(array(
374 'type' => self::CHARACTR,
375 'data' => '</'
376 ));
377
378 $this->char--;
379 $this->state = 'data';
380
381 } else {
382 /* Parse error. Switch to the bogus comment state. */
383 $this->state = 'bogusComment';
384 }
385 }
386 }

References $char, char(), character(), characters(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ commentDashState() [1/2]

HTML5::commentDashState ( )
private

Definition at line 1269 of file PH5P.php.

1270 {
1271 /* Consume the next input character: */
1272 $this->char++;
1273 $char = $this->char();
1274
1275 /* U+002D HYPHEN-MINUS (-) */
1276 if ($char === '-') {
1277 /* Switch to the comment end state */
1278 $this->state = 'commentEnd';
1279
1280 /* EOF */
1281 } elseif ($this->char === $this->EOF) {
1282 /* Parse error. Emit the comment token. Reconsume the EOF character
1283 in the data state. */
1284 $this->emitToken($this->token);
1285 $this->char--;
1286 $this->state = 'data';
1287
1288 /* Anything else */
1289 } else {
1290 /* Append a U+002D HYPHEN-MINUS (-) character and the input
1291 character to the comment token's data. Switch to the comment state. */
1292 $this->token['data'] .= '-' . $char;
1293 $this->state = 'comment';
1294 }
1295 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ commentDashState() [2/2]

HTML5::commentDashState ( )
private

Definition at line 846 of file PH5P.php.

847 {
848 /* Consume the next input character: */
849 $this->char++;
850 $char = $this->char();
851
852 /* U+002D HYPHEN-MINUS (-) */
853 if($char === '-') {
854 /* Switch to the comment end state */
855 $this->state = 'commentEnd';
856
857 /* EOF */
858 } elseif($this->char === $this->EOF) {
859 /* Parse error. Emit the comment token. Reconsume the EOF character
860 in the data state. */
861 $this->emitToken($this->token);
862 $this->char--;
863 $this->state = 'data';
864
865 /* Anything else */
866 } else {
867 /* Append a U+002D HYPHEN-MINUS (-) character and the input
868 character to the comment token's data. Switch to the comment state. */
869 $this->token['data'] .= '-'.$char;
870 $this->state = 'comment';
871 }
872 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ commentEndState() [1/2]

HTML5::commentEndState ( )
private

Definition at line 1297 of file PH5P.php.

1298 {
1299 /* Consume the next input character: */
1300 $this->char++;
1301 $char = $this->char();
1302
1303 if ($char === '>') {
1304 $this->emitToken($this->token);
1305 $this->state = 'data';
1306
1307 } elseif ($char === '-') {
1308 $this->token['data'] .= '-';
1309
1310 } elseif ($this->char === $this->EOF) {
1311 $this->emitToken($this->token);
1312 $this->char--;
1313 $this->state = 'data';
1314
1315 } else {
1316 $this->token['data'] .= '--' . $char;
1317 $this->state = 'comment';
1318 }
1319 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ commentEndState() [2/2]

HTML5::commentEndState ( )
private

Definition at line 874 of file PH5P.php.

875 {
876 /* Consume the next input character: */
877 $this->char++;
878 $char = $this->char();
879
880 if($char === '>') {
881 $this->emitToken($this->token);
882 $this->state = 'data';
883
884 } elseif($char === '-') {
885 $this->token['data'] .= '-';
886
887 } elseif($this->char === $this->EOF) {
888 $this->emitToken($this->token);
889 $this->char--;
890 $this->state = 'data';
891
892 } else {
893 $this->token['data'] .= '--'.$char;
894 $this->state = 'comment';
895 }
896 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ commentState() [1/2]

HTML5::commentState ( )
private

Definition at line 1242 of file PH5P.php.

1243 {
1244 /* Consume the next input character: */
1245 $this->char++;
1246 $char = $this->char();
1247
1248 /* U+002D HYPHEN-MINUS (-) */
1249 if ($char === '-') {
1250 /* Switch to the comment dash state */
1251 $this->state = 'commentDash';
1252
1253 /* EOF */
1254 } elseif ($this->char === $this->EOF) {
1255 /* Parse error. Emit the comment token. Reconsume the EOF character
1256 in the data state. */
1257 $this->emitToken($this->token);
1258 $this->char--;
1259 $this->state = 'data';
1260
1261 /* Anything else */
1262 } else {
1263 /* Append the input character to the comment token's data. Stay in
1264 the comment state. */
1265 $this->token['data'] .= $char;
1266 }
1267 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ commentState() [2/2]

HTML5::commentState ( )
private

Definition at line 819 of file PH5P.php.

820 {
821 /* Consume the next input character: */
822 $this->char++;
823 $char = $this->char();
824
825 /* U+002D HYPHEN-MINUS (-) */
826 if($char === '-') {
827 /* Switch to the comment dash state */
828 $this->state = 'commentDash';
829
830 /* EOF */
831 } elseif($this->char === $this->EOF) {
832 /* Parse error. Emit the comment token. Reconsume the EOF character
833 in the data state. */
834 $this->emitToken($this->token);
835 $this->char--;
836 $this->state = 'data';
837
838 /* Anything else */
839 } else {
840 /* Append the input character to the comment token's data. Stay in
841 the comment state. */
842 $this->token['data'] .= $char;
843 }
844 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ dataState() [1/2]

HTML5::dataState ( )
private

Definition at line 504 of file PH5P.php.

505 {
506 // Consume the next input character
507 $this->char++;
508 $char = $this->char();
509
510 if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
511 /* U+0026 AMPERSAND (&)
512 When the content model flag is set to one of the PCDATA or RCDATA
513 states: switch to the entity data state. Otherwise: treat it as per
514 the "anything else" entry below. */
515 $this->state = 'entityData';
516
517 } elseif ($char === '-') {
518 /* If the content model flag is set to either the RCDATA state or
519 the CDATA state, and the escape flag is false, and there are at
520 least three characters before this one in the input stream, and the
521 last four characters in the input stream, including this one, are
522 U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
523 and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
524 if (($this->content_model === self::RCDATA || $this->content_model ===
525 self::CDATA) && $this->escape === false &&
526 $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--'
527 ) {
528 $this->escape = true;
529 }
530
531 /* In any case, emit the input character as a character token. Stay
532 in the data state. */
533 $this->emitToken(
534 array(
535 'type' => self::CHARACTR,
536 'data' => $char
537 )
538 );
539
540 /* U+003C LESS-THAN SIGN (<) */
541 } elseif ($char === '<' && ($this->content_model === self::PCDATA ||
542 (($this->content_model === self::RCDATA ||
543 $this->content_model === self::CDATA) && $this->escape === false))
544 ) {
545 /* When the content model flag is set to the PCDATA state: switch
546 to the tag open state.
547
548 When the content model flag is set to either the RCDATA state or
549 the CDATA state and the escape flag is false: switch to the tag
550 open state.
551
552 Otherwise: treat it as per the "anything else" entry below. */
553 $this->state = 'tagOpen';
554
555 /* U+003E GREATER-THAN SIGN (>) */
556 } elseif ($char === '>') {
557 /* If the content model flag is set to either the RCDATA state or
558 the CDATA state, and the escape flag is true, and the last three
559 characters in the input stream including this one are U+002D
560 HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
561 set the escape flag to false. */
562 if (($this->content_model === self::RCDATA ||
563 $this->content_model === self::CDATA) && $this->escape === true &&
564 $this->character($this->char, 3) === '-->'
565 ) {
566 $this->escape = false;
567 }
568
569 /* In any case, emit the input character as a character token.
570 Stay in the data state. */
571 $this->emitToken(
572 array(
573 'type' => self::CHARACTR,
574 'data' => $char
575 )
576 );
577
578 } elseif ($this->char === $this->EOF) {
579 /* EOF
580 Emit an end-of-file token. */
581 $this->EOF();
582
583 } elseif ($this->content_model === self::PLAINTEXT) {
584 /* When the content model flag is set to the PLAINTEXT state
585 THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
586 the text and emit it as a character token. */
587 $this->emitToken(
588 array(
589 'type' => self::CHARACTR,
590 'data' => substr($this->data, $this->char)
591 )
592 );
593
594 $this->EOF();
595
596 } else {
597 /* Anything else
598 THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters as
599 would otherwise also be treated as character tokens and emit them
600 as a single character token. Stay in the data state. */
601 $len = strcspn($this->data, '<&', $this->char);
602 $char = substr($this->data, $this->char, $len);
603 $this->char += $len - 1;
604
605 $this->emitToken(
606 array(
607 'type' => self::CHARACTR,
608 'data' => $char
609 )
610 );
611
612 $this->state = 'data';
613 }
614 }

References $char, char(), character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ dataState() [2/2]

HTML5::dataState ( )
private

Definition at line 113 of file PH5P.php.

114 {
115 // Consume the next input character
116 $this->char++;
117 $char = $this->char();
118
119 if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
120 /* U+0026 AMPERSAND (&)
121 When the content model flag is set to one of the PCDATA or RCDATA
122 states: switch to the entity data state. Otherwise: treat it as per
123 the "anything else" entry below. */
124 $this->state = 'entityData';
125
126 } elseif($char === '-') {
127 /* If the content model flag is set to either the RCDATA state or
128 the CDATA state, and the escape flag is false, and there are at
129 least three characters before this one in the input stream, and the
130 last four characters in the input stream, including this one, are
131 U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
132 and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
133 if(($this->content_model === self::RCDATA || $this->content_model ===
134 self::CDATA) && $this->escape === false &&
135 $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') {
136 $this->escape = true;
137 }
138
139 /* In any case, emit the input character as a character token. Stay
140 in the data state. */
141 $this->emitToken(array(
142 'type' => self::CHARACTR,
143 'data' => $char
144 ));
145
146 /* U+003C LESS-THAN SIGN (<) */
147 } elseif($char === '<' && ($this->content_model === self::PCDATA ||
148 (($this->content_model === self::RCDATA ||
149 $this->content_model === self::CDATA) && $this->escape === false))) {
150 /* When the content model flag is set to the PCDATA state: switch
151 to the tag open state.
152
153 When the content model flag is set to either the RCDATA state or
154 the CDATA state and the escape flag is false: switch to the tag
155 open state.
156
157 Otherwise: treat it as per the "anything else" entry below. */
158 $this->state = 'tagOpen';
159
160 /* U+003E GREATER-THAN SIGN (>) */
161 } elseif($char === '>') {
162 /* If the content model flag is set to either the RCDATA state or
163 the CDATA state, and the escape flag is true, and the last three
164 characters in the input stream including this one are U+002D
165 HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
166 set the escape flag to false. */
167 if(($this->content_model === self::RCDATA ||
168 $this->content_model === self::CDATA) && $this->escape === true &&
169 $this->character($this->char, 3) === '-->') {
170 $this->escape = false;
171 }
172
173 /* In any case, emit the input character as a character token.
174 Stay in the data state. */
175 $this->emitToken(array(
176 'type' => self::CHARACTR,
177 'data' => $char
178 ));
179
180 } elseif($this->char === $this->EOF) {
181 /* EOF
182 Emit an end-of-file token. */
183 $this->EOF();
184
185 } elseif($this->content_model === self::PLAINTEXT) {
186 /* When the content model flag is set to the PLAINTEXT state
187 THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
188 the text and emit it as a character token. */
189 $this->emitToken(array(
190 'type' => self::CHARACTR,
191 'data' => substr($this->data, $this->char)
192 ));
193
194 $this->EOF();
195
196 } else {
197 /* Anything else
198 THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters as
199 would otherwise also be treated as character tokens and emit them
200 as a single character token. Stay in the data state. */
201 $len = strcspn($this->data, '<&', $this->char);
202 $char = substr($this->data, $this->char, $len);
203 $this->char += $len - 1;
204
205 $this->emitToken(array(
206 'type' => self::CHARACTR,
207 'data' => $char
208 ));
209
210 $this->state = 'data';
211 }
212 }

References $char, char(), character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ doctypeNameState() [1/2]

HTML5::doctypeNameState ( )
private

Definition at line 1388 of file PH5P.php.

1389 {
1390 /* Consume the next input character: */
1391 $this->char++;
1392 $char = $this->char();
1393
1394 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1395 $this->state = 'AfterDoctypeName';
1396
1397 } elseif ($char === '>') {
1398 $this->emitToken($this->token);
1399 $this->state = 'data';
1400
1401 } elseif (preg_match('/^[a-z]$/', $char)) {
1402 $this->token['name'] .= strtoupper($char);
1403
1404 } elseif ($this->char === $this->EOF) {
1405 $this->emitToken($this->token);
1406 $this->char--;
1407 $this->state = 'data';
1408
1409 } else {
1410 $this->token['name'] .= $char;
1411 }
1412
1413 $this->token['error'] = ($this->token['name'] === 'HTML')
1414 ? false
1415 : true;
1416 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ doctypeNameState() [2/2]

HTML5::doctypeNameState ( )
private

Definition at line 961 of file PH5P.php.

962 {
963 /* Consume the next input character: */
964 $this->char++;
965 $char = $this->char();
966
967 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
968 $this->state = 'AfterDoctypeName';
969
970 } elseif($char === '>') {
971 $this->emitToken($this->token);
972 $this->state = 'data';
973
974 } elseif(preg_match('/^[a-z]$/', $char)) {
975 $this->token['name'] .= strtoupper($char);
976
977 } elseif($this->char === $this->EOF) {
978 $this->emitToken($this->token);
979 $this->char--;
980 $this->state = 'data';
981
982 } else {
983 $this->token['name'] .= $char;
984 }
985
986 $this->token['error'] = ($this->token['name'] === 'HTML')
987 ? false
988 : true;
989 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ doctypeState() [1/2]

HTML5::doctypeState ( )
private

Definition at line 1321 of file PH5P.php.

1322 {
1323 /* Consume the next input character: */
1324 $this->char++;
1325 $char = $this->char();
1326
1327 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1328 $this->state = 'beforeDoctypeName';
1329
1330 } else {
1331 $this->char--;
1332 $this->state = 'beforeDoctypeName';
1333 }
1334 }

References $char, and char().

+ Here is the call graph for this function:

◆ doctypeState() [2/2]

HTML5::doctypeState ( )
private

Definition at line 898 of file PH5P.php.

899 {
900 /* Consume the next input character: */
901 $this->char++;
902 $char = $this->char();
903
904 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
905 $this->state = 'beforeDoctypeName';
906
907 } else {
908 $this->char--;
909 $this->state = 'beforeDoctypeName';
910 }
911 }

References $char, and char().

+ Here is the call graph for this function:

◆ emitToken() [1/2]

HTML5::emitToken (   $token)
private

Definition at line 1553 of file PH5P.php.

1554 {
1555 $emit = $this->tree->emitToken($token);
1556
1557 if (is_int($emit)) {
1558 $this->content_model = $emit;
1559
1560 } elseif ($token['type'] === self::ENDTAG) {
1561 $this->content_model = self::PCDATA;
1562 }
1563 }
$token
Definition: PH5P.php:77

References $token, and PCDATA.

Referenced by afterAttributeNameState(), afterDoctypeNameState(), attributeNameState(), attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), attributeValueUnquotedState(), beforeAttributeNameState(), beforeAttributeValueState(), beforeDoctypeNameState(), bogusCommentState(), bogusDoctypeState(), closeTagOpenState(), commentDashState(), commentEndState(), commentState(), dataState(), doctypeNameState(), entityDataState(), entityInAttributeValueState(), tagNameState(), and tagOpenState().

+ Here is the caller graph for this function:

◆ emitToken() [2/2]

HTML5::emitToken (   $token)
private

Definition at line 1121 of file PH5P.php.

1122 {
1123 $emit = $this->tree->emitToken($token);
1124
1125 if(is_int($emit)) {
1126 $this->content_model = $emit;
1127
1128 } elseif($token['type'] === self::ENDTAG) {
1129 $this->content_model = self::PCDATA;
1130 }
1131 }

References $token, and PCDATA.

◆ entity() [1/2]

HTML5::entity ( )
private

Definition at line 1462 of file PH5P.php.

1463 {
1465
1466 // This section defines how to consume an entity. This definition is
1467 // used when parsing entities in text and in attributes.
1468
1469 // The behaviour depends on the identity of the next character (the
1470 // one immediately after the U+0026 AMPERSAND character):
1471
1472 switch ($this->character($this->char + 1)) {
1473 // U+0023 NUMBER SIGN (#)
1474 case '#':
1475
1476 // The behaviour further depends on the character after the
1477 // U+0023 NUMBER SIGN:
1478 switch ($this->character($this->char + 1)) {
1479 // U+0078 LATIN SMALL LETTER X
1480 // U+0058 LATIN CAPITAL LETTER X
1481 case 'x':
1482 case 'X':
1483 // Follow the steps below, but using the range of
1484 // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1485 // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
1486 // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
1487 // A, through to U+0046 LATIN CAPITAL LETTER F (in other
1488 // words, 0-9, A-F, a-f).
1489 $char = 1;
1490 $char_class = '0-9A-Fa-f';
1491 break;
1492
1493 // Anything else
1494 default:
1495 // Follow the steps below, but using the range of
1496 // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1497 // NINE (i.e. just 0-9).
1498 $char = 0;
1499 $char_class = '0-9';
1500 break;
1501 }
1502
1503 // Consume as many characters as match the range of characters
1504 // given above.
1505 $this->char++;
1506 $e_name = $this->characters($char_class, $this->char + $char + 1);
1507 $entity = $this->character($start, $this->char);
1508 $cond = strlen($e_name) > 0;
1509
1510 // The rest of the parsing happens below.
1511 break;
1512
1513 // Anything else
1514 default:
1515 // Consume the maximum number of characters possible, with the
1516 // consumed characters case-sensitively matching one of the
1517 // identifiers in the first column of the entities table.
1518 $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
1519 $len = strlen($e_name);
1520
1521 for ($c = 1; $c <= $len; $c++) {
1522 $id = substr($e_name, 0, $c);
1523 $this->char++;
1524
1525 if (in_array($id, $this->entities)) {
1526 if ($e_name[$c - 1] !== ';') {
1527 if ($c < $len && $e_name[$c] == ';') {
1528 $this->char++; // consume extra semicolon
1529 }
1530 }
1531 $entity = $id;
1532 break;
1533 }
1534 }
1535
1536 $cond = isset($entity);
1537 // The rest of the parsing happens below.
1538 break;
1539 }
1540
1541 if (!$cond) {
1542 // If no match can be made, then this is a parse error. No
1543 // characters are consumed, and nothing is returned.
1544 $this->char = $start;
1545 return false;
1546 }
1547
1548 // Return a character token for the character corresponding to the
1549 // entity name (as given by the second column of the entities table).
1550 return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
1551 }

References $char, $start, character(), and characters().

Referenced by entityDataState(), and entityInAttributeValueState().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ entity() [2/2]

HTML5::entity ( )
private

Definition at line 1035 of file PH5P.php.

1036 {
1038
1039 // This section defines how to consume an entity. This definition is
1040 // used when parsing entities in text and in attributes.
1041
1042 // The behaviour depends on the identity of the next character (the
1043 // one immediately after the U+0026 AMPERSAND character):
1044
1045 switch($this->character($this->char + 1)) {
1046 // U+0023 NUMBER SIGN (#)
1047 case '#':
1048
1049 // The behaviour further depends on the character after the
1050 // U+0023 NUMBER SIGN:
1051 switch($this->character($this->char + 1)) {
1052 // U+0078 LATIN SMALL LETTER X
1053 // U+0058 LATIN CAPITAL LETTER X
1054 case 'x':
1055 case 'X':
1056 // Follow the steps below, but using the range of
1057 // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1058 // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
1059 // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
1060 // A, through to U+0046 LATIN CAPITAL LETTER F (in other
1061 // words, 0-9, A-F, a-f).
1062 $char = 1;
1063 $char_class = '0-9A-Fa-f';
1064 break;
1065
1066 // Anything else
1067 default:
1068 // Follow the steps below, but using the range of
1069 // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1070 // NINE (i.e. just 0-9).
1071 $char = 0;
1072 $char_class = '0-9';
1073 break;
1074 }
1075
1076 // Consume as many characters as match the range of characters
1077 // given above.
1078 $this->char++;
1079 $e_name = $this->characters($char_class, $this->char + $char + 1);
1080 $entity = $this->character($start, $this->char);
1081 $cond = strlen($e_name) > 0;
1082
1083 // The rest of the parsing happens below.
1084 break;
1085
1086 // Anything else
1087 default:
1088 // Consume the maximum number of characters possible, with the
1089 // consumed characters case-sensitively matching one of the
1090 // identifiers in the first column of the entities table.
1091 $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
1092 $len = strlen($e_name);
1093
1094 for($c = 1; $c <= $len; $c++) {
1095 $id = substr($e_name, 0, $c);
1096 $this->char++;
1097
1098 if(in_array($id, $this->entities)) {
1099 $entity = $id;
1100 break;
1101 }
1102 }
1103
1104 $cond = isset($entity);
1105 // The rest of the parsing happens below.
1106 break;
1107 }
1108
1109 if(!$cond) {
1110 // If no match can be made, then this is a parse error. No
1111 // characters are consumed, and nothing is returned.
1112 $this->char = $start;
1113 return false;
1114 }
1115
1116 // Return a character token for the character corresponding to the
1117 // entity name (as given by the second column of the entities table).
1118 return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
1119 }

References $char, $start, character(), and characters().

+ Here is the call graph for this function:

◆ entityDataState() [1/2]

HTML5::entityDataState ( )
private

Definition at line 616 of file PH5P.php.

617 {
618 // Attempt to consume an entity.
619 $entity = $this->entity();
620
621 // If nothing is returned, emit a U+0026 AMPERSAND character token.
622 // Otherwise, emit the character token that was returned.
623 $char = (!$entity) ? '&' : $entity;
624 $this->emitToken(
625 array(
626 'type' => self::CHARACTR,
627 'data' => $char
628 )
629 );
630
631 // Finally, switch to the data state.
632 $this->state = 'data';
633 }
entity()
Definition: PH5P.php:1462

References $char, emitToken(), and entity().

+ Here is the call graph for this function:

◆ entityDataState() [2/2]

HTML5::entityDataState ( )
private

Definition at line 214 of file PH5P.php.

215 {
216 // Attempt to consume an entity.
217 $entity = $this->entity();
218
219 // If nothing is returned, emit a U+0026 AMPERSAND character token.
220 // Otherwise, emit the character token that was returned.
221 $char = (!$entity) ? '&' : $entity;
222 $this->emitToken($char);
223
224 // Finally, switch to the data state.
225 $this->state = 'data';
226 }

References $char, emitToken(), and entity().

+ Here is the call graph for this function:

◆ entityInAttributeValueState() [1/2]

HTML5::entityInAttributeValueState ( )
private

Definition at line 1168 of file PH5P.php.

1169 {
1170 // Attempt to consume an entity.
1171 $entity = $this->entity();
1172
1173 // If nothing is returned, append a U+0026 AMPERSAND character to the
1174 // current attribute's value. Otherwise, append the character token that
1175 // was returned to the current attribute's value.
1176 $char = (!$entity)
1177 ? '&'
1178 : $entity;
1179
1180 $last = count($this->token['attr']) - 1;
1181 $this->token['attr'][$last]['value'] .= $char;
1182 }

References $char, and entity().

Referenced by attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), and attributeValueUnquotedState().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ entityInAttributeValueState() [2/2]

HTML5::entityInAttributeValueState ( )
private

Definition at line 748 of file PH5P.php.

749 {
750 // Attempt to consume an entity.
751 $entity = $this->entity();
752
753 // If nothing is returned, append a U+0026 AMPERSAND character to the
754 // current attribute's value. Otherwise, emit the character token that
755 // was returned.
756 $char = (!$entity)
757 ? '&'
758 : $entity;
759
760 $this->emitToken($char);
761 }

References $char, emitToken(), and entity().

+ Here is the call graph for this function:

◆ EOF() [1/2]

HTML5::EOF ( )
private

Definition at line 1565 of file PH5P.php.

1566 {
1567 $this->state = null;
1568 $this->tree->emitToken(
1569 array(
1570 'type' => self::EOF
1571 )
1572 );
1573 }
const EOF
How fgetc() reports an End Of File.
Definition: JSMin_lib.php:92

References EOF.

◆ EOF() [2/2]

HTML5::EOF ( )
private

Definition at line 1133 of file PH5P.php.

1134 {
1135 $this->state = null;
1136 $this->tree->emitToken(array(
1137 'type' => self::EOF
1138 ));
1139 }

References EOF.

◆ markupDeclarationOpenState() [1/2]

HTML5::markupDeclarationOpenState ( )
private

Definition at line 1213 of file PH5P.php.

1214 {
1215 /* If the next two characters are both U+002D HYPHEN-MINUS (-)
1216 characters, consume those two characters, create a comment token whose
1217 data is the empty string, and switch to the comment state. */
1218 if ($this->character($this->char + 1, 2) === '--') {
1219 $this->char += 2;
1220 $this->state = 'comment';
1221 $this->token = array(
1222 'data' => null,
1223 'type' => self::COMMENT
1224 );
1225
1226 /* Otherwise if the next seven characters are a case-insensitive match
1227 for the word "DOCTYPE", then consume those characters and switch to the
1228 DOCTYPE state. */
1229 } elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
1230 $this->char += 7;
1231 $this->state = 'doctype';
1232
1233 /* Otherwise, this is a parse error. Switch to the bogus comment state.
1234 The next character that is consumed, if any, is the first character
1235 that will be in the comment. */
1236 } else {
1237 $this->char++;
1238 $this->state = 'bogusComment';
1239 }
1240 }

References character().

+ Here is the call graph for this function:

◆ markupDeclarationOpenState() [2/2]

HTML5::markupDeclarationOpenState ( )
private

Definition at line 790 of file PH5P.php.

791 {
792 /* If the next two characters are both U+002D HYPHEN-MINUS (-)
793 characters, consume those two characters, create a comment token whose
794 data is the empty string, and switch to the comment state. */
795 if($this->character($this->char + 1, 2) === '--') {
796 $this->char += 2;
797 $this->state = 'comment';
798 $this->token = array(
799 'data' => null,
800 'type' => self::COMMENT
801 );
802
803 /* Otherwise if the next seven characters are a case-insensitive match
804 for the word "DOCTYPE", then consume those characters and switch to the
805 DOCTYPE state. */
806 } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
807 $this->char += 7;
808 $this->state = 'doctype';
809
810 /* Otherwise, this is a parse error. Switch to the bogus comment state.
811 The next character that is consumed, if any, is the first character
812 that will be in the comment. */
813 } else {
814 $this->char++;
815 $this->state = 'bogusComment';
816 }
817 }

References character().

+ Here is the call graph for this function:

◆ save() [1/2]

HTML5::save ( )

Definition at line 476 of file PH5P.php.

477 {
478 return $this->tree->save();
479 }

◆ save() [2/2]

HTML5::save ( )

Definition at line 85 of file PH5P.php.

86 {
87 return $this->tree->save();
88 }

◆ tagNameState() [1/2]

HTML5::tagNameState ( )
private

Definition at line 808 of file PH5P.php.

809 {
810 // Consume the next input character:
811 $this->char++;
812 $char = $this->character($this->char);
813
814 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
815 /* U+0009 CHARACTER TABULATION
816 U+000A LINE FEED (LF)
817 U+000B LINE TABULATION
818 U+000C FORM FEED (FF)
819 U+0020 SPACE
820 Switch to the before attribute name state. */
821 $this->state = 'beforeAttributeName';
822
823 } elseif ($char === '>') {
824 /* U+003E GREATER-THAN SIGN (>)
825 Emit the current tag token. Switch to the data state. */
826 $this->emitToken($this->token);
827 $this->state = 'data';
828
829 } elseif ($this->char === $this->EOF) {
830 /* EOF
831 Parse error. Emit the current tag token. Reconsume the EOF
832 character in the data state. */
833 $this->emitToken($this->token);
834
835 $this->char--;
836 $this->state = 'data';
837
838 } elseif ($char === '/') {
839 /* U+002F SOLIDUS (/)
840 Parse error unless this is a permitted slash. Switch to the before
841 attribute name state. */
842 $this->state = 'beforeAttributeName';
843
844 } else {
845 /* Anything else
846 Append the current input character to the current tag token's tag name.
847 Stay in the tag name state. */
848 $this->token['name'] .= strtolower($char);
849 $this->state = 'tagName';
850 }
851 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ tagNameState() [2/2]

HTML5::tagNameState ( )
private

Definition at line 388 of file PH5P.php.

389 {
390 // Consume the next input character:
391 $this->char++;
392 $char = $this->character($this->char);
393
394 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
395 /* U+0009 CHARACTER TABULATION
396 U+000A LINE FEED (LF)
397 U+000B LINE TABULATION
398 U+000C FORM FEED (FF)
399 U+0020 SPACE
400 Switch to the before attribute name state. */
401 $this->state = 'beforeAttributeName';
402
403 } elseif($char === '>') {
404 /* U+003E GREATER-THAN SIGN (>)
405 Emit the current tag token. Switch to the data state. */
406 $this->emitToken($this->token);
407 $this->state = 'data';
408
409 } elseif($this->char === $this->EOF) {
410 /* EOF
411 Parse error. Emit the current tag token. Reconsume the EOF
412 character in the data state. */
413 $this->emitToken($this->token);
414
415 $this->char--;
416 $this->state = 'data';
417
418 } elseif($char === '/') {
419 /* U+002F SOLIDUS (/)
420 Parse error unless this is a permitted slash. Switch to the before
421 attribute name state. */
422 $this->state = 'beforeAttributeName';
423
424 } else {
425 /* Anything else
426 Append the current input character to the current tag token's tag name.
427 Stay in the tag name state. */
428 $this->token['name'] .= strtolower($char);
429 $this->state = 'tagName';
430 }
431 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ tagOpenState() [1/2]

HTML5::tagOpenState ( )
private

Definition at line 635 of file PH5P.php.

636 {
637 switch ($this->content_model) {
638 case self::RCDATA:
639 case self::CDATA:
640 /* If the next input character is a U+002F SOLIDUS (/) character,
641 consume it and switch to the close tag open state. If the next
642 input character is not a U+002F SOLIDUS (/) character, emit a
643 U+003C LESS-THAN SIGN character token and switch to the data
644 state to process the next input character. */
645 if ($this->character($this->char + 1) === '/') {
646 $this->char++;
647 $this->state = 'closeTagOpen';
648
649 } else {
650 $this->emitToken(
651 array(
652 'type' => self::CHARACTR,
653 'data' => '<'
654 )
655 );
656
657 $this->state = 'data';
658 }
659 break;
660
661 case self::PCDATA:
662 // If the content model flag is set to the PCDATA state
663 // Consume the next input character:
664 $this->char++;
665 $char = $this->char();
666
667 if ($char === '!') {
668 /* U+0021 EXCLAMATION MARK (!)
669 Switch to the markup declaration open state. */
670 $this->state = 'markupDeclarationOpen';
671
672 } elseif ($char === '/') {
673 /* U+002F SOLIDUS (/)
674 Switch to the close tag open state. */
675 $this->state = 'closeTagOpen';
676
677 } elseif (preg_match('/^[A-Za-z]$/', $char)) {
678 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
679 Create a new start tag token, set its tag name to the lowercase
680 version of the input character (add 0x0020 to the character's code
681 point), then switch to the tag name state. (Don't emit the token
682 yet; further details will be filled in before it is emitted.) */
683 $this->token = array(
684 'name' => strtolower($char),
685 'type' => self::STARTTAG,
686 'attr' => array()
687 );
688
689 $this->state = 'tagName';
690
691 } elseif ($char === '>') {
692 /* U+003E GREATER-THAN SIGN (>)
693 Parse error. Emit a U+003C LESS-THAN SIGN character token and a
694 U+003E GREATER-THAN SIGN character token. Switch to the data state. */
695 $this->emitToken(
696 array(
697 'type' => self::CHARACTR,
698 'data' => '<>'
699 )
700 );
701
702 $this->state = 'data';
703
704 } elseif ($char === '?') {
705 /* U+003F QUESTION MARK (?)
706 Parse error. Switch to the bogus comment state. */
707 $this->state = 'bogusComment';
708
709 } else {
710 /* Anything else
711 Parse error. Emit a U+003C LESS-THAN SIGN character token and
712 reconsume the current input character in the data state. */
713 $this->emitToken(
714 array(
715 'type' => self::CHARACTR,
716 'data' => '<'
717 )
718 );
719
720 $this->char--;
721 $this->state = 'data';
722 }
723 break;
724 }
725 }
const CDATA
Definition: PH5P.php:451
const RCDATA
Definition: PH5P.php:450

References $char, CDATA, char(), character(), emitToken(), PCDATA, and RCDATA.

+ Here is the call graph for this function:

◆ tagOpenState() [2/2]

HTML5::tagOpenState ( )
private

Definition at line 228 of file PH5P.php.

229 {
230 switch($this->content_model) {
231 case self::RCDATA:
232 case self::CDATA:
233 /* If the next input character is a U+002F SOLIDUS (/) character,
234 consume it and switch to the close tag open state. If the next
235 input character is not a U+002F SOLIDUS (/) character, emit a
236 U+003C LESS-THAN SIGN character token and switch to the data
237 state to process the next input character. */
238 if($this->character($this->char + 1) === '/') {
239 $this->char++;
240 $this->state = 'closeTagOpen';
241
242 } else {
243 $this->emitToken(array(
244 'type' => self::CHARACTR,
245 'data' => '<'
246 ));
247
248 $this->state = 'data';
249 }
250 break;
251
252 case self::PCDATA:
253 // If the content model flag is set to the PCDATA state
254 // Consume the next input character:
255 $this->char++;
256 $char = $this->char();
257
258 if($char === '!') {
259 /* U+0021 EXCLAMATION MARK (!)
260 Switch to the markup declaration open state. */
261 $this->state = 'markupDeclarationOpen';
262
263 } elseif($char === '/') {
264 /* U+002F SOLIDUS (/)
265 Switch to the close tag open state. */
266 $this->state = 'closeTagOpen';
267
268 } elseif(preg_match('/^[A-Za-z]$/', $char)) {
269 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
270 Create a new start tag token, set its tag name to the lowercase
271 version of the input character (add 0x0020 to the character's code
272 point), then switch to the tag name state. (Don't emit the token
273 yet; further details will be filled in before it is emitted.) */
274 $this->token = array(
275 'name' => strtolower($char),
276 'type' => self::STARTTAG,
277 'attr' => array()
278 );
279
280 $this->state = 'tagName';
281
282 } elseif($char === '>') {
283 /* U+003E GREATER-THAN SIGN (>)
284 Parse error. Emit a U+003C LESS-THAN SIGN character token and a
285 U+003E GREATER-THAN SIGN character token. Switch to the data state. */
286 $this->emitToken(array(
287 'type' => self::CHARACTR,
288 'data' => '<>'
289 ));
290
291 $this->state = 'data';
292
293 } elseif($char === '?') {
294 /* U+003F QUESTION MARK (?)
295 Parse error. Switch to the bogus comment state. */
296 $this->state = 'bogusComment';
297
298 } else {
299 /* Anything else
300 Parse error. Emit a U+003C LESS-THAN SIGN character token and
301 reconsume the current input character in the data state. */
302 $this->emitToken(array(
303 'type' => self::CHARACTR,
304 'data' => '<'
305 ));
306
307 $this->char--;
308 $this->state = 'data';
309 }
310 break;
311 }
312 }

References $char, CDATA, char(), character(), emitToken(), PCDATA, and RCDATA.

+ Here is the call graph for this function:

Field Documentation

◆ $char

◆ $content_model

HTML5::$content_model
private

Definition at line 78 of file PH5P.php.

◆ $data

HTML5::$data
private

Definition at line 72 of file PH5P.php.

Referenced by __construct(), and bogusCommentState().

◆ $entities

HTML5::$entities
private

Definition at line 80 of file PH5P.php.

◆ $EOF

HTML5::$EOF
private

Definition at line 74 of file PH5P.php.

◆ $escape

HTML5::$escape = false
private

Definition at line 79 of file PH5P.php.

◆ $state

HTML5::$state
private

Definition at line 75 of file PH5P.php.

◆ $token

HTML5::$token
private

Definition at line 77 of file PH5P.php.

Referenced by emitToken().

◆ $tree

HTML5::$tree
private

Definition at line 76 of file PH5P.php.

◆ CDATA

const HTML5::CDATA = 2

◆ CHARACTR

◆ COMMENT

◆ DOCTYPE

◆ ENDTAG

◆ EOF

◆ PCDATA

const HTML5::PCDATA = 0

Definition at line 449 of file PH5P.php.

Referenced by __construct(), emitToken(), HTML5TreeConstructer\inHead(), and tagOpenState().

◆ PLAINTEXT

const HTML5::PLAINTEXT = 3

Definition at line 452 of file PH5P.php.

Referenced by HTML5TreeConstructer\inBody().

◆ RCDATA

const HTML5::RCDATA = 1

◆ STARTTAG


The documentation for this class was generated from the following files: