ILIAS  release_5-0 Revision 5.0.0-1144-gc4397b1f870
HTML5 Class Reference
+ Collaboration diagram for HTML5:

Public Member Functions

 __construct ($data)
 
 save ()
 

Data Fields

const PCDATA = 0
 
const RCDATA = 1
 
const CDATA = 2
 
const PLAINTEXT = 3
 
const DOCTYPE = 0
 
const STARTTAG = 1
 
const ENDTAG = 2
 
const COMMENT = 3
 
const CHARACTR = 4
 
const EOF = 5
 

Private Member Functions

 char ()
 
 character ($s, $l=0)
 
 characters ($char_class, $start)
 
 dataState ()
 
 entityDataState ()
 
 tagOpenState ()
 
 closeTagOpenState ()
 
 tagNameState ()
 
 beforeAttributeNameState ()
 
 attributeNameState ()
 
 afterAttributeNameState ()
 
 beforeAttributeValueState ()
 
 attributeValueDoubleQuotedState ()
 
 attributeValueSingleQuotedState ()
 
 attributeValueUnquotedState ()
 
 entityInAttributeValueState ()
 
 bogusCommentState ()
 
 markupDeclarationOpenState ()
 
 commentState ()
 
 commentDashState ()
 
 commentEndState ()
 
 doctypeState ()
 
 beforeDoctypeNameState ()
 
 doctypeNameState ()
 
 afterDoctypeNameState ()
 
 bogusDoctypeState ()
 
 entity ()
 
 emitToken ($token)
 
 EOF ()
 

Private Attributes

 $data
 
 $char
 
 $EOF
 
 $state
 
 $tree
 
 $token
 
 $content_model
 
 $escape = false
 
 $entities
 

Detailed Description

Definition at line 71 of file PH5P.php.

Constructor & Destructor Documentation

◆ __construct()

HTML5::__construct (   $data)

Definition at line 462 of file PH5P.php.

463 {
464 $this->data = $data;
465 $this->char = -1;
466 $this->EOF = strlen($data);
467 $this->tree = new HTML5TreeConstructer;
468 $this->content_model = self::PCDATA;
469
470 $this->state = 'data';
471
472 while ($this->state !== null) {
473 $this->{$this->state . 'State'}();
474 }
475 }
const PCDATA
Definition: PH5P.php:450
const EOF
Definition: PH5P.php:460
$data
Definition: PH5P.php:73

References $data, EOF, and PCDATA.

Member Function Documentation

◆ afterAttributeNameState()

HTML5::afterAttributeNameState ( )
private

Definition at line 956 of file PH5P.php.

957 {
958 // Consume the next input character:
959 $this->char++;
960 $char = $this->character($this->char);
961
962 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
963 /* U+0009 CHARACTER TABULATION
964 U+000A LINE FEED (LF)
965 U+000B LINE TABULATION
966 U+000C FORM FEED (FF)
967 U+0020 SPACE
968 Stay in the after attribute name state. */
969 $this->state = 'afterAttributeName';
970
971 } elseif ($char === '=') {
972 /* U+003D EQUALS SIGN (=)
973 Switch to the before attribute value state. */
974 $this->state = 'beforeAttributeValue';
975
976 } elseif ($char === '>') {
977 /* U+003E GREATER-THAN SIGN (>)
978 Emit the current tag token. Switch to the data state. */
979 $this->emitToken($this->token);
980 $this->state = 'data';
981
982 } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
983 /* U+002F SOLIDUS (/)
984 Parse error unless this is a permitted slash. Switch to the
985 before attribute name state. */
986 $this->state = 'beforeAttributeName';
987
988 } elseif ($this->char === $this->EOF) {
989 /* EOF
990 Parse error. Emit the current tag token. Reconsume the EOF
991 character in the data state. */
992 $this->emitToken($this->token);
993
994 $this->char--;
995 $this->state = 'data';
996
997 } else {
998 /* Anything else
999 Start a new attribute in the current tag token. Set that attribute's
1000 name to the current input character, and its value to the empty string.
1001 Switch to the attribute name state. */
1002 $this->token['attr'][] = array(
1003 'name' => strtolower($char),
1004 'value' => null
1005 );
1006
1007 $this->state = 'attributeName';
1008 }
1009 }
emitToken($token)
Definition: PH5P.php:1554
character($s, $l=0)
Definition: PH5P.php:489
$char
Definition: PH5P.php:74

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ afterDoctypeNameState()

HTML5::afterDoctypeNameState ( )
private

Definition at line 1419 of file PH5P.php.

1420 {
1421 /* Consume the next input character: */
1422 $this->char++;
1423 $char = $this->char();
1424
1425 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1426 // Stay in the after DOCTYPE name state.
1427
1428 } elseif ($char === '>') {
1429 $this->emitToken($this->token);
1430 $this->state = 'data';
1431
1432 } elseif ($this->char === $this->EOF) {
1433 $this->emitToken($this->token);
1434 $this->char--;
1435 $this->state = 'data';
1436
1437 } else {
1438 $this->token['error'] = true;
1439 $this->state = 'bogusDoctype';
1440 }
1441 }
char()
Definition: PH5P.php:482

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ attributeNameState()

HTML5::attributeNameState ( )
private

Definition at line 904 of file PH5P.php.

905 {
906 // Consume the next input character:
907 $this->char++;
908 $char = $this->character($this->char);
909
910 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
911 /* U+0009 CHARACTER TABULATION
912 U+000A LINE FEED (LF)
913 U+000B LINE TABULATION
914 U+000C FORM FEED (FF)
915 U+0020 SPACE
916 Switch to the after attribute name state. */
917 $this->state = 'afterAttributeName';
918
919 } elseif ($char === '=') {
920 /* U+003D EQUALS SIGN (=)
921 Switch to the before attribute value state. */
922 $this->state = 'beforeAttributeValue';
923
924 } elseif ($char === '>') {
925 /* U+003E GREATER-THAN SIGN (>)
926 Emit the current tag token. Switch to the data state. */
927 $this->emitToken($this->token);
928 $this->state = 'data';
929
930 } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
931 /* U+002F SOLIDUS (/)
932 Parse error unless this is a permitted slash. Switch to the before
933 attribute name state. */
934 $this->state = 'beforeAttributeName';
935
936 } elseif ($this->char === $this->EOF) {
937 /* EOF
938 Parse error. Emit the current tag token. Reconsume the EOF
939 character in the data state. */
940 $this->emitToken($this->token);
941
942 $this->char--;
943 $this->state = 'data';
944
945 } else {
946 /* Anything else
947 Append the current input character to the current attribute's name.
948 Stay in the attribute name state. */
949 $last = count($this->token['attr']) - 1;
950 $this->token['attr'][$last]['name'] .= strtolower($char);
951
952 $this->state = 'attributeName';
953 }
954 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ attributeValueDoubleQuotedState()

HTML5::attributeValueDoubleQuotedState ( )
private

Definition at line 1060 of file PH5P.php.

1061 {
1062 // Consume the next input character:
1063 $this->char++;
1064 $char = $this->character($this->char);
1065
1066 if ($char === '"') {
1067 /* U+0022 QUOTATION MARK (")
1068 Switch to the before attribute name state. */
1069 $this->state = 'beforeAttributeName';
1070
1071 } elseif ($char === '&') {
1072 /* U+0026 AMPERSAND (&)
1073 Switch to the entity in attribute value state. */
1074 $this->entityInAttributeValueState('double');
1075
1076 } elseif ($this->char === $this->EOF) {
1077 /* EOF
1078 Parse error. Emit the current tag token. Reconsume the character
1079 in the data state. */
1080 $this->emitToken($this->token);
1081
1082 $this->char--;
1083 $this->state = 'data';
1084
1085 } else {
1086 /* Anything else
1087 Append the current input character to the current attribute's value.
1088 Stay in the attribute value (double-quoted) state. */
1089 $last = count($this->token['attr']) - 1;
1090 $this->token['attr'][$last]['value'] .= $char;
1091
1092 $this->state = 'attributeValueDoubleQuoted';
1093 }
1094 }
entityInAttributeValueState()
Definition: PH5P.php:1169

References $char, character(), emitToken(), entityInAttributeValueState(), and EOF.

+ Here is the call graph for this function:

◆ attributeValueSingleQuotedState()

HTML5::attributeValueSingleQuotedState ( )
private

Definition at line 1096 of file PH5P.php.

1097 {
1098 // Consume the next input character:
1099 $this->char++;
1100 $char = $this->character($this->char);
1101
1102 if ($char === '\'') {
1103 /* U+0027 APOSTROPHE (')
1104 Switch to the before attribute name state. */
1105 $this->state = 'beforeAttributeName';
1106
1107 } elseif ($char === '&') {
1108 /* U+0026 AMPERSAND (&)
1109 Switch to the entity in attribute value state. */
1110 $this->entityInAttributeValueState('single');
1111
1112 } elseif ($this->char === $this->EOF) {
1113 /* EOF
1114 Parse error. Emit the current tag token. Reconsume the character
1115 in the data state. */
1116 $this->emitToken($this->token);
1117
1118 $this->char--;
1119 $this->state = 'data';
1120
1121 } else {
1122 /* Anything else
1123 Append the current input character to the current attribute's value.
1124 Stay in the attribute value (single-quoted) state. */
1125 $last = count($this->token['attr']) - 1;
1126 $this->token['attr'][$last]['value'] .= $char;
1127
1128 $this->state = 'attributeValueSingleQuoted';
1129 }
1130 }

References $char, character(), emitToken(), entityInAttributeValueState(), and EOF.

+ Here is the call graph for this function:

◆ attributeValueUnquotedState()

HTML5::attributeValueUnquotedState ( )
private

Definition at line 1132 of file PH5P.php.

1133 {
1134 // Consume the next input character:
1135 $this->char++;
1136 $char = $this->character($this->char);
1137
1138 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1139 /* U+0009 CHARACTER TABULATION
1140 U+000A LINE FEED (LF)
1141 U+000B LINE TABULATION
1142 U+000C FORM FEED (FF)
1143 U+0020 SPACE
1144 Switch to the before attribute name state. */
1145 $this->state = 'beforeAttributeName';
1146
1147 } elseif ($char === '&') {
1148 /* U+0026 AMPERSAND (&)
1149 Switch to the entity in attribute value state. */
1150 $this->entityInAttributeValueState();
1151
1152 } elseif ($char === '>') {
1153 /* U+003E GREATER-THAN SIGN (>)
1154 Emit the current tag token. Switch to the data state. */
1155 $this->emitToken($this->token);
1156 $this->state = 'data';
1157
1158 } else {
1159 /* Anything else
1160 Append the current input character to the current attribute's value.
1161 Stay in the attribute value (unquoted) state. */
1162 $last = count($this->token['attr']) - 1;
1163 $this->token['attr'][$last]['value'] .= $char;
1164
1165 $this->state = 'attributeValueUnquoted';
1166 }
1167 }

References $char, character(), emitToken(), and entityInAttributeValueState().

+ Here is the call graph for this function:

◆ beforeAttributeNameState()

HTML5::beforeAttributeNameState ( )
private

Definition at line 854 of file PH5P.php.

855 {
856 // Consume the next input character:
857 $this->char++;
858 $char = $this->character($this->char);
859
860 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
861 /* U+0009 CHARACTER TABULATION
862 U+000A LINE FEED (LF)
863 U+000B LINE TABULATION
864 U+000C FORM FEED (FF)
865 U+0020 SPACE
866 Stay in the before attribute name state. */
867 $this->state = 'beforeAttributeName';
868
869 } elseif ($char === '>') {
870 /* U+003E GREATER-THAN SIGN (>)
871 Emit the current tag token. Switch to the data state. */
872 $this->emitToken($this->token);
873 $this->state = 'data';
874
875 } elseif ($char === '/') {
876 /* U+002F SOLIDUS (/)
877 Parse error unless this is a permitted slash. Stay in the before
878 attribute name state. */
879 $this->state = 'beforeAttributeName';
880
881 } elseif ($this->char === $this->EOF) {
882 /* EOF
883 Parse error. Emit the current tag token. Reconsume the EOF
884 character in the data state. */
885 $this->emitToken($this->token);
886
887 $this->char--;
888 $this->state = 'data';
889
890 } else {
891 /* Anything else
892 Start a new attribute in the current tag token. Set that attribute's
893 name to the current input character, and its value to the empty string.
894 Switch to the attribute name state. */
895 $this->token['attr'][] = array(
896 'name' => strtolower($char),
897 'value' => null
898 );
899
900 $this->state = 'attributeName';
901 }
902 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ beforeAttributeValueState()

HTML5::beforeAttributeValueState ( )
private

Definition at line 1011 of file PH5P.php.

1012 {
1013 // Consume the next input character:
1014 $this->char++;
1015 $char = $this->character($this->char);
1016
1017 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1018 /* U+0009 CHARACTER TABULATION
1019 U+000A LINE FEED (LF)
1020 U+000B LINE TABULATION
1021 U+000C FORM FEED (FF)
1022 U+0020 SPACE
1023 Stay in the before attribute value state. */
1024 $this->state = 'beforeAttributeValue';
1025
1026 } elseif ($char === '"') {
1027 /* U+0022 QUOTATION MARK (")
1028 Switch to the attribute value (double-quoted) state. */
1029 $this->state = 'attributeValueDoubleQuoted';
1030
1031 } elseif ($char === '&') {
1032 /* U+0026 AMPERSAND (&)
1033 Switch to the attribute value (unquoted) state and reconsume
1034 this input character. */
1035 $this->char--;
1036 $this->state = 'attributeValueUnquoted';
1037
1038 } elseif ($char === '\'') {
1039 /* U+0027 APOSTROPHE (')
1040 Switch to the attribute value (single-quoted) state. */
1041 $this->state = 'attributeValueSingleQuoted';
1042
1043 } elseif ($char === '>') {
1044 /* U+003E GREATER-THAN SIGN (>)
1045 Emit the current tag token. Switch to the data state. */
1046 $this->emitToken($this->token);
1047 $this->state = 'data';
1048
1049 } else {
1050 /* Anything else
1051 Append the current input character to the current attribute's value.
1052 Switch to the attribute value (unquoted) state. */
1053 $last = count($this->token['attr']) - 1;
1054 $this->token['attr'][$last]['value'] .= $char;
1055
1056 $this->state = 'attributeValueUnquoted';
1057 }
1058 }

References $char, character(), and emitToken().

+ Here is the call graph for this function:

◆ beforeDoctypeNameState()

HTML5::beforeDoctypeNameState ( )
private

Definition at line 1337 of file PH5P.php.

1338 {
1339 /* Consume the next input character: */
1340 $this->char++;
1341 $char = $this->char();
1342
1343 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1344 // Stay in the before DOCTYPE name state.
1345
1346 } elseif (preg_match('/^[a-z]$/', $char)) {
1347 $this->token = array(
1348 'name' => strtoupper($char),
1349 'type' => self::DOCTYPE,
1350 'error' => true
1351 );
1352
1353 $this->state = 'doctypeName';
1354
1355 } elseif ($char === '>') {
1356 $this->emitToken(
1357 array(
1358 'name' => null,
1359 'type' => self::DOCTYPE,
1360 'error' => true
1361 )
1362 );
1363
1364 $this->state = 'data';
1365
1366 } elseif ($this->char === $this->EOF) {
1367 $this->emitToken(
1368 array(
1369 'name' => null,
1370 'type' => self::DOCTYPE,
1371 'error' => true
1372 )
1373 );
1374
1375 $this->char--;
1376 $this->state = 'data';
1377
1378 } else {
1379 $this->token = array(
1380 'name' => $char,
1381 'type' => self::DOCTYPE,
1382 'error' => true
1383 );
1384
1385 $this->state = 'doctypeName';
1386 }
1387 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ bogusCommentState()

HTML5::bogusCommentState ( )
private

Definition at line 1185 of file PH5P.php.

1186 {
1187 /* Consume every character up to the first U+003E GREATER-THAN SIGN
1188 character (>) or the end of the file (EOF), whichever comes first. Emit
1189 a comment token whose data is the concatenation of all the characters
1190 starting from and including the character that caused the state machine
1191 to switch into the bogus comment state, up to and including the last
1192 consumed character before the U+003E character, if any, or up to the
1193 end of the file otherwise. (If the comment was started by the end of
1194 the file (EOF), the token is empty.) */
1195 $data = $this->characters('^>', $this->char);
1196 $this->emitToken(
1197 array(
1198 'data' => $data,
1199 'type' => self::COMMENT
1200 )
1201 );
1202
1203 $this->char += strlen($data);
1204
1205 /* Switch to the data state. */
1206 $this->state = 'data';
1207
1208 /* If the end of the file was reached, reconsume the EOF character. */
1209 if ($this->char === $this->EOF) {
1210 $this->char = $this->EOF - 1;
1211 }
1212 }
characters($char_class, $start)
Definition: PH5P.php:500

References $data, characters(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ bogusDoctypeState()

HTML5::bogusDoctypeState ( )
private

Definition at line 1443 of file PH5P.php.

1444 {
1445 /* Consume the next input character: */
1446 $this->char++;
1447 $char = $this->char();
1448
1449 if ($char === '>') {
1450 $this->emitToken($this->token);
1451 $this->state = 'data';
1452
1453 } elseif ($this->char === $this->EOF) {
1454 $this->emitToken($this->token);
1455 $this->char--;
1456 $this->state = 'data';
1457
1458 } else {
1459 // Stay in the bogus DOCTYPE state.
1460 }
1461 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ char()

HTML5::char ( )
private

Definition at line 482 of file PH5P.php.

483 {
484 return ($this->char < $this->EOF)
485 ? $this->data[$this->char]
486 : false;
487 }

References $char, and EOF.

Referenced by afterDoctypeNameState(), beforeDoctypeNameState(), bogusDoctypeState(), closeTagOpenState(), commentDashState(), commentEndState(), commentState(), dataState(), doctypeNameState(), doctypeState(), and tagOpenState().

+ Here is the caller graph for this function:

◆ character()

HTML5::character (   $s,
  $l = 0 
)
private

Definition at line 489 of file PH5P.php.

490 {
491 if ($s + $l < $this->EOF) {
492 if ($l === 0) {
493 return $this->data[$s];
494 } else {
495 return substr($this->data, $s, $l);
496 }
497 }
498 }

References EOF.

Referenced by afterAttributeNameState(), attributeNameState(), attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), attributeValueUnquotedState(), beforeAttributeNameState(), beforeAttributeValueState(), closeTagOpenState(), dataState(), entity(), markupDeclarationOpenState(), tagNameState(), and tagOpenState().

+ Here is the caller graph for this function:

◆ characters()

HTML5::characters (   $char_class,
  $start 
)
private

Definition at line 500 of file PH5P.php.

501 {
502 return preg_replace('#^([' . $char_class . ']+).*#s', '\\1', substr($this->data, $start));
503 }

Referenced by bogusCommentState(), closeTagOpenState(), and entity().

+ Here is the caller graph for this function:

◆ closeTagOpenState()

HTML5::closeTagOpenState ( )
private

Definition at line 728 of file PH5P.php.

729 {
730 $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
731 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
732
733 if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
734 (!$the_same || ($the_same && (!preg_match(
735 '/[\t\n\x0b\x0c >\/]/',
736 $this->character($this->char + 1 + strlen($next_node))
737 ) || $this->EOF === $this->char)))
738 ) {
739 /* If the content model flag is set to the RCDATA or CDATA states then
740 examine the next few characters. If they do not match the tag name of
741 the last start tag token emitted (case insensitively), or if they do but
742 they are not immediately followed by one of the following characters:
743 * U+0009 CHARACTER TABULATION
744 * U+000A LINE FEED (LF)
745 * U+000B LINE TABULATION
746 * U+000C FORM FEED (FF)
747 * U+0020 SPACE
748 * U+003E GREATER-THAN SIGN (>)
749 * U+002F SOLIDUS (/)
750 * EOF
751 ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
752 token, a U+002F SOLIDUS character token, and switch to the data state
753 to process the next input character. */
754 $this->emitToken(
755 array(
756 'type' => self::CHARACTR,
757 'data' => '</'
758 )
759 );
760
761 $this->state = 'data';
762
763 } else {
764 /* Otherwise, if the content model flag is set to the PCDATA state,
765 or if the next few characters do match that tag name, consume the
766 next input character: */
767 $this->char++;
768 $char = $this->char();
769
770 if (preg_match('/^[A-Za-z]$/', $char)) {
771 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
772 Create a new end tag token, set its tag name to the lowercase version
773 of the input character (add 0x0020 to the character's code point), then
774 switch to the tag name state. (Don't emit the token yet; further details
775 will be filled in before it is emitted.) */
776 $this->token = array(
777 'name' => strtolower($char),
778 'type' => self::ENDTAG
779 );
780
781 $this->state = 'tagName';
782
783 } elseif ($char === '>') {
784 /* U+003E GREATER-THAN SIGN (>)
785 Parse error. Switch to the data state. */
786 $this->state = 'data';
787
788 } elseif ($this->char === $this->EOF) {
789 /* EOF
790 Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
791 SOLIDUS character token. Reconsume the EOF character in the data state. */
792 $this->emitToken(
793 array(
794 'type' => self::CHARACTR,
795 'data' => '</'
796 )
797 );
798
799 $this->char--;
800 $this->state = 'data';
801
802 } else {
803 /* Parse error. Switch to the bogus comment state. */
804 $this->state = 'bogusComment';
805 }
806 }
807 }

References $char, char(), character(), characters(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ commentDashState()

HTML5::commentDashState ( )
private

Definition at line 1270 of file PH5P.php.

1271 {
1272 /* Consume the next input character: */
1273 $this->char++;
1274 $char = $this->char();
1275
1276 /* U+002D HYPHEN-MINUS (-) */
1277 if ($char === '-') {
1278 /* Switch to the comment end state */
1279 $this->state = 'commentEnd';
1280
1281 /* EOF */
1282 } elseif ($this->char === $this->EOF) {
1283 /* Parse error. Emit the comment token. Reconsume the EOF character
1284 in the data state. */
1285 $this->emitToken($this->token);
1286 $this->char--;
1287 $this->state = 'data';
1288
1289 /* Anything else */
1290 } else {
1291 /* Append a U+002D HYPHEN-MINUS (-) character and the input
1292 character to the comment token's data. Switch to the comment state. */
1293 $this->token['data'] .= '-' . $char;
1294 $this->state = 'comment';
1295 }
1296 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ commentEndState()

HTML5::commentEndState ( )
private

Definition at line 1298 of file PH5P.php.

1299 {
1300 /* Consume the next input character: */
1301 $this->char++;
1302 $char = $this->char();
1303
1304 if ($char === '>') {
1305 $this->emitToken($this->token);
1306 $this->state = 'data';
1307
1308 } elseif ($char === '-') {
1309 $this->token['data'] .= '-';
1310
1311 } elseif ($this->char === $this->EOF) {
1312 $this->emitToken($this->token);
1313 $this->char--;
1314 $this->state = 'data';
1315
1316 } else {
1317 $this->token['data'] .= '--' . $char;
1318 $this->state = 'comment';
1319 }
1320 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ commentState()

HTML5::commentState ( )
private

Definition at line 1243 of file PH5P.php.

1244 {
1245 /* Consume the next input character: */
1246 $this->char++;
1247 $char = $this->char();
1248
1249 /* U+002D HYPHEN-MINUS (-) */
1250 if ($char === '-') {
1251 /* Switch to the comment dash state */
1252 $this->state = 'commentDash';
1253
1254 /* EOF */
1255 } elseif ($this->char === $this->EOF) {
1256 /* Parse error. Emit the comment token. Reconsume the EOF character
1257 in the data state. */
1258 $this->emitToken($this->token);
1259 $this->char--;
1260 $this->state = 'data';
1261
1262 /* Anything else */
1263 } else {
1264 /* Append the input character to the comment token's data. Stay in
1265 the comment state. */
1266 $this->token['data'] .= $char;
1267 }
1268 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ dataState()

HTML5::dataState ( )
private

Definition at line 505 of file PH5P.php.

506 {
507 // Consume the next input character
508 $this->char++;
509 $char = $this->char();
510
511 if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
512 /* U+0026 AMPERSAND (&)
513 When the content model flag is set to one of the PCDATA or RCDATA
514 states: switch to the entity data state. Otherwise: treat it as per
515 the "anything else" entry below. */
516 $this->state = 'entityData';
517
518 } elseif ($char === '-') {
519 /* If the content model flag is set to either the RCDATA state or
520 the CDATA state, and the escape flag is false, and there are at
521 least three characters before this one in the input stream, and the
522 last four characters in the input stream, including this one, are
523 U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
524 and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
525 if (($this->content_model === self::RCDATA || $this->content_model ===
526 self::CDATA) && $this->escape === false &&
527 $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--'
528 ) {
529 $this->escape = true;
530 }
531
532 /* In any case, emit the input character as a character token. Stay
533 in the data state. */
534 $this->emitToken(
535 array(
536 'type' => self::CHARACTR,
537 'data' => $char
538 )
539 );
540
541 /* U+003C LESS-THAN SIGN (<) */
542 } elseif ($char === '<' && ($this->content_model === self::PCDATA ||
543 (($this->content_model === self::RCDATA ||
544 $this->content_model === self::CDATA) && $this->escape === false))
545 ) {
546 /* When the content model flag is set to the PCDATA state: switch
547 to the tag open state.
548
549 When the content model flag is set to either the RCDATA state or
550 the CDATA state and the escape flag is false: switch to the tag
551 open state.
552
553 Otherwise: treat it as per the "anything else" entry below. */
554 $this->state = 'tagOpen';
555
556 /* U+003E GREATER-THAN SIGN (>) */
557 } elseif ($char === '>') {
558 /* If the content model flag is set to either the RCDATA state or
559 the CDATA state, and the escape flag is true, and the last three
560 characters in the input stream including this one are U+002D
561 HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
562 set the escape flag to false. */
563 if (($this->content_model === self::RCDATA ||
564 $this->content_model === self::CDATA) && $this->escape === true &&
565 $this->character($this->char, 3) === '-->'
566 ) {
567 $this->escape = false;
568 }
569
570 /* In any case, emit the input character as a character token.
571 Stay in the data state. */
572 $this->emitToken(
573 array(
574 'type' => self::CHARACTR,
575 'data' => $char
576 )
577 );
578
579 } elseif ($this->char === $this->EOF) {
580 /* EOF
581 Emit an end-of-file token. */
582 $this->EOF();
583
584 } elseif ($this->content_model === self::PLAINTEXT) {
585 /* When the content model flag is set to the PLAINTEXT state
586 THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
587 the text and emit it as a character token. */
588 $this->emitToken(
589 array(
590 'type' => self::CHARACTR,
591 'data' => substr($this->data, $this->char)
592 )
593 );
594
595 $this->EOF();
596
597 } else {
598 /* Anything else
599 THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters as
600 would otherwise also be treated as character tokens and emit them
601 as a single character token. Stay in the data state. */
602 $len = strcspn($this->data, '<&', $this->char);
603 $char = substr($this->data, $this->char, $len);
604 $this->char += $len - 1;
605
606 $this->emitToken(
607 array(
608 'type' => self::CHARACTR,
609 'data' => $char
610 )
611 );
612
613 $this->state = 'data';
614 }
615 }

References $char, char(), character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ doctypeNameState()

HTML5::doctypeNameState ( )
private

Definition at line 1389 of file PH5P.php.

1390 {
1391 /* Consume the next input character: */
1392 $this->char++;
1393 $char = $this->char();
1394
1395 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1396 $this->state = 'AfterDoctypeName';
1397
1398 } elseif ($char === '>') {
1399 $this->emitToken($this->token);
1400 $this->state = 'data';
1401
1402 } elseif (preg_match('/^[a-z]$/', $char)) {
1403 $this->token['name'] .= strtoupper($char);
1404
1405 } elseif ($this->char === $this->EOF) {
1406 $this->emitToken($this->token);
1407 $this->char--;
1408 $this->state = 'data';
1409
1410 } else {
1411 $this->token['name'] .= $char;
1412 }
1413
1414 $this->token['error'] = ($this->token['name'] === 'HTML')
1415 ? false
1416 : true;
1417 }

References $char, char(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ doctypeState()

HTML5::doctypeState ( )
private

Definition at line 1322 of file PH5P.php.

1323 {
1324 /* Consume the next input character: */
1325 $this->char++;
1326 $char = $this->char();
1327
1328 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1329 $this->state = 'beforeDoctypeName';
1330
1331 } else {
1332 $this->char--;
1333 $this->state = 'beforeDoctypeName';
1334 }
1335 }

References $char, and char().

+ Here is the call graph for this function:

◆ emitToken()

HTML5::emitToken (   $token)
private

Definition at line 1554 of file PH5P.php.

1555 {
1556 $emit = $this->tree->emitToken($token);
1557
1558 if (is_int($emit)) {
1559 $this->content_model = $emit;
1560
1561 } elseif ($token['type'] === self::ENDTAG) {
1562 $this->content_model = self::PCDATA;
1563 }
1564 }
$token
Definition: PH5P.php:78

References $token, and PCDATA.

Referenced by afterAttributeNameState(), afterDoctypeNameState(), attributeNameState(), attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), attributeValueUnquotedState(), beforeAttributeNameState(), beforeAttributeValueState(), beforeDoctypeNameState(), bogusCommentState(), bogusDoctypeState(), closeTagOpenState(), commentDashState(), commentEndState(), commentState(), dataState(), doctypeNameState(), entityDataState(), tagNameState(), and tagOpenState().

+ Here is the caller graph for this function:

◆ entity()

HTML5::entity ( )
private

Definition at line 1463 of file PH5P.php.

1464 {
1465 $start = $this->char;
1466
1467 // This section defines how to consume an entity. This definition is
1468 // used when parsing entities in text and in attributes.
1469
1470 // The behaviour depends on the identity of the next character (the
1471 // one immediately after the U+0026 AMPERSAND character):
1472
1473 switch ($this->character($this->char + 1)) {
1474 // U+0023 NUMBER SIGN (#)
1475 case '#':
1476
1477 // The behaviour further depends on the character after the
1478 // U+0023 NUMBER SIGN:
1479 switch ($this->character($this->char + 1)) {
1480 // U+0078 LATIN SMALL LETTER X
1481 // U+0058 LATIN CAPITAL LETTER X
1482 case 'x':
1483 case 'X':
1484 // Follow the steps below, but using the range of
1485 // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1486 // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
1487 // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
1488 // A, through to U+0046 LATIN CAPITAL LETTER F (in other
1489 // words, 0-9, A-F, a-f).
1490 $char = 1;
1491 $char_class = '0-9A-Fa-f';
1492 break;
1493
1494 // Anything else
1495 default:
1496 // Follow the steps below, but using the range of
1497 // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1498 // NINE (i.e. just 0-9).
1499 $char = 0;
1500 $char_class = '0-9';
1501 break;
1502 }
1503
1504 // Consume as many characters as match the range of characters
1505 // given above.
1506 $this->char++;
1507 $e_name = $this->characters($char_class, $this->char + $char + 1);
1508 $entity = $this->character($start, $this->char);
1509 $cond = strlen($e_name) > 0;
1510
1511 // The rest of the parsing happens below.
1512 break;
1513
1514 // Anything else
1515 default:
1516 // Consume the maximum number of characters possible, with the
1517 // consumed characters case-sensitively matching one of the
1518 // identifiers in the first column of the entities table.
1519 $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
1520 $len = strlen($e_name);
1521
1522 for ($c = 1; $c <= $len; $c++) {
1523 $id = substr($e_name, 0, $c);
1524 $this->char++;
1525
1526 if (in_array($id, $this->entities)) {
1527 if ($e_name[$c - 1] !== ';') {
1528 if ($c < $len && $e_name[$c] == ';') {
1529 $this->char++; // consume extra semicolon
1530 }
1531 }
1532 $entity = $id;
1533 break;
1534 }
1535 }
1536
1537 $cond = isset($entity);
1538 // The rest of the parsing happens below.
1539 break;
1540 }
1541
1542 if (!$cond) {
1543 // If no match can be made, then this is a parse error. No
1544 // characters are consumed, and nothing is returned.
1545 $this->char = $start;
1546 return false;
1547 }
1548
1549 // Return a character token for the character corresponding to the
1550 // entity name (as given by the second column of the entities table).
1551 return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
1552 }

References $char, character(), and characters().

Referenced by entityDataState(), and entityInAttributeValueState().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ entityDataState()

HTML5::entityDataState ( )
private

Definition at line 617 of file PH5P.php.

618 {
619 // Attempt to consume an entity.
620 $entity = $this->entity();
621
622 // If nothing is returned, emit a U+0026 AMPERSAND character token.
623 // Otherwise, emit the character token that was returned.
624 $char = (!$entity) ? '&' : $entity;
625 $this->emitToken(
626 array(
627 'type' => self::CHARACTR,
628 'data' => $char
629 )
630 );
631
632 // Finally, switch to the data state.
633 $this->state = 'data';
634 }
entity()
Definition: PH5P.php:1463

References $char, emitToken(), and entity().

+ Here is the call graph for this function:

◆ entityInAttributeValueState()

HTML5::entityInAttributeValueState ( )
private

Definition at line 1169 of file PH5P.php.

1170 {
1171 // Attempt to consume an entity.
1172 $entity = $this->entity();
1173
1174 // If nothing is returned, append a U+0026 AMPERSAND character to the
1175 // current attribute's value. Otherwise, append the character that
1176 // was returned.
1177 $char = (!$entity)
1178 ? '&'
1179 : $entity;
1180
1181 $last = count($this->token['attr']) - 1;
1182 $this->token['attr'][$last]['value'] .= $char;
1183 }

References $char, and entity().

Referenced by attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), and attributeValueUnquotedState().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ EOF()

HTML5::EOF ( )
private

Definition at line 1566 of file PH5P.php.

1567 {
1568 $this->state = null;
1569 $this->tree->emitToken(
1570 array(
1571 'type' => self::EOF
1572 )
1573 );
1574 }
const EOF
Definition: PH5P.php:460

References EOF.

◆ markupDeclarationOpenState()

HTML5::markupDeclarationOpenState ( )
private

Definition at line 1214 of file PH5P.php.

1215 {
1216 /* If the next two characters are both U+002D HYPHEN-MINUS (-)
1217 characters, consume those two characters, create a comment token whose
1218 data is the empty string, and switch to the comment state. */
1219 if ($this->character($this->char + 1, 2) === '--') {
1220 $this->char += 2;
1221 $this->state = 'comment';
1222 $this->token = array(
1223 'data' => null,
1224 'type' => self::COMMENT
1225 );
1226
1227 /* Otherwise if the next seven characters are a case-insensitive match
1228 for the word "DOCTYPE", then consume those characters and switch to the
1229 DOCTYPE state. */
1230 } elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
1231 $this->char += 7;
1232 $this->state = 'doctype';
1233
1234 /* Otherwise, this is a parse error. Switch to the bogus comment state.
1235 The next character that is consumed, if any, is the first character
1236 that will be in the comment. */
1237 } else {
1238 $this->char++;
1239 $this->state = 'bogusComment';
1240 }
1241 }

References character().

+ Here is the call graph for this function:

◆ save()

HTML5::save ( )

Definition at line 477 of file PH5P.php.

478 {
479 return $this->tree->save();
480 }

◆ tagNameState()

HTML5::tagNameState ( )
private

Definition at line 809 of file PH5P.php.

810 {
811 // Consume the next input character:
812 $this->char++;
813 $char = $this->character($this->char);
814
815 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
816 /* U+0009 CHARACTER TABULATION
817 U+000A LINE FEED (LF)
818 U+000B LINE TABULATION
819 U+000C FORM FEED (FF)
820 U+0020 SPACE
821 Switch to the before attribute name state. */
822 $this->state = 'beforeAttributeName';
823
824 } elseif ($char === '>') {
825 /* U+003E GREATER-THAN SIGN (>)
826 Emit the current tag token. Switch to the data state. */
827 $this->emitToken($this->token);
828 $this->state = 'data';
829
830 } elseif ($this->char === $this->EOF) {
831 /* EOF
832 Parse error. Emit the current tag token. Reconsume the EOF
833 character in the data state. */
834 $this->emitToken($this->token);
835
836 $this->char--;
837 $this->state = 'data';
838
839 } elseif ($char === '/') {
840 /* U+002F SOLIDUS (/)
841 Parse error unless this is a permitted slash. Switch to the before
842 attribute name state. */
843 $this->state = 'beforeAttributeName';
844
845 } else {
846 /* Anything else
847 Append the current input character to the current tag token's tag name.
848 Stay in the tag name state. */
849 $this->token['name'] .= strtolower($char);
850 $this->state = 'tagName';
851 }
852 }

References $char, character(), emitToken(), and EOF.

+ Here is the call graph for this function:

◆ tagOpenState()

HTML5::tagOpenState ( )
private

Definition at line 636 of file PH5P.php.

637 {
638 switch ($this->content_model) {
639 case self::RCDATA:
640 case self::CDATA:
641 /* If the next input character is a U+002F SOLIDUS (/) character,
642 consume it and switch to the close tag open state. If the next
643 input character is not a U+002F SOLIDUS (/) character, emit a
644 U+003C LESS-THAN SIGN character token and switch to the data
645 state to process the next input character. */
646 if ($this->character($this->char + 1) === '/') {
647 $this->char++;
648 $this->state = 'closeTagOpen';
649
650 } else {
651 $this->emitToken(
652 array(
653 'type' => self::CHARACTR,
654 'data' => '<'
655 )
656 );
657
658 $this->state = 'data';
659 }
660 break;
661
662 case self::PCDATA:
663 // If the content model flag is set to the PCDATA state
664 // Consume the next input character:
665 $this->char++;
666 $char = $this->char();
667
668 if ($char === '!') {
669 /* U+0021 EXCLAMATION MARK (!)
670 Switch to the markup declaration open state. */
671 $this->state = 'markupDeclarationOpen';
672
673 } elseif ($char === '/') {
674 /* U+002F SOLIDUS (/)
675 Switch to the close tag open state. */
676 $this->state = 'closeTagOpen';
677
678 } elseif (preg_match('/^[A-Za-z]$/', $char)) {
679 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
680 Create a new start tag token, set its tag name to the lowercase
681 version of the input character (add 0x0020 to the character's code
682 point), then switch to the tag name state. (Don't emit the token
683 yet; further details will be filled in before it is emitted.) */
684 $this->token = array(
685 'name' => strtolower($char),
686 'type' => self::STARTTAG,
687 'attr' => array()
688 );
689
690 $this->state = 'tagName';
691
692 } elseif ($char === '>') {
693 /* U+003E GREATER-THAN SIGN (>)
694 Parse error. Emit a U+003C LESS-THAN SIGN character token and a
695 U+003E GREATER-THAN SIGN character token. Switch to the data state. */
696 $this->emitToken(
697 array(
698 'type' => self::CHARACTR,
699 'data' => '<>'
700 )
701 );
702
703 $this->state = 'data';
704
705 } elseif ($char === '?') {
706 /* U+003F QUESTION MARK (?)
707 Parse error. Switch to the bogus comment state. */
708 $this->state = 'bogusComment';
709
710 } else {
711 /* Anything else
712 Parse error. Emit a U+003C LESS-THAN SIGN character token and
713 reconsume the current input character in the data state. */
714 $this->emitToken(
715 array(
716 'type' => self::CHARACTR,
717 'data' => '<'
718 )
719 );
720
721 $this->char--;
722 $this->state = 'data';
723 }
724 break;
725 }
726 }
const CDATA
Definition: PH5P.php:452
const RCDATA
Definition: PH5P.php:451

References $char, CDATA, char(), character(), emitToken(), PCDATA, and RCDATA.

+ Here is the call graph for this function:

Field Documentation

◆ $char

◆ $content_model

HTML5::$content_model
private

Definition at line 79 of file PH5P.php.

◆ $data

HTML5::$data
private

Definition at line 73 of file PH5P.php.

Referenced by __construct(), and bogusCommentState().

◆ $entities

HTML5::$entities
private

Definition at line 81 of file PH5P.php.

◆ $EOF

HTML5::$EOF
private

Definition at line 75 of file PH5P.php.

◆ $escape

HTML5::$escape = false
private

Definition at line 80 of file PH5P.php.

◆ $state

HTML5::$state
private

Definition at line 76 of file PH5P.php.

◆ $token

HTML5::$token
private

Definition at line 78 of file PH5P.php.

Referenced by emitToken().

◆ $tree

HTML5::$tree
private

Definition at line 77 of file PH5P.php.

◆ CDATA

const HTML5::CDATA = 2

◆ CHARACTR

◆ COMMENT

◆ DOCTYPE

◆ ENDTAG

◆ EOF

◆ PCDATA

const HTML5::PCDATA = 0

Definition at line 450 of file PH5P.php.

Referenced by __construct(), emitToken(), HTML5TreeConstructer\inHead(), and tagOpenState().

◆ PLAINTEXT

const HTML5::PLAINTEXT = 3

Definition at line 453 of file PH5P.php.

Referenced by HTML5TreeConstructer\inBody().

◆ RCDATA

const HTML5::RCDATA = 1

◆ STARTTAG


The documentation for this class was generated from the following file: