ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
HTML5 Class Reference
+ Collaboration diagram for HTML5:

Public Member Functions

 __construct ($data)
 
 save ()
 

Data Fields

const PCDATA = 0
 
const RCDATA = 1
 
const CDATA = 2
 
const PLAINTEXT = 3
 
const DOCTYPE = 0
 
const STARTTAG = 1
 
const ENDTAG = 2
 
const COMMENT = 3
 
const CHARACTR = 4
 
const EOF = 5
 

Private Member Functions

 char ()
 
 character ($s, $l=0)
 
 characters ($char_class, $start)
 
 dataState ()
 
 entityDataState ()
 
 tagOpenState ()
 
 closeTagOpenState ()
 
 tagNameState ()
 
 beforeAttributeNameState ()
 
 attributeNameState ()
 
 afterAttributeNameState ()
 
 beforeAttributeValueState ()
 
 attributeValueDoubleQuotedState ()
 
 attributeValueSingleQuotedState ()
 
 attributeValueUnquotedState ()
 
 entityInAttributeValueState ()
 
 bogusCommentState ()
 
 markupDeclarationOpenState ()
 
 commentState ()
 
 commentDashState ()
 
 commentEndState ()
 
 doctypeState ()
 
 beforeDoctypeNameState ()
 
 doctypeNameState ()
 
 afterDoctypeNameState ()
 
 bogusDoctypeState ()
 
 entity ()
 
 emitToken ($token)
 
 EOF ()
 

Private Attributes

 $data
 
 $char
 
 $EOF
 
 $state
 
 $tree
 
 $token
 
 $content_model
 
 $escape = false
 
 $entities
 

Detailed Description

Definition at line 70 of file PH5P.php.

Constructor & Destructor Documentation

◆ __construct()

HTML5::__construct (   $data)

Definition at line 461 of file PH5P.php.

References $data, data, and EOF.

462  {
463  $this->data = $data;
464  $this->char = -1;
465  $this->EOF = strlen($data);
466  $this->tree = new HTML5TreeConstructer;
467  $this->content_model = self::PCDATA;
468 
469  $this->state = 'data';
470 
471  while ($this->state !== null) {
472  $this->{$this->state . 'State'}();
473  }
474  }
$data
Definition: PH5P.php:72
EOF()
Definition: PH5P.php:1566
$this data['403_header']

Member Function Documentation

◆ afterAttributeNameState()

HTML5::afterAttributeNameState ( )
private

Definition at line 955 of file PH5P.php.

References EOF.

956  {
957  // Consume the next input character:
958  $this->char++;
959  $char = $this->character($this->char);
960 
961  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
962  /* U+0009 CHARACTER TABULATION
963  U+000A LINE FEED (LF)
964  U+000B LINE TABULATION
965  U+000C FORM FEED (FF)
966  U+0020 SPACE
967  Stay in the after attribute name state. */
968  $this->state = 'afterAttributeName';
969 
970  } elseif ($char === '=') {
971  /* U+003D EQUALS SIGN (=)
972  Switch to the before attribute value state. */
973  $this->state = 'beforeAttributeValue';
974 
975  } elseif ($char === '>') {
976  /* U+003E GREATER-THAN SIGN (>)
977  Emit the current tag token. Switch to the data state. */
978  $this->emitToken($this->token);
979  $this->state = 'data';
980 
981  } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
982  /* U+002F SOLIDUS (/)
983  Parse error unless this is a permitted slash. Switch to the
984  before attribute name state. */
985  $this->state = 'beforeAttributeName';
986 
987  } elseif ($this->char === $this->EOF) {
988  /* EOF
989  Parse error. Emit the current tag token. Reconsume the EOF
990  character in the data state. */
991  $this->emitToken($this->token);
992 
993  $this->char--;
994  $this->state = 'data';
995 
996  } else {
997  /* Anything else
998  Start a new attribute in the current tag token. Set that attribute's
999  name to the current input character, and its value to the empty string.
1000  Switch to the attribute name state. */
1001  $this->token['attr'][] = array(
1002  'name' => strtolower($char),
1003  'value' => null
1004  );
1005 
1006  $this->state = 'attributeName';
1007  }
1008  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ afterDoctypeNameState()

HTML5::afterDoctypeNameState ( )
private

Definition at line 1418 of file PH5P.php.

References EOF.

1419  {
1420  /* Consume the next input character: */
1421  $this->char++;
1422  $char = $this->char();
1423 
1424  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1425  // Stay in the after DOCTYPE name state.
1426 
1427  } elseif ($char === '>') {
1428  $this->emitToken($this->token);
1429  $this->state = 'data';
1430 
1431  } elseif ($this->char === $this->EOF) {
1432  $this->emitToken($this->token);
1433  $this->char--;
1434  $this->state = 'data';
1435 
1436  } else {
1437  $this->token['error'] = true;
1438  $this->state = 'bogusDoctype';
1439  }
1440  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ attributeNameState()

HTML5::attributeNameState ( )
private

Definition at line 903 of file PH5P.php.

References EOF.

904  {
905  // Consume the next input character:
906  $this->char++;
907  $char = $this->character($this->char);
908 
909  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
910  /* U+0009 CHARACTER TABULATION
911  U+000A LINE FEED (LF)
912  U+000B LINE TABULATION
913  U+000C FORM FEED (FF)
914  U+0020 SPACE
915  Switch to the after attribute name state. */
916  $this->state = 'afterAttributeName';
917 
918  } elseif ($char === '=') {
919  /* U+003D EQUALS SIGN (=)
920  Switch to the before attribute value state. */
921  $this->state = 'beforeAttributeValue';
922 
923  } elseif ($char === '>') {
924  /* U+003E GREATER-THAN SIGN (>)
925  Emit the current tag token. Switch to the data state. */
926  $this->emitToken($this->token);
927  $this->state = 'data';
928 
929  } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
930  /* U+002F SOLIDUS (/)
931  Parse error unless this is a permitted slash. Switch to the before
932  attribute name state. */
933  $this->state = 'beforeAttributeName';
934 
935  } elseif ($this->char === $this->EOF) {
936  /* EOF
937  Parse error. Emit the current tag token. Reconsume the EOF
938  character in the data state. */
939  $this->emitToken($this->token);
940 
941  $this->char--;
942  $this->state = 'data';
943 
944  } else {
945  /* Anything else
946  Append the current input character to the current attribute's name.
947  Stay in the attribute name state. */
948  $last = count($this->token['attr']) - 1;
949  $this->token['attr'][$last]['name'] .= strtolower($char);
950 
951  $this->state = 'attributeName';
952  }
953  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ attributeValueDoubleQuotedState()

HTML5::attributeValueDoubleQuotedState ( )
private

Definition at line 1059 of file PH5P.php.

References EOF.

1060  {
1061  // Consume the next input character:
1062  $this->char++;
1063  $char = $this->character($this->char);
1064 
1065  if ($char === '"') {
1066  /* U+0022 QUOTATION MARK (")
1067  Switch to the before attribute name state. */
1068  $this->state = 'beforeAttributeName';
1069 
1070  } elseif ($char === '&') {
1071  /* U+0026 AMPERSAND (&)
1072  Switch to the entity in attribute value state. */
1073  $this->entityInAttributeValueState('double');
1074 
1075  } elseif ($this->char === $this->EOF) {
1076  /* EOF
1077  Parse error. Emit the current tag token. Reconsume the character
1078  in the data state. */
1079  $this->emitToken($this->token);
1080 
1081  $this->char--;
1082  $this->state = 'data';
1083 
1084  } else {
1085  /* Anything else
1086  Append the current input character to the current attribute's value.
1087  Stay in the attribute value (double-quoted) state. */
1088  $last = count($this->token['attr']) - 1;
1089  $this->token['attr'][$last]['value'] .= $char;
1090 
1091  $this->state = 'attributeValueDoubleQuoted';
1092  }
1093  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
entityInAttributeValueState()
Definition: PH5P.php:1168
$char
Definition: PH5P.php:73

◆ attributeValueSingleQuotedState()

HTML5::attributeValueSingleQuotedState ( )
private

Definition at line 1095 of file PH5P.php.

References EOF.

1096  {
1097  // Consume the next input character:
1098  $this->char++;
1099  $char = $this->character($this->char);
1100 
1101  if ($char === '\'') {
1102  /* U+0027 APOSTROPHE (')
1103  Switch to the before attribute name state. */
1104  $this->state = 'beforeAttributeName';
1105 
1106  } elseif ($char === '&') {
1107  /* U+0026 AMPERSAND (&)
1108  Switch to the entity in attribute value state. */
1109  $this->entityInAttributeValueState('single');
1110 
1111  } elseif ($this->char === $this->EOF) {
1112  /* EOF
1113  Parse error. Emit the current tag token. Reconsume the character
1114  in the data state. */
1115  $this->emitToken($this->token);
1116 
1117  $this->char--;
1118  $this->state = 'data';
1119 
1120  } else {
1121  /* Anything else
1122  Append the current input character to the current attribute's value.
1123  Stay in the attribute value (single-quoted) state. */
1124  $last = count($this->token['attr']) - 1;
1125  $this->token['attr'][$last]['value'] .= $char;
1126 
1127  $this->state = 'attributeValueSingleQuoted';
1128  }
1129  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
entityInAttributeValueState()
Definition: PH5P.php:1168
$char
Definition: PH5P.php:73

◆ attributeValueUnquotedState()

HTML5::attributeValueUnquotedState ( )
private

Definition at line 1131 of file PH5P.php.

1132  {
1133  // Consume the next input character:
1134  $this->char++;
1135  $char = $this->character($this->char);
1136 
1137  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1138  /* U+0009 CHARACTER TABULATION
1139  U+000A LINE FEED (LF)
1140  U+000B LINE TABULATION
1141  U+000C FORM FEED (FF)
1142  U+0020 SPACE
1143  Switch to the before attribute name state. */
1144  $this->state = 'beforeAttributeName';
1145 
1146  } elseif ($char === '&') {
1147  /* U+0026 AMPERSAND (&)
1148  Switch to the entity in attribute value state. */
1149  $this->entityInAttributeValueState();
1150 
1151  } elseif ($char === '>') {
1152  /* U+003E GREATER-THAN SIGN (>)
1153  Emit the current tag token. Switch to the data state. */
1154  $this->emitToken($this->token);
1155  $this->state = 'data';
1156 
1157  } else {
1158  /* Anything else
1159  Append the current input character to the current attribute's value.
1160  Stay in the attribute value (unquoted) state. */
1161  $last = count($this->token['attr']) - 1;
1162  $this->token['attr'][$last]['value'] .= $char;
1163 
1164  $this->state = 'attributeValueUnquoted';
1165  }
1166  }
character($s, $l=0)
Definition: PH5P.php:488
emitToken($token)
Definition: PH5P.php:1554
entityInAttributeValueState()
Definition: PH5P.php:1168
$char
Definition: PH5P.php:73

◆ beforeAttributeNameState()

HTML5::beforeAttributeNameState ( )
private

Definition at line 853 of file PH5P.php.

References EOF.

854  {
855  // Consume the next input character:
856  $this->char++;
857  $char = $this->character($this->char);
858 
859  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
860  /* U+0009 CHARACTER TABULATION
861  U+000A LINE FEED (LF)
862  U+000B LINE TABULATION
863  U+000C FORM FEED (FF)
864  U+0020 SPACE
865  Stay in the before attribute name state. */
866  $this->state = 'beforeAttributeName';
867 
868  } elseif ($char === '>') {
869  /* U+003E GREATER-THAN SIGN (>)
870  Emit the current tag token. Switch to the data state. */
871  $this->emitToken($this->token);
872  $this->state = 'data';
873 
874  } elseif ($char === '/') {
875  /* U+002F SOLIDUS (/)
876  Parse error unless this is a permitted slash. Stay in the before
877  attribute name state. */
878  $this->state = 'beforeAttributeName';
879 
880  } elseif ($this->char === $this->EOF) {
881  /* EOF
882  Parse error. Emit the current tag token. Reconsume the EOF
883  character in the data state. */
884  $this->emitToken($this->token);
885 
886  $this->char--;
887  $this->state = 'data';
888 
889  } else {
890  /* Anything else
891  Start a new attribute in the current tag token. Set that attribute's
892  name to the current input character, and its value to the empty string.
893  Switch to the attribute name state. */
894  $this->token['attr'][] = array(
895  'name' => strtolower($char),
896  'value' => null
897  );
898 
899  $this->state = 'attributeName';
900  }
901  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ beforeAttributeValueState()

HTML5::beforeAttributeValueState ( )
private

Definition at line 1010 of file PH5P.php.

1011  {
1012  // Consume the next input character:
1013  $this->char++;
1014  $char = $this->character($this->char);
1015 
1016  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1017  /* U+0009 CHARACTER TABULATION
1018  U+000A LINE FEED (LF)
1019  U+000B LINE TABULATION
1020  U+000C FORM FEED (FF)
1021  U+0020 SPACE
1022  Stay in the before attribute value state. */
1023  $this->state = 'beforeAttributeValue';
1024 
1025  } elseif ($char === '"') {
1026  /* U+0022 QUOTATION MARK (")
1027  Switch to the attribute value (double-quoted) state. */
1028  $this->state = 'attributeValueDoubleQuoted';
1029 
1030  } elseif ($char === '&') {
1031  /* U+0026 AMPERSAND (&)
1032  Switch to the attribute value (unquoted) state and reconsume
1033  this input character. */
1034  $this->char--;
1035  $this->state = 'attributeValueUnquoted';
1036 
1037  } elseif ($char === '\'') {
1038  /* U+0027 APOSTROPHE (')
1039  Switch to the attribute value (single-quoted) state. */
1040  $this->state = 'attributeValueSingleQuoted';
1041 
1042  } elseif ($char === '>') {
1043  /* U+003E GREATER-THAN SIGN (>)
1044  Emit the current tag token. Switch to the data state. */
1045  $this->emitToken($this->token);
1046  $this->state = 'data';
1047 
1048  } else {
1049  /* Anything else
1050  Append the current input character to the current attribute's value.
1051  Switch to the attribute value (unquoted) state. */
1052  $last = count($this->token['attr']) - 1;
1053  $this->token['attr'][$last]['value'] .= $char;
1054 
1055  $this->state = 'attributeValueUnquoted';
1056  }
1057  }
character($s, $l=0)
Definition: PH5P.php:488
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ beforeDoctypeNameState()

HTML5::beforeDoctypeNameState ( )
private

Definition at line 1336 of file PH5P.php.

References EOF.

1337  {
1338  /* Consume the next input character: */
1339  $this->char++;
1340  $char = $this->char();
1341 
1342  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1343  // Stay in the before DOCTYPE name state.
1344 
1345  } elseif (preg_match('/^[a-z]$/', $char)) {
1346  $this->token = array(
1347  'name' => strtoupper($char),
1348  'type' => self::DOCTYPE,
1349  'error' => true
1350  );
1351 
1352  $this->state = 'doctypeName';
1353 
1354  } elseif ($char === '>') {
1355  $this->emitToken(
1356  array(
1357  'name' => null,
1358  'type' => self::DOCTYPE,
1359  'error' => true
1360  )
1361  );
1362 
1363  $this->state = 'data';
1364 
1365  } elseif ($this->char === $this->EOF) {
1366  $this->emitToken(
1367  array(
1368  'name' => null,
1369  'type' => self::DOCTYPE,
1370  'error' => true
1371  )
1372  );
1373 
1374  $this->char--;
1375  $this->state = 'data';
1376 
1377  } else {
1378  $this->token = array(
1379  'name' => $char,
1380  'type' => self::DOCTYPE,
1381  'error' => true
1382  );
1383 
1384  $this->state = 'doctypeName';
1385  }
1386  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ bogusCommentState()

HTML5::bogusCommentState ( )
private

Definition at line 1184 of file PH5P.php.

References $data, and EOF.

1185  {
1186  /* Consume every character up to the first U+003E GREATER-THAN SIGN
1187  character (>) or the end of the file (EOF), whichever comes first. Emit
1188  a comment token whose data is the concatenation of all the characters
1189  starting from and including the character that caused the state machine
1190  to switch into the bogus comment state, up to and including the last
1191  consumed character before the U+003E character, if any, or up to the
1192  end of the file otherwise. (If the comment was started by the end of
1193  the file (EOF), the token is empty.) */
1194  $data = $this->characters('^>', $this->char);
1195  $this->emitToken(
1196  array(
1197  'data' => $data,
1198  'type' => self::COMMENT
1199  )
1200  );
1201 
1202  $this->char += strlen($data);
1203 
1204  /* Switch to the data state. */
1205  $this->state = 'data';
1206 
1207  /* If the end of the file was reached, reconsume the EOF character. */
1208  if ($this->char === $this->EOF) {
1209  $this->char = $this->EOF - 1;
1210  }
1211  }
$data
Definition: PH5P.php:72
EOF()
Definition: PH5P.php:1566
characters($char_class, $start)
Definition: PH5P.php:499
emitToken($token)
Definition: PH5P.php:1554

◆ bogusDoctypeState()

HTML5::bogusDoctypeState ( )
private

Definition at line 1442 of file PH5P.php.

References EOF.

1443  {
1444  /* Consume the next input character: */
1445  $this->char++;
1446  $char = $this->char();
1447 
1448  if ($char === '>') {
1449  $this->emitToken($this->token);
1450  $this->state = 'data';
1451 
1452  } elseif ($this->char === $this->EOF) {
1453  $this->emitToken($this->token);
1454  $this->char--;
1455  $this->state = 'data';
1456 
1457  } else {
1458  // Stay in the bogus DOCTYPE state.
1459  }
1460  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ char()

HTML5::char ( )
private

Definition at line 481 of file PH5P.php.

References data, and EOF.

482  {
483  return ($this->char < $this->EOF)
484  ? $this->data[$this->char]
485  : false;
486  }
EOF()
Definition: PH5P.php:1566
$char
Definition: PH5P.php:73
$this data['403_header']

◆ character()

HTML5::character (   $s,
  $l = 0 
)
private

Definition at line 488 of file PH5P.php.

References $l, $s, data, and EOF.

489  {
490  if ($s + $l < $this->EOF) {
491  if ($l === 0) {
492  return $this->data[$s];
493  } else {
494  return substr($this->data, $s, $l);
495  }
496  }
497  }
EOF()
Definition: PH5P.php:1566
$s
Definition: pwgen.php:45
global $l
Definition: afr.php:30
$this data['403_header']

◆ characters()

HTML5::characters (   $char_class,
  $start 
)
private

Definition at line 499 of file PH5P.php.

References $start, and data.

500  {
501  return preg_replace('#^([' . $char_class . ']+).*#s', '\\1', substr($this->data, $start));
502  }
$start
Definition: bench.php:8
$this data['403_header']

◆ closeTagOpenState()

HTML5::closeTagOpenState ( )
private

Definition at line 727 of file PH5P.php.

References EOF.

728  {
729  $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
730  $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
731 
732  if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
733  (!$the_same || ($the_same && (!preg_match(
734  '/[\t\n\x0b\x0c >\/]/',
735  $this->character($this->char + 1 + strlen($next_node))
736  ) || $this->EOF === $this->char)))
737  ) {
738  /* If the content model flag is set to the RCDATA or CDATA states then
739  examine the next few characters. If they do not match the tag name of
740  the last start tag token emitted (case insensitively), or if they do but
741  they are not immediately followed by one of the following characters:
742  * U+0009 CHARACTER TABULATION
743  * U+000A LINE FEED (LF)
744  * U+000B LINE TABULATION
745  * U+000C FORM FEED (FF)
746  * U+0020 SPACE
747  * U+003E GREATER-THAN SIGN (>)
748  * U+002F SOLIDUS (/)
749  * EOF
750  ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
751  token, a U+002F SOLIDUS character token, and switch to the data state
752  to process the next input character. */
753  $this->emitToken(
754  array(
755  'type' => self::CHARACTR,
756  'data' => '</'
757  )
758  );
759 
760  $this->state = 'data';
761 
762  } else {
763  /* Otherwise, if the content model flag is set to the PCDATA state,
764  or if the next few characters do match that tag name, consume the
765  next input character: */
766  $this->char++;
767  $char = $this->char();
768 
769  if (preg_match('/^[A-Za-z]$/', $char)) {
770  /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
771  Create a new end tag token, set its tag name to the lowercase version
772  of the input character (add 0x0020 to the character's code point), then
773  switch to the tag name state. (Don't emit the token yet; further details
774  will be filled in before it is emitted.) */
775  $this->token = array(
776  'name' => strtolower($char),
777  'type' => self::ENDTAG
778  );
779 
780  $this->state = 'tagName';
781 
782  } elseif ($char === '>') {
783  /* U+003E GREATER-THAN SIGN (>)
784  Parse error. Switch to the data state. */
785  $this->state = 'data';
786 
787  } elseif ($this->char === $this->EOF) {
788  /* EOF
789  Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
790  SOLIDUS character token. Reconsume the EOF character in the data state. */
791  $this->emitToken(
792  array(
793  'type' => self::CHARACTR,
794  'data' => '</'
795  )
796  );
797 
798  $this->char--;
799  $this->state = 'data';
800 
801  } else {
802  /* Parse error. Switch to the bogus comment state. */
803  $this->state = 'bogusComment';
804  }
805  }
806  }
character($s, $l=0)
Definition: PH5P.php:488
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1566
characters($char_class, $start)
Definition: PH5P.php:499
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ commentDashState()

HTML5::commentDashState ( )
private

Definition at line 1269 of file PH5P.php.

References EOF.

1270  {
1271  /* Consume the next input character: */
1272  $this->char++;
1273  $char = $this->char();
1274 
1275  /* U+002D HYPHEN-MINUS (-) */
1276  if ($char === '-') {
1277  /* Switch to the comment end state */
1278  $this->state = 'commentEnd';
1279 
1280  /* EOF */
1281  } elseif ($this->char === $this->EOF) {
1282  /* Parse error. Emit the comment token. Reconsume the EOF character
1283  in the data state. */
1284  $this->emitToken($this->token);
1285  $this->char--;
1286  $this->state = 'data';
1287 
1288  /* Anything else */
1289  } else {
1290  /* Append a U+002D HYPHEN-MINUS (-) character and the input
1291  character to the comment token's data. Switch to the comment state. */
1292  $this->token['data'] .= '-' . $char;
1293  $this->state = 'comment';
1294  }
1295  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ commentEndState()

HTML5::commentEndState ( )
private

Definition at line 1297 of file PH5P.php.

References EOF.

1298  {
1299  /* Consume the next input character: */
1300  $this->char++;
1301  $char = $this->char();
1302 
1303  if ($char === '>') {
1304  $this->emitToken($this->token);
1305  $this->state = 'data';
1306 
1307  } elseif ($char === '-') {
1308  $this->token['data'] .= '-';
1309 
1310  } elseif ($this->char === $this->EOF) {
1311  $this->emitToken($this->token);
1312  $this->char--;
1313  $this->state = 'data';
1314 
1315  } else {
1316  $this->token['data'] .= '--' . $char;
1317  $this->state = 'comment';
1318  }
1319  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ commentState()

HTML5::commentState ( )
private

Definition at line 1242 of file PH5P.php.

References EOF.

1243  {
1244  /* Consume the next input character: */
1245  $this->char++;
1246  $char = $this->char();
1247 
1248  /* U+002D HYPHEN-MINUS (-) */
1249  if ($char === '-') {
1250  /* Switch to the comment dash state */
1251  $this->state = 'commentDash';
1252 
1253  /* EOF */
1254  } elseif ($this->char === $this->EOF) {
1255  /* Parse error. Emit the comment token. Reconsume the EOF character
1256  in the data state. */
1257  $this->emitToken($this->token);
1258  $this->char--;
1259  $this->state = 'data';
1260 
1261  /* Anything else */
1262  } else {
1263  /* Append the input character to the comment token's data. Stay in
1264  the comment state. */
1265  $this->token['data'] .= $char;
1266  }
1267  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ dataState()

HTML5::dataState ( )
private

Definition at line 504 of file PH5P.php.

References data, and EOF.

505  {
506  // Consume the next input character
507  $this->char++;
508  $char = $this->char();
509 
510  if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
511  /* U+0026 AMPERSAND (&)
512  When the content model flag is set to one of the PCDATA or RCDATA
513  states: switch to the entity data state. Otherwise: treat it as per
514  the "anything else" entry below. */
515  $this->state = 'entityData';
516 
517  } elseif ($char === '-') {
518  /* If the content model flag is set to either the RCDATA state or
519  the CDATA state, and the escape flag is false, and there are at
520  least three characters before this one in the input stream, and the
521  last four characters in the input stream, including this one, are
522  U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
523  and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
524  if (($this->content_model === self::RCDATA || $this->content_model ===
525  self::CDATA) && $this->escape === false &&
526  $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--'
527  ) {
528  $this->escape = true;
529  }
530 
531  /* In any case, emit the input character as a character token. Stay
532  in the data state. */
533  $this->emitToken(
534  array(
535  'type' => self::CHARACTR,
536  'data' => $char
537  )
538  );
539 
540  /* U+003C LESS-THAN SIGN (<) */
541  } elseif ($char === '<' && ($this->content_model === self::PCDATA ||
542  (($this->content_model === self::RCDATA ||
543  $this->content_model === self::CDATA) && $this->escape === false))
544  ) {
545  /* When the content model flag is set to the PCDATA state: switch
546  to the tag open state.
547 
548  When the content model flag is set to either the RCDATA state or
549  the CDATA state and the escape flag is false: switch to the tag
550  open state.
551 
552  Otherwise: treat it as per the "anything else" entry below. */
553  $this->state = 'tagOpen';
554 
555  /* U+003E GREATER-THAN SIGN (>) */
556  } elseif ($char === '>') {
557  /* If the content model flag is set to either the RCDATA state or
558  the CDATA state, and the escape flag is true, and the last three
559  characters in the input stream including this one are U+002D
560  HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
561  set the escape flag to false. */
562  if (($this->content_model === self::RCDATA ||
563  $this->content_model === self::CDATA) && $this->escape === true &&
564  $this->character($this->char, 3) === '-->'
565  ) {
566  $this->escape = false;
567  }
568 
569  /* In any case, emit the input character as a character token.
570  Stay in the data state. */
571  $this->emitToken(
572  array(
573  'type' => self::CHARACTR,
574  'data' => $char
575  )
576  );
577 
578  } elseif ($this->char === $this->EOF) {
579  /* EOF
580  Emit an end-of-file token. */
581  $this->EOF();
582 
583  } elseif ($this->content_model === self::PLAINTEXT) {
584  /* When the content model flag is set to the PLAINTEXT state
585  THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
586  the text and emit it as a character token. */
587  $this->emitToken(
588  array(
589  'type' => self::CHARACTR,
590  'data' => substr($this->data, $this->char)
591  )
592  );
593 
594  $this->EOF();
595 
596  } else {
597  /* Anything else
598  THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters as
599  would otherwise also be treated as character tokens and emit them
600  as a single character token. Stay in the data state. */
601  $len = strcspn($this->data, '<&', $this->char);
602  $char = substr($this->data, $this->char, $len);
603  $this->char += $len - 1;
604 
605  $this->emitToken(
606  array(
607  'type' => self::CHARACTR,
608  'data' => $char
609  )
610  );
611 
612  $this->state = 'data';
613  }
614  }
character($s, $l=0)
Definition: PH5P.php:488
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73
$this data['403_header']

◆ doctypeNameState()

HTML5::doctypeNameState ( )
private

Definition at line 1388 of file PH5P.php.

References EOF.

1389  {
1390  /* Consume the next input character: */
1391  $this->char++;
1392  $char = $this->char();
1393 
1394  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1395  $this->state = 'AfterDoctypeName';
1396 
1397  } elseif ($char === '>') {
1398  $this->emitToken($this->token);
1399  $this->state = 'data';
1400 
1401  } elseif (preg_match('/^[a-z]$/', $char)) {
1402  $this->token['name'] .= strtoupper($char);
1403 
1404  } elseif ($this->char === $this->EOF) {
1405  $this->emitToken($this->token);
1406  $this->char--;
1407  $this->state = 'data';
1408 
1409  } else {
1410  $this->token['name'] .= $char;
1411  }
1412 
1413  $this->token['error'] = ($this->token['name'] === 'HTML')
1414  ? false
1415  : true;
1416  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ doctypeState()

HTML5::doctypeState ( )
private

Definition at line 1321 of file PH5P.php.

1322  {
1323  /* Consume the next input character: */
1324  $this->char++;
1325  $char = $this->char();
1326 
1327  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1328  $this->state = 'beforeDoctypeName';
1329 
1330  } else {
1331  $this->char--;
1332  $this->state = 'beforeDoctypeName';
1333  }
1334  }
char()
Definition: PH5P.php:481
$char
Definition: PH5P.php:73

◆ emitToken()

HTML5::emitToken (   $token)
private

Definition at line 1554 of file PH5P.php.

References PHPMailer\PHPMailer\$token.

1555  {
1556  $emit = $this->tree->emitToken($token);
1557 
1558  if (is_int($emit)) {
1559  $this->content_model = $emit;
1560 
1561  } elseif ($token['type'] === self::ENDTAG) {
1562  $this->content_model = self::PCDATA;
1563  }
1564  }
$token
Definition: PH5P.php:77

◆ entity()

HTML5::entity ( )
private

Definition at line 1462 of file PH5P.php.

References $c, $id, and $start.

1463  {
1464  $start = $this->char;
1465 
1466  // This section defines how to consume an entity. This definition is
1467  // used when parsing entities in text and in attributes.
1468 
1469  // The behaviour depends on the identity of the next character (the
1470  // one immediately after the U+0026 AMPERSAND character):
1471 
1472  switch ($this->character($this->char + 1)) {
1473  // U+0023 NUMBER SIGN (#)
1474  case '#':
1475 
1476  // The behaviour further depends on the character after the
1477  // U+0023 NUMBER SIGN:
1478  switch ($this->character($this->char + 1)) {
1479  // U+0078 LATIN SMALL LETTER X
1480  // U+0058 LATIN CAPITAL LETTER X
1481  case 'x':
1482  case 'X':
1483  // Follow the steps below, but using the range of
1484  // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1485  // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
1486  // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
1487  // A, through to U+0046 LATIN CAPITAL LETTER F (in other
1488  // words, 0-9, A-F, a-f).
1489  $char = 1;
1490  $char_class = '0-9A-Fa-f';
1491  break;
1492 
1493  // Anything else
1494  default:
1495  // Follow the steps below, but using the range of
1496  // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1497  // NINE (i.e. just 0-9).
1498  $char = 0;
1499  $char_class = '0-9';
1500  break;
1501  }
1502 
1503  // Consume as many characters as match the range of characters
1504  // given above.
1505  $this->char++;
1506  $e_name = $this->characters($char_class, $this->char + $char + 1);
1507  $entity = $this->character($start, $this->char);
1508  $cond = strlen($e_name) > 0;
1509 
1510  // The rest of the parsing happens below.
1511  break;
1512 
1513  // Anything else
1514  default:
1515  // Consume the maximum number of characters possible, with the
1516  // consumed characters case-sensitively matching one of the
1517  // identifiers in the first column of the entities table.
1518 
1519  $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
1520  $len = strlen($e_name);
1521 
1522  for ($c = 1; $c <= $len; $c++) {
1523  $id = substr($e_name, 0, $c);
1524  $this->char++;
1525 
1526  if (in_array($id, $this->entities)) {
1527  if ($e_name[$c - 1] !== ';') {
1528  if ($c < $len && $e_name[$c] == ';') {
1529  $this->char++; // consume extra semicolon
1530  }
1531  }
1532  $entity = $id;
1533  break;
1534  }
1535  }
1536 
1537  $cond = isset($entity);
1538  // The rest of the parsing happens below.
1539  break;
1540  }
1541 
1542  if (!$cond) {
1543  // If no match can be made, then this is a parse error. No
1544  // characters are consumed, and nothing is returned.
1545  $this->char = $start;
1546  return false;
1547  }
1548 
1549  // Return a character token for the character corresponding to the
1550  // entity name (as given by the second column of the entities table).
1551  return html_entity_decode('&' . rtrim($entity, ';') . ';', ENT_QUOTES, 'UTF-8');
1552  }
character($s, $l=0)
Definition: PH5P.php:488
if(!array_key_exists('StateId', $_REQUEST)) $id
characters($char_class, $start)
Definition: PH5P.php:499
$start
Definition: bench.php:8
$char
Definition: PH5P.php:73

◆ entityDataState()

HTML5::entityDataState ( )
private

Definition at line 616 of file PH5P.php.

617  {
618  // Attempt to consume an entity.
619  $entity = $this->entity();
620 
621  // If nothing is returned, emit a U+0026 AMPERSAND character token.
622  // Otherwise, emit the character token that was returned.
623  $char = (!$entity) ? '&' : $entity;
624  $this->emitToken(
625  array(
626  'type' => self::CHARACTR,
627  'data' => $char
628  )
629  );
630 
631  // Finally, switch to the data state.
632  $this->state = 'data';
633  }
entity()
Definition: PH5P.php:1462
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ entityInAttributeValueState()

HTML5::entityInAttributeValueState ( )
private

Definition at line 1168 of file PH5P.php.

1169  {
1170  // Attempt to consume an entity.
1171  $entity = $this->entity();
1172 
1173  // If nothing is returned, append a U+0026 AMPERSAND character to the
1174  // current attribute's value. Otherwise, append the character that
1175  // was returned to the current attribute's value.
1176  $char = (!$entity)
1177  ? '&'
1178  : $entity;
1179 
1180  $last = count($this->token['attr']) - 1;
1181  $this->token['attr'][$last]['value'] .= $char;
1182  }
entity()
Definition: PH5P.php:1462
$char
Definition: PH5P.php:73

◆ EOF()

HTML5::EOF ( )
private

Definition at line 1566 of file PH5P.php.

References EOF.

Referenced by HTML5TreeConstructer\initPhase(), HTML5TreeConstructer\mainPhase(), HTML5TreeConstructer\rootElementPhase(), and HTML5TreeConstructer\trailingEndPhase().

1567  {
1568  $this->state = null;
1569  $this->tree->emitToken(
1570  array(
1571  'type' => self::EOF
1572  )
1573  );
1574  }
const EOF
How fgetc() reports an End Of File.
Definition: JSMin_lib.php:92
+ Here is the caller graph for this function:

◆ markupDeclarationOpenState()

HTML5::markupDeclarationOpenState ( )
private

Definition at line 1213 of file PH5P.php.

1214  {
1215  /* If the next two characters are both U+002D HYPHEN-MINUS (-)
1216  characters, consume those two characters, create a comment token whose
1217  data is the empty string, and switch to the comment state. */
1218  if ($this->character($this->char + 1, 2) === '--') {
1219  $this->char += 2;
1220  $this->state = 'comment';
1221  $this->token = array(
1222  'data' => null,
1223  'type' => self::COMMENT
1224  );
1225 
1226  /* Otherwise if the next seven characters are a case-insensitive match
1227  for the word "DOCTYPE", then consume those characters and switch to the
1228  DOCTYPE state. */
1229  } elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
1230  $this->char += 7;
1231  $this->state = 'doctype';
1232 
1233  /* Otherwise, it is a parse error. Switch to the bogus comment state.
1234  The next character that is consumed, if any, is the first character
1235  that will be in the comment. */
1236  } else {
1237  $this->char++;
1238  $this->state = 'bogusComment';
1239  }
1240  }
character($s, $l=0)
Definition: PH5P.php:488

◆ save()

HTML5::save ( )

Definition at line 476 of file PH5P.php.

477  {
478  return $this->tree->save();
479  }

◆ tagNameState()

HTML5::tagNameState ( )
private

Definition at line 808 of file PH5P.php.

References EOF.

809  {
810  // Consume the next input character:
811  $this->char++;
812  $char = $this->character($this->char);
813 
814  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
815  /* U+0009 CHARACTER TABULATION
816  U+000A LINE FEED (LF)
817  U+000B LINE TABULATION
818  U+000C FORM FEED (FF)
819  U+0020 SPACE
820  Switch to the before attribute name state. */
821  $this->state = 'beforeAttributeName';
822 
823  } elseif ($char === '>') {
824  /* U+003E GREATER-THAN SIGN (>)
825  Emit the current tag token. Switch to the data state. */
826  $this->emitToken($this->token);
827  $this->state = 'data';
828 
829  } elseif ($this->char === $this->EOF) {
830  /* EOF
831  Parse error. Emit the current tag token. Reconsume the EOF
832  character in the data state. */
833  $this->emitToken($this->token);
834 
835  $this->char--;
836  $this->state = 'data';
837 
838  } elseif ($char === '/') {
839  /* U+002F SOLIDUS (/)
840  Parse error unless this is a permitted slash. Switch to the before
841  attribute name state. */
842  $this->state = 'beforeAttributeName';
843 
844  } else {
845  /* Anything else
846  Append the current input character to the current tag token's tag name.
847  Stay in the tag name state. */
848  $this->token['name'] .= strtolower($char);
849  $this->state = 'tagName';
850  }
851  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1566
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

◆ tagOpenState()

HTML5::tagOpenState ( )
private

Definition at line 635 of file PH5P.php.

636  {
637  switch ($this->content_model) {
638  case self::RCDATA:
639  case self::CDATA:
640  /* If the next input character is a U+002F SOLIDUS (/) character,
641  consume it and switch to the close tag open state. If the next
642  input character is not a U+002F SOLIDUS (/) character, emit a
643  U+003C LESS-THAN SIGN character token and switch to the data
644  state to process the next input character. */
645  if ($this->character($this->char + 1) === '/') {
646  $this->char++;
647  $this->state = 'closeTagOpen';
648 
649  } else {
650  $this->emitToken(
651  array(
652  'type' => self::CHARACTR,
653  'data' => '<'
654  )
655  );
656 
657  $this->state = 'data';
658  }
659  break;
660 
661  case self::PCDATA:
662  // If the content model flag is set to the PCDATA state
663  // Consume the next input character:
664  $this->char++;
665  $char = $this->char();
666 
667  if ($char === '!') {
668  /* U+0021 EXCLAMATION MARK (!)
669  Switch to the markup declaration open state. */
670  $this->state = 'markupDeclarationOpen';
671 
672  } elseif ($char === '/') {
673  /* U+002F SOLIDUS (/)
674  Switch to the close tag open state. */
675  $this->state = 'closeTagOpen';
676 
677  } elseif (preg_match('/^[A-Za-z]$/', $char)) {
678  /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
679  Create a new start tag token, set its tag name to the lowercase
680  version of the input character (add 0x0020 to the character's code
681  point), then switch to the tag name state. (Don't emit the token
682  yet; further details will be filled in before it is emitted.) */
683  $this->token = array(
684  'name' => strtolower($char),
685  'type' => self::STARTTAG,
686  'attr' => array()
687  );
688 
689  $this->state = 'tagName';
690 
691  } elseif ($char === '>') {
692  /* U+003E GREATER-THAN SIGN (>)
693  Parse error. Emit a U+003C LESS-THAN SIGN character token and a
694  U+003E GREATER-THAN SIGN character token. Switch to the data state. */
695  $this->emitToken(
696  array(
697  'type' => self::CHARACTR,
698  'data' => '<>'
699  )
700  );
701 
702  $this->state = 'data';
703 
704  } elseif ($char === '?') {
705  /* U+003F QUESTION MARK (?)
706  Parse error. Switch to the bogus comment state. */
707  $this->state = 'bogusComment';
708 
709  } else {
710  /* Anything else
711  Parse error. Emit a U+003C LESS-THAN SIGN character token and
712  reconsume the current input character in the data state. */
713  $this->emitToken(
714  array(
715  'type' => self::CHARACTR,
716  'data' => '<'
717  )
718  );
719 
720  $this->char--;
721  $this->state = 'data';
722  }
723  break;
724  }
725  }
character($s, $l=0)
Definition: PH5P.php:488
char()
Definition: PH5P.php:481
emitToken($token)
Definition: PH5P.php:1554
$char
Definition: PH5P.php:73

Field Documentation

◆ $char

HTML5::$char
private

Definition at line 73 of file PH5P.php.

◆ $content_model

HTML5::$content_model
private

Definition at line 78 of file PH5P.php.

◆ $data

HTML5::$data
private

Definition at line 72 of file PH5P.php.

◆ $entities

HTML5::$entities
private

Definition at line 80 of file PH5P.php.

◆ $EOF

HTML5::$EOF
private

Definition at line 74 of file PH5P.php.

◆ $escape

HTML5::$escape = false
private

Definition at line 79 of file PH5P.php.

◆ $state

HTML5::$state
private

Definition at line 75 of file PH5P.php.

◆ $token

HTML5::$token
private

Definition at line 77 of file PH5P.php.

◆ $tree

HTML5::$tree
private

Definition at line 76 of file PH5P.php.

◆ CDATA

const HTML5::CDATA = 2

Definition at line 451 of file PH5P.php.

Referenced by HTML5TreeConstructer\inBody(), and HTML5TreeConstructer\inHead().

◆ CHARACTR

◆ COMMENT

◆ DOCTYPE

◆ ENDTAG

◆ EOF

const HTML5::EOF = 5

Definition at line 459 of file PH5P.php.

◆ PCDATA

const HTML5::PCDATA = 0

Definition at line 449 of file PH5P.php.

Referenced by HTML5TreeConstructer\inHead().

◆ PLAINTEXT

const HTML5::PLAINTEXT = 3

Definition at line 452 of file PH5P.php.

Referenced by HTML5TreeConstructer\inBody().

◆ RCDATA

const HTML5::RCDATA = 1

Definition at line 450 of file PH5P.php.

Referenced by HTML5TreeConstructer\inBody(), and HTML5TreeConstructer\inHead().

◆ STARTTAG


The documentation for this class was generated from the following file: