ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
HTML5 Class Reference
+ Collaboration diagram for HTML5:

Public Member Functions

 __construct ($data)
 
 save ()
 
 __construct ($data)
 
 save ()
 

Data Fields

const PCDATA = 0
 
const RCDATA = 1
 
const CDATA = 2
 
const PLAINTEXT = 3
 
const DOCTYPE = 0
 
const STARTTAG = 1
 
const ENDTAG = 2
 
const COMMENT = 3
 
const CHARACTR = 4
 
const EOF = 5
 

Private Member Functions

 char ()
 
 character ($s, $l=0)
 
 characters ($char_class, $start)
 
 dataState ()
 
 entityDataState ()
 
 tagOpenState ()
 
 closeTagOpenState ()
 
 tagNameState ()
 
 beforeAttributeNameState ()
 
 attributeNameState ()
 
 afterAttributeNameState ()
 
 beforeAttributeValueState ()
 
 attributeValueDoubleQuotedState ()
 
 attributeValueSingleQuotedState ()
 
 attributeValueUnquotedState ()
 
 entityInAttributeValueState ()
 
 bogusCommentState ()
 
 markupDeclarationOpenState ()
 
 commentState ()
 
 commentDashState ()
 
 commentEndState ()
 
 doctypeState ()
 
 beforeDoctypeNameState ()
 
 doctypeNameState ()
 
 afterDoctypeNameState ()
 
 bogusDoctypeState ()
 
 entity ()
 
 emitToken ($token)
 
 EOF ()
 
 char ()
 
 character ($s, $l=0)
 
 characters ($char_class, $start)
 
 dataState ()
 
 entityDataState ()
 
 tagOpenState ()
 
 closeTagOpenState ()
 
 tagNameState ()
 
 beforeAttributeNameState ()
 
 attributeNameState ()
 
 afterAttributeNameState ()
 
 beforeAttributeValueState ()
 
 attributeValueDoubleQuotedState ()
 
 attributeValueSingleQuotedState ()
 
 attributeValueUnquotedState ()
 
 entityInAttributeValueState ()
 
 bogusCommentState ()
 
 markupDeclarationOpenState ()
 
 commentState ()
 
 commentDashState ()
 
 commentEndState ()
 
 doctypeState ()
 
 beforeDoctypeNameState ()
 
 doctypeNameState ()
 
 afterDoctypeNameState ()
 
 bogusDoctypeState ()
 
 entity ()
 
 emitToken ($token)
 
 EOF ()
 

Private Attributes

 $data
 
 $char
 
 $EOF
 
 $state
 
 $tree
 
 $token
 
 $content_model
 
 $escape = false
 
 $entities
 

Detailed Description

Definition at line 70 of file PH5P.php.

Constructor & Destructor Documentation

◆ __construct() [1/2]

HTML5::__construct (   $data)

Definition at line 461 of file PH5P.php.

References $data, data, and EOF.

462  {
463  $this->data = $data;
464  $this->char = -1;
465  $this->EOF = strlen($data);
466  $this->tree = new HTML5TreeConstructer;
467  $this->content_model = self::PCDATA;
468 
469  $this->state = 'data';
470 
471  while ($this->state !== null) {
472  $this->{$this->state . 'State'}();
473  }
474  }
Add some data
$data
Definition: PH5P.php:72
EOF()
Definition: PH5P.php:1565

◆ __construct() [2/2]

HTML5::__construct (   $data)

Definition at line 67 of file PH5P.php.

References $data, data, and EOF().

68  {
69  $data = str_replace("\r\n", "\n", $data);
70  $date = str_replace("\r", null, $data);
71 
72  $this->data = $data;
73  $this->char = -1;
74  $this->EOF = strlen($data);
75  $this->tree = new HTML5TreeConstructer;
76  $this->content_model = self::PCDATA;
77 
78  $this->state = 'data';
79 
80  while($this->state !== null) {
81  $this->{$this->state.'State'}();
82  }
83  }
Add some data
$data
Definition: PH5P.php:72
EOF()
Definition: PH5P.php:1565
+ Here is the call graph for this function:

Member Function Documentation

◆ afterAttributeNameState() [1/2]

HTML5::afterAttributeNameState ( )
private

Definition at line 535 of file PH5P.php.

References $char, array, character(), emitToken(), and EOF().

536  {
537  // Consume the next input character:
538  $this->char++;
539  $char = $this->character($this->char);
540 
541  if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
542  /* U+0009 CHARACTER TABULATION
543  U+000A LINE FEED (LF)
544  U+000B LINE TABULATION
545  U+000C FORM FEED (FF)
546  U+0020 SPACE
547  Stay in the after attribute name state. */
548  $this->state = 'afterAttributeName';
549 
550  } elseif($char === '=') {
551  /* U+003D EQUALS SIGN (=)
552  Switch to the before attribute value state. */
553  $this->state = 'beforeAttributeValue';
554 
555  } elseif($char === '>') {
556  /* U+003E GREATER-THAN SIGN (>)
557  Emit the current tag token. Switch to the data state. */
558  $this->emitToken($this->token);
559  $this->state = 'data';
560 
561  } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
562  /* U+002F SOLIDUS (/)
563  Parse error unless this is a permitted slash. Switch to the
564  before attribute name state. */
565  $this->state = 'beforeAttributeName';
566 
567  } elseif($this->char === $this->EOF) {
568  /* EOF
569  Parse error. Emit the current tag token. Reconsume the EOF
570  character in the data state. */
571  $this->emitToken($this->token);
572 
573  $this->char--;
574  $this->state = 'data';
575 
576  } else {
577  /* Anything else
578  Start a new attribute in the current tag token. Set that attribute's
579  name to the current input character, and its value to the empty string.
580  Switch to the attribute name state. */
581  $this->token['attr'][] = array(
582  'name' => strtolower($char),
583  'value' => null
584  );
585 
586  $this->state = 'attributeName';
587  }
588  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.
+ Here is the call graph for this function:

◆ afterAttributeNameState() [2/2]

HTML5::afterAttributeNameState ( )
private

Definition at line 955 of file PH5P.php.

References array, and EOF.

956  {
957  // Consume the next input character:
958  $this->char++;
959  $char = $this->character($this->char);
960 
961  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
962  /* U+0009 CHARACTER TABULATION
963  U+000A LINE FEED (LF)
964  U+000B LINE TABULATION
965  U+000C FORM FEED (FF)
966  U+0020 SPACE
967  Stay in the after attribute name state. */
968  $this->state = 'afterAttributeName';
969 
970  } elseif ($char === '=') {
971  /* U+003D EQUALS SIGN (=)
972  Switch to the before attribute value state. */
973  $this->state = 'beforeAttributeValue';
974 
975  } elseif ($char === '>') {
976  /* U+003E GREATER-THAN SIGN (>)
977  Emit the current tag token. Switch to the data state. */
978  $this->emitToken($this->token);
979  $this->state = 'data';
980 
981  } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
982  /* U+002F SOLIDUS (/)
983  Parse error unless this is a permitted slash. Switch to the
984  before attribute name state. */
985  $this->state = 'beforeAttributeName';
986 
987  } elseif ($this->char === $this->EOF) {
988  /* EOF
989  Parse error. Emit the current tag token. Reconsume the EOF
990  character in the data state. */
991  $this->emitToken($this->token);
992 
993  $this->char--;
994  $this->state = 'data';
995 
996  } else {
997  /* Anything else
998  Start a new attribute in the current tag token. Set that attribute's
999  name to the current input character, and its value to the empty string.
1000  Switch to the attribute name state. */
1001  $this->token['attr'][] = array(
1002  'name' => strtolower($char),
1003  'value' => null
1004  );
1005 
1006  $this->state = 'attributeName';
1007  }
1008  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.

◆ afterDoctypeNameState() [1/2]

HTML5::afterDoctypeNameState ( )
private

Definition at line 991 of file PH5P.php.

References $char, char(), emitToken(), and EOF().

992  {
993  /* Consume the next input character: */
994  $this->char++;
995  $char = $this->char();
996 
997  if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
998  // Stay in the after DOCTYPE name state.
999 
1000  } elseif($char === '>') {
1001  $this->emitToken($this->token);
1002  $this->state = 'data';
1003 
1004  } elseif($this->char === $this->EOF) {
1005  $this->emitToken($this->token);
1006  $this->char--;
1007  $this->state = 'data';
1008 
1009  } else {
1010  $this->token['error'] = true;
1011  $this->state = 'bogusDoctype';
1012  }
1013  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ afterDoctypeNameState() [2/2]

HTML5::afterDoctypeNameState ( )
private

Definition at line 1418 of file PH5P.php.

References EOF.

1419  {
1420  /* Consume the next input character: */
1421  $this->char++;
1422  $char = $this->char();
1423 
1424  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1425  // Stay in the after DOCTYPE name state.
1426 
1427  } elseif ($char === '>') {
1428  $this->emitToken($this->token);
1429  $this->state = 'data';
1430 
1431  } elseif ($this->char === $this->EOF) {
1432  $this->emitToken($this->token);
1433  $this->char--;
1434  $this->state = 'data';
1435 
1436  } else {
1437  $this->token['error'] = true;
1438  $this->state = 'bogusDoctype';
1439  }
1440  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73

◆ attributeNameState() [1/2]

HTML5::attributeNameState ( )
private

Definition at line 483 of file PH5P.php.

References $char, character(), emitToken(), and EOF().

484  {
485  // Consume the next input character:
486  $this->char++;
487  $char = $this->character($this->char);
488 
489  if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
490  /* U+0009 CHARACTER TABULATION
491  U+000A LINE FEED (LF)
492  U+000B LINE TABULATION
493  U+000C FORM FEED (FF)
494  U+0020 SPACE
495  Switch to the after attribute name state. */
496  $this->state = 'afterAttributeName';
497 
498  } elseif($char === '=') {
499  /* U+003D EQUALS SIGN (=)
500  Switch to the before attribute value state. */
501  $this->state = 'beforeAttributeValue';
502 
503  } elseif($char === '>') {
504  /* U+003E GREATER-THAN SIGN (>)
505  Emit the current tag token. Switch to the data state. */
506  $this->emitToken($this->token);
507  $this->state = 'data';
508 
509  } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
510  /* U+002F SOLIDUS (/)
511  Parse error unless this is a permitted slash. Switch to the before
512  attribute name state. */
513  $this->state = 'beforeAttributeName';
514 
515  } elseif($this->char === $this->EOF) {
516  /* EOF
517  Parse error. Emit the current tag token. Reconsume the EOF
518  character in the data state. */
519  $this->emitToken($this->token);
520 
521  $this->char--;
522  $this->state = 'data';
523 
524  } else {
525  /* Anything else
526  Append the current input character to the current attribute's name.
527  Stay in the attribute name state. */
528  $last = count($this->token['attr']) - 1;
529  $this->token['attr'][$last]['name'] .= strtolower($char);
530 
531  $this->state = 'attributeName';
532  }
533  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ attributeNameState() [2/2]

HTML5::attributeNameState ( )
private

Definition at line 903 of file PH5P.php.

References EOF.

904  {
905  // Consume the next input character:
906  $this->char++;
907  $char = $this->character($this->char);
908 
909  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
910  /* U+0009 CHARACTER TABULATION
911  U+000A LINE FEED (LF)
912  U+000B LINE TABULATION
913  U+000C FORM FEED (FF)
914  U+0020 SPACE
915  Switch to the after attribute name state. */
916  $this->state = 'afterAttributeName';
917 
918  } elseif ($char === '=') {
919  /* U+003D EQUALS SIGN (=)
920  Switch to the before attribute value state. */
921  $this->state = 'beforeAttributeValue';
922 
923  } elseif ($char === '>') {
924  /* U+003E GREATER-THAN SIGN (>)
925  Emit the current tag token. Switch to the data state. */
926  $this->emitToken($this->token);
927  $this->state = 'data';
928 
929  } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
930  /* U+002F SOLIDUS (/)
931  Parse error unless this is a permitted slash. Switch to the before
932  attribute name state. */
933  $this->state = 'beforeAttributeName';
934 
935  } elseif ($this->char === $this->EOF) {
936  /* EOF
937  Parse error. Emit the current tag token. Reconsume the EOF
938  character in the data state. */
939  $this->emitToken($this->token);
940 
941  $this->char--;
942  $this->state = 'data';
943 
944  } else {
945  /* Anything else
946  Append the current input character to the current attribute's name.
947  Stay in the attribute name state. */
948  $last = count($this->token['attr']) - 1;
949  $this->token['attr'][$last]['name'] .= strtolower($char);
950 
951  $this->state = 'attributeName';
952  }
953  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73

◆ attributeValueDoubleQuotedState() [1/2]

HTML5::attributeValueDoubleQuotedState ( )
private

Definition at line 639 of file PH5P.php.

References $char, character(), emitToken(), entityInAttributeValueState(), and EOF().

640  {
641  // Consume the next input character:
642  $this->char++;
643  $char = $this->character($this->char);
644 
645  if($char === '"') {
646  /* U+0022 QUOTATION MARK (")
647  Switch to the before attribute name state. */
648  $this->state = 'beforeAttributeName';
649 
650  } elseif($char === '&') {
651  /* U+0026 AMPERSAND (&)
652  Switch to the entity in attribute value state. */
653  $this->entityInAttributeValueState('double');
654 
655  } elseif($this->char === $this->EOF) {
656  /* EOF
657  Parse error. Emit the current tag token. Reconsume the character
658  in the data state. */
659  $this->emitToken($this->token);
660 
661  $this->char--;
662  $this->state = 'data';
663 
664  } else {
665  /* Anything else
666  Append the current input character to the current attribute's value.
667  Stay in the attribute value (double-quoted) state. */
668  $last = count($this->token['attr']) - 1;
669  $this->token['attr'][$last]['value'] .= $char;
670 
671  $this->state = 'attributeValueDoubleQuoted';
672  }
673  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
entityInAttributeValueState()
Definition: PH5P.php:1168
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ attributeValueDoubleQuotedState() [2/2]

HTML5::attributeValueDoubleQuotedState ( )
private

Definition at line 1059 of file PH5P.php.

References EOF.

1060  {
1061  // Consume the next input character:
1062  $this->char++;
1063  $char = $this->character($this->char);
1064 
1065  if ($char === '"') {
1066  /* U+0022 QUOTATION MARK (")
1067  Switch to the before attribute name state. */
1068  $this->state = 'beforeAttributeName';
1069 
1070  } elseif ($char === '&') {
1071  /* U+0026 AMPERSAND (&)
1072  Switch to the entity in attribute value state. */
1073  $this->entityInAttributeValueState('double');
1074 
1075  } elseif ($this->char === $this->EOF) {
1076  /* EOF
1077  Parse error. Emit the current tag token. Reconsume the character
1078  in the data state. */
1079  $this->emitToken($this->token);
1080 
1081  $this->char--;
1082  $this->state = 'data';
1083 
1084  } else {
1085  /* Anything else
1086  Append the current input character to the current attribute's value.
1087  Stay in the attribute value (double-quoted) state. */
1088  $last = count($this->token['attr']) - 1;
1089  $this->token['attr'][$last]['value'] .= $char;
1090 
1091  $this->state = 'attributeValueDoubleQuoted';
1092  }
1093  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
entityInAttributeValueState()
Definition: PH5P.php:1168
$char
Definition: PH5P.php:73

◆ attributeValueSingleQuotedState() [1/2]

HTML5::attributeValueSingleQuotedState ( )
private

Definition at line 675 of file PH5P.php.

References $char, character(), emitToken(), entityInAttributeValueState(), and EOF().

676  {
677  // Consume the next input character:
678  $this->char++;
679  $char = $this->character($this->char);
680 
681  if($char === '\'') {
682  /* U+0027 APOSTROPHE (')
683  Switch to the before attribute name state. */
684  $this->state = 'beforeAttributeName';
685 
686  } elseif($char === '&') {
687  /* U+0026 AMPERSAND (&)
688  Switch to the entity in attribute value state. */
689  $this->entityInAttributeValueState('single');
690 
691  } elseif($this->char === $this->EOF) {
692  /* EOF
693  Parse error. Emit the current tag token. Reconsume the character
694  in the data state. */
695  $this->emitToken($this->token);
696 
697  $this->char--;
698  $this->state = 'data';
699 
700  } else {
701  /* Anything else
702  Append the current input character to the current attribute's value.
703  Stay in the attribute value (single-quoted) state. */
704  $last = count($this->token['attr']) - 1;
705  $this->token['attr'][$last]['value'] .= $char;
706 
707  $this->state = 'attributeValueSingleQuoted';
708  }
709  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
entityInAttributeValueState()
Definition: PH5P.php:1168
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ attributeValueSingleQuotedState() [2/2]

HTML5::attributeValueSingleQuotedState ( )
private

Definition at line 1095 of file PH5P.php.

References EOF.

1096  {
1097  // Consume the next input character:
1098  $this->char++;
1099  $char = $this->character($this->char);
1100 
1101  if ($char === '\'') {
1102  /* U+0027 APOSTROPHE (')
1103  Switch to the before attribute name state. */
1104  $this->state = 'beforeAttributeName';
1105 
1106  } elseif ($char === '&') {
1107  /* U+0026 AMPERSAND (&)
1108  Switch to the entity in attribute value state. */
1109  $this->entityInAttributeValueState('single');
1110 
1111  } elseif ($this->char === $this->EOF) {
1112  /* EOF
1113  Parse error. Emit the current tag token. Reconsume the character
1114  in the data state. */
1115  $this->emitToken($this->token);
1116 
1117  $this->char--;
1118  $this->state = 'data';
1119 
1120  } else {
1121  /* Anything else
1122  Append the current input character to the current attribute's value.
1123  Stay in the attribute value (single-quoted) state. */
1124  $last = count($this->token['attr']) - 1;
1125  $this->token['attr'][$last]['value'] .= $char;
1126 
1127  $this->state = 'attributeValueSingleQuoted';
1128  }
1129  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
entityInAttributeValueState()
Definition: PH5P.php:1168
$char
Definition: PH5P.php:73

◆ attributeValueUnquotedState() [1/2]

HTML5::attributeValueUnquotedState ( )
private

Definition at line 711 of file PH5P.php.

References $char, character(), emitToken(), and entityInAttributeValueState().

712  {
713  // Consume the next input character:
714  $this->char++;
715  $char = $this->character($this->char);
716 
717  if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
718  /* U+0009 CHARACTER TABULATION
719  U+000A LINE FEED (LF)
720  U+000B LINE TABULATION
721  U+000C FORM FEED (FF)
722  U+0020 SPACE
723  Switch to the before attribute name state. */
724  $this->state = 'beforeAttributeName';
725 
726  } elseif($char === '&') {
727  /* U+0026 AMPERSAND (&)
728  Switch to the entity in attribute value state. */
729  $this->entityInAttributeValueState('non');
730 
731  } elseif($char === '>') {
732  /* U+003E GREATER-THAN SIGN (>)
733  Emit the current tag token. Switch to the data state. */
734  $this->emitToken($this->token);
735  $this->state = 'data';
736 
737  } else {
738  /* Anything else
739  Append the current input character to the current attribute's value.
740  Stay in the attribute value (unquoted) state. */
741  $last = count($this->token['attr']) - 1;
742  $this->token['attr'][$last]['value'] .= $char;
743 
744  $this->state = 'attributeValueUnquoted';
745  }
746  }
character($s, $l=0)
Definition: PH5P.php:488
emitToken($token)
Definition: PH5P.php:1553
entityInAttributeValueState()
Definition: PH5P.php:1168
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ attributeValueUnquotedState() [2/2]

HTML5::attributeValueUnquotedState ( )
private

Definition at line 1131 of file PH5P.php.

1132  {
1133  // Consume the next input character:
1134  $this->char++;
1135  $char = $this->character($this->char);
1136 
1137  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1138  /* U+0009 CHARACTER TABULATION
1139  U+000A LINE FEED (LF)
1140  U+000B LINE TABULATION
1141  U+000C FORM FEED (FF)
1142  U+0020 SPACE
1143  Switch to the before attribute name state. */
1144  $this->state = 'beforeAttributeName';
1145 
1146  } elseif ($char === '&') {
1147  /* U+0026 AMPERSAND (&)
1148  Switch to the entity in attribute value state. */
1149  $this->entityInAttributeValueState();
1150 
1151  } elseif ($char === '>') {
1152  /* U+003E GREATER-THAN SIGN (>)
1153  Emit the current tag token. Switch to the data state. */
1154  $this->emitToken($this->token);
1155  $this->state = 'data';
1156 
1157  } else {
1158  /* Anything else
1159  Append the current input character to the current attribute's value.
1160  Stay in the attribute value (unquoted) state. */
1161  $last = count($this->token['attr']) - 1;
1162  $this->token['attr'][$last]['value'] .= $char;
1163 
1164  $this->state = 'attributeValueUnquoted';
1165  }
1166  }
character($s, $l=0)
Definition: PH5P.php:488
emitToken($token)
Definition: PH5P.php:1553
entityInAttributeValueState()
Definition: PH5P.php:1168
$char
Definition: PH5P.php:73

◆ beforeAttributeNameState() [1/2]

HTML5::beforeAttributeNameState ( )
private

Definition at line 433 of file PH5P.php.

References $char, array, character(), emitToken(), and EOF().

434  {
435  // Consume the next input character:
436  $this->char++;
437  $char = $this->character($this->char);
438 
439  if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
440  /* U+0009 CHARACTER TABULATION
441  U+000A LINE FEED (LF)
442  U+000B LINE TABULATION
443  U+000C FORM FEED (FF)
444  U+0020 SPACE
445  Stay in the before attribute name state. */
446  $this->state = 'beforeAttributeName';
447 
448  } elseif($char === '>') {
449  /* U+003E GREATER-THAN SIGN (>)
450  Emit the current tag token. Switch to the data state. */
451  $this->emitToken($this->token);
452  $this->state = 'data';
453 
454  } elseif($char === '/') {
455  /* U+002F SOLIDUS (/)
456  Parse error unless this is a permitted slash. Stay in the before
457  attribute name state. */
458  $this->state = 'beforeAttributeName';
459 
460  } elseif($this->char === $this->EOF) {
461  /* EOF
462  Parse error. Emit the current tag token. Reconsume the EOF
463  character in the data state. */
464  $this->emitToken($this->token);
465 
466  $this->char--;
467  $this->state = 'data';
468 
469  } else {
470  /* Anything else
471  Start a new attribute in the current tag token. Set that attribute's
472  name to the current input character, and its value to the empty string.
473  Switch to the attribute name state. */
474  $this->token['attr'][] = array(
475  'name' => strtolower($char),
476  'value' => null
477  );
478 
479  $this->state = 'attributeName';
480  }
481  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.
+ Here is the call graph for this function:

◆ beforeAttributeNameState() [2/2]

HTML5::beforeAttributeNameState ( )
private

Definition at line 853 of file PH5P.php.

References array, and EOF.

854  {
855  // Consume the next input character:
856  $this->char++;
857  $char = $this->character($this->char);
858 
859  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
860  /* U+0009 CHARACTER TABULATION
861  U+000A LINE FEED (LF)
862  U+000B LINE TABULATION
863  U+000C FORM FEED (FF)
864  U+0020 SPACE
865  Stay in the before attribute name state. */
866  $this->state = 'beforeAttributeName';
867 
868  } elseif ($char === '>') {
869  /* U+003E GREATER-THAN SIGN (>)
870  Emit the current tag token. Switch to the data state. */
871  $this->emitToken($this->token);
872  $this->state = 'data';
873 
874  } elseif ($char === '/') {
875  /* U+002F SOLIDUS (/)
876  Parse error unless this is a permitted slash. Stay in the before
877  attribute name state. */
878  $this->state = 'beforeAttributeName';
879 
880  } elseif ($this->char === $this->EOF) {
881  /* EOF
882  Parse error. Emit the current tag token. Reconsume the EOF
883  character in the data state. */
884  $this->emitToken($this->token);
885 
886  $this->char--;
887  $this->state = 'data';
888 
889  } else {
890  /* Anything else
891  Start a new attribute in the current tag token. Set that attribute's
892  name to the current input character, and its value to the empty string.
893  Switch to the attribute name state. */
894  $this->token['attr'][] = array(
895  'name' => strtolower($char),
896  'value' => null
897  );
898 
899  $this->state = 'attributeName';
900  }
901  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.

◆ beforeAttributeValueState() [1/2]

HTML5::beforeAttributeValueState ( )
private

Definition at line 590 of file PH5P.php.

References $char, character(), and emitToken().

591  {
592  // Consume the next input character:
593  $this->char++;
594  $char = $this->character($this->char);
595 
596  if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
597  /* U+0009 CHARACTER TABULATION
598  U+000A LINE FEED (LF)
599  U+000B LINE TABULATION
600  U+000C FORM FEED (FF)
601  U+0020 SPACE
602  Stay in the before attribute value state. */
603  $this->state = 'beforeAttributeValue';
604 
605  } elseif($char === '"') {
606  /* U+0022 QUOTATION MARK (")
607  Switch to the attribute value (double-quoted) state. */
608  $this->state = 'attributeValueDoubleQuoted';
609 
610  } elseif($char === '&') {
611  /* U+0026 AMPERSAND (&)
612  Switch to the attribute value (unquoted) state and reconsume
613  this input character. */
614  $this->char--;
615  $this->state = 'attributeValueUnquoted';
616 
617  } elseif($char === '\'') {
618  /* U+0027 APOSTROPHE (')
619  Switch to the attribute value (single-quoted) state. */
620  $this->state = 'attributeValueSingleQuoted';
621 
622  } elseif($char === '>') {
623  /* U+003E GREATER-THAN SIGN (>)
624  Emit the current tag token. Switch to the data state. */
625  $this->emitToken($this->token);
626  $this->state = 'data';
627 
628  } else {
629  /* Anything else
630  Append the current input character to the current attribute's value.
631  Switch to the attribute value (unquoted) state. */
632  $last = count($this->token['attr']) - 1;
633  $this->token['attr'][$last]['value'] .= $char;
634 
635  $this->state = 'attributeValueUnquoted';
636  }
637  }
character($s, $l=0)
Definition: PH5P.php:488
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ beforeAttributeValueState() [2/2]

HTML5::beforeAttributeValueState ( )
private

Definition at line 1010 of file PH5P.php.

1011  {
1012  // Consume the next input character:
1013  $this->char++;
1014  $char = $this->character($this->char);
1015 
1016  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1017  /* U+0009 CHARACTER TABULATION
1018  U+000A LINE FEED (LF)
1019  U+000B LINE TABULATION
1020  U+000C FORM FEED (FF)
1021  U+0020 SPACE
1022  Stay in the before attribute value state. */
1023  $this->state = 'beforeAttributeValue';
1024 
1025  } elseif ($char === '"') {
1026  /* U+0022 QUOTATION MARK (")
1027  Switch to the attribute value (double-quoted) state. */
1028  $this->state = 'attributeValueDoubleQuoted';
1029 
1030  } elseif ($char === '&') {
1031  /* U+0026 AMPERSAND (&)
1032  Switch to the attribute value (unquoted) state and reconsume
1033  this input character. */
1034  $this->char--;
1035  $this->state = 'attributeValueUnquoted';
1036 
1037  } elseif ($char === '\'') {
1038  /* U+0027 APOSTROPHE (')
1039  Switch to the attribute value (single-quoted) state. */
1040  $this->state = 'attributeValueSingleQuoted';
1041 
1042  } elseif ($char === '>') {
1043  /* U+003E GREATER-THAN SIGN (>)
1044  Emit the current tag token. Switch to the data state. */
1045  $this->emitToken($this->token);
1046  $this->state = 'data';
1047 
1048  } else {
1049  /* Anything else
1050  Append the current input character to the current attribute's value.
1051  Switch to the attribute value (unquoted) state. */
1052  $last = count($this->token['attr']) - 1;
1053  $this->token['attr'][$last]['value'] .= $char;
1054 
1055  $this->state = 'attributeValueUnquoted';
1056  }
1057  }
character($s, $l=0)
Definition: PH5P.php:488
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73

◆ beforeDoctypeNameState() [1/2]

HTML5::beforeDoctypeNameState ( )
private

Definition at line 913 of file PH5P.php.

References $char, array, char(), emitToken(), and EOF().

914  {
915  /* Consume the next input character: */
916  $this->char++;
917  $char = $this->char();
918 
919  if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
920  // Stay in the before DOCTYPE name state.
921 
922  } elseif(preg_match('/^[a-z]$/', $char)) {
923  $this->token = array(
924  'name' => strtoupper($char),
925  'type' => self::DOCTYPE,
926  'error' => true
927  );
928 
929  $this->state = 'doctypeName';
930 
931  } elseif($char === '>') {
932  $this->emitToken(array(
933  'name' => null,
934  'type' => self::DOCTYPE,
935  'error' => true
936  ));
937 
938  $this->state = 'data';
939 
940  } elseif($this->char === $this->EOF) {
941  $this->emitToken(array(
942  'name' => null,
943  'type' => self::DOCTYPE,
944  'error' => true
945  ));
946 
947  $this->char--;
948  $this->state = 'data';
949 
950  } else {
951  $this->token = array(
952  'name' => $char,
953  'type' => self::DOCTYPE,
954  'error' => true
955  );
956 
957  $this->state = 'doctypeName';
958  }
959  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.
+ Here is the call graph for this function:

◆ beforeDoctypeNameState() [2/2]

HTML5::beforeDoctypeNameState ( )
private

Definition at line 1336 of file PH5P.php.

References array, and EOF.

1337  {
1338  /* Consume the next input character: */
1339  $this->char++;
1340  $char = $this->char();
1341 
1342  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
1343  // Stay in the before DOCTYPE name state.
1344 
1345  } elseif (preg_match('/^[a-z]$/', $char)) {
1346  $this->token = array(
1347  'name' => strtoupper($char),
1348  'type' => self::DOCTYPE,
1349  'error' => true
1350  );
1351 
1352  $this->state = 'doctypeName';
1353 
1354  } elseif ($char === '>') {
1355  $this->emitToken(
1356  array(
1357  'name' => null,
1358  'type' => self::DOCTYPE,
1359  'error' => true
1360  )
1361  );
1362 
1363  $this->state = 'data';
1364 
1365  } elseif ($this->char === $this->EOF) {
1366  $this->emitToken(
1367  array(
1368  'name' => null,
1369  'type' => self::DOCTYPE,
1370  'error' => true
1371  )
1372  );
1373 
1374  $this->char--;
1375  $this->state = 'data';
1376 
1377  } else {
1378  $this->token = array(
1379  'name' => $char,
1380  'type' => self::DOCTYPE,
1381  'error' => true
1382  );
1383 
1384  $this->state = 'doctypeName';
1385  }
1386  }
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.

◆ bogusCommentState() [1/2]

HTML5::bogusCommentState ( )
private

Definition at line 763 of file PH5P.php.

References array, characters(), emitToken(), and EOF().

{
    /* Bogus comment state: every character from the current position up
       to (but not including) the first ">" - or up to the end of the input,
       whichever comes first - becomes the data of one comment token.
       If the comment starts at ">" or at the end of the input, the token
       data is empty. */

    // Only ask the character-class helper for a run when there is at least
    // one non-">" character to collect; a comment that begins directly at
    // ">" must produce empty data instead of swallowing the rest of the input.
    $data = ($this->char() === '>')
        ? ''
        : $this->characters('^>', $this->char);

    $this->emitToken(
        array(
            'data' => $data,
            'type' => self::COMMENT
        )
    );

    // Skip over everything that went into the comment.
    $this->char += strlen($data);

    /* Switch to the data state. */
    $this->state = 'data';

    /* If the end of the file was reached, reconsume the EOF character. */
    if ($this->char === $this->EOF) {
        $this->char = $this->EOF - 1;
    }
}
$data
Definition: PH5P.php:72
EOF()
Definition: PH5P.php:1565
characters($char_class, $start)
Definition: PH5P.php:499
emitToken($token)
Definition: PH5P.php:1553
Create styles array
The data for the language used.
+ Here is the call graph for this function:

◆ bogusCommentState() [2/2]

HTML5::bogusCommentState ( )
private

Definition at line 1184 of file PH5P.php.

References $data, array, and EOF.

{
    /* Bogus comment state: every character from the current position up
       to (but not including) the first ">" - or up to the end of the input,
       whichever comes first - becomes the data of one comment token.
       If the comment starts at ">" or at the end of the input, the token
       data is empty. */

    // Only ask the character-class helper for a run when there is at least
    // one non-">" character to collect; a comment that begins directly at
    // ">" must produce empty data instead of swallowing the rest of the input.
    $data = ($this->char() === '>')
        ? ''
        : $this->characters('^>', $this->char);

    $this->emitToken(
        array(
            'data' => $data,
            'type' => self::COMMENT
        )
    );

    // Skip over everything that went into the comment.
    $this->char += strlen($data);

    /* Switch to the data state. */
    $this->state = 'data';

    /* If the end of the file was reached, reconsume the EOF character. */
    if ($this->char === $this->EOF) {
        $this->char = $this->EOF - 1;
    }
}
$data
Definition: PH5P.php:72
EOF()
Definition: PH5P.php:1565
characters($char_class, $start)
Definition: PH5P.php:499
emitToken($token)
Definition: PH5P.php:1553
Create styles array
The data for the language used.

◆ bogusDoctypeState() [1/2]

HTML5::bogusDoctypeState ( )
private

Definition at line 1015 of file PH5P.php.

References $char, char(), emitToken(), and EOF().

{
    /* Bogus DOCTYPE state: input is skipped until ">" or the end of the
       input is reached; the pending DOCTYPE token is then emitted and the
       tokenizer returns to the data state. */
    $this->char++;
    $next = $this->char();
    $atEof = ($this->char === $this->EOF);

    if ($next !== '>' && !$atEof) {
        // Any other character is ignored; stay in the bogus DOCTYPE state.
        return;
    }

    $this->emitToken($this->token);

    if ($next !== '>') {
        // End of input: step back so the EOF is reconsumed in the data state.
        $this->char--;
    }

    $this->state = 'data';
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ bogusDoctypeState() [2/2]

HTML5::bogusDoctypeState ( )
private

Definition at line 1442 of file PH5P.php.

References EOF.

{
    /* Bogus DOCTYPE state: input is skipped until ">" or the end of the
       input is reached; the pending DOCTYPE token is then emitted and the
       tokenizer returns to the data state. */
    $this->char++;
    $next = $this->char();
    $atEof = ($this->char === $this->EOF);

    if ($next !== '>' && !$atEof) {
        // Any other character is ignored; stay in the bogus DOCTYPE state.
        return;
    }

    $this->emitToken($this->token);

    if ($next !== '>') {
        // End of input: step back so the EOF is reconsumed in the data state.
        $this->char--;
    }

    $this->state = 'data';
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73

◆ char() [1/2]

HTML5::char ( )
private

Definition at line 90 of file PH5P.php.

References $char, data, and EOF().

{
    // Returns the input character under the cursor ($this->char), or false
    // once the cursor is no longer below $this->EOF.
    if ($this->char < $this->EOF) {
        return $this->data[$this->char];
    }

    return false;
}
Add some data
EOF()
Definition: PH5P.php:1565
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ char() [2/2]

HTML5::char ( )
private

Definition at line 481 of file PH5P.php.

References data, and EOF.

Referenced by afterDoctypeNameState(), beforeDoctypeNameState(), bogusDoctypeState(), closeTagOpenState(), commentDashState(), commentEndState(), commentState(), dataState(), doctypeNameState(), doctypeState(), and tagOpenState().

{
    // Returns the input character under the cursor ($this->char), or false
    // once the cursor is no longer below $this->EOF.
    if ($this->char < $this->EOF) {
        return $this->data[$this->char];
    }

    return false;
}
Add some data
EOF()
Definition: PH5P.php:1565
$char
Definition: PH5P.php:73
+ Here is the caller graph for this function:

◆ character() [1/2]

HTML5::character (   $s,
  $l = 0 
)
private

Definition at line 97 of file PH5P.php.

References $l, data, and EOF().

{
    /* Reads from the input without moving the cursor.
         $s  start offset into $this->data
         $l  number of bytes to read; 0 (the default) means "the single
             character at offset $s"
       Returns the character or substring, or null when the requested range
       does not lie completely inside the input (offsets at or beyond
       $this->EOF are treated as outside, as in char()). */

    // Negative offsets never address a valid position of the input.
    if ($s < 0) {
        return null;
    }

    if ($l === 0) {
        return ($s < $this->EOF) ? $this->data[$s] : null;
    }

    // A run of $l bytes starting at $s occupies offsets $s .. $s + $l - 1,
    // so it fits whenever $s + $l <= EOF. A strict "<" would wrongly reject
    // a run that ends exactly at the end of the input.
    if ($s + $l <= $this->EOF) {
        return substr($this->data, $s, $l);
    }

    return null;
}
Add some data
EOF()
Definition: PH5P.php:1565
global $l
Definition: afr.php:30
+ Here is the call graph for this function:

◆ character() [2/2]

HTML5::character (   $s,
  $l = 0 
)
private

Definition at line 488 of file PH5P.php.

References $l, data, and EOF.

Referenced by afterAttributeNameState(), attributeNameState(), attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), attributeValueUnquotedState(), beforeAttributeNameState(), beforeAttributeValueState(), closeTagOpenState(), dataState(), entity(), markupDeclarationOpenState(), tagNameState(), and tagOpenState().

{
    /* Reads from the input without moving the cursor.
         $s  start offset into $this->data
         $l  number of bytes to read; 0 (the default) means "the single
             character at offset $s"
       Returns the character or substring, or null when the requested range
       does not lie completely inside the input (offsets at or beyond
       $this->EOF are treated as outside, as in char()). */

    // Negative offsets never address a valid position of the input.
    if ($s < 0) {
        return null;
    }

    if ($l === 0) {
        return ($s < $this->EOF) ? $this->data[$s] : null;
    }

    // A run of $l bytes starting at $s occupies offsets $s .. $s + $l - 1,
    // so it fits whenever $s + $l <= EOF. A strict "<" would wrongly reject
    // a run that ends exactly at the end of the input.
    if ($s + $l <= $this->EOF) {
        return substr($this->data, $s, $l);
    }

    return null;
}
Add some data
EOF()
Definition: PH5P.php:1565
global $l
Definition: afr.php:30
+ Here is the caller graph for this function:

◆ characters() [1/2]

HTML5::characters (   $char_class,
  $start 
)
private

Definition at line 108 of file PH5P.php.

References $start, and data.

{
    /* Returns the longest run of input characters, beginning at offset
       $start, that all belong to the PCRE character-class body $char_class
       (for example 'A-Za-z', '0-9' or '^>').
       An empty string is returned when the character at $start is not in
       the class, or when nothing is left of the input at $start. */
    $rest = substr($this->data, $start);

    // substr() gives false (older PHP versions) or '' when $start is at or
    // past the end of the input.
    if ($rest === false || $rest === '') {
        return '';
    }

    // Match only the leading run. A replace-based pattern that requires at
    // least one class character leaves the subject untouched when the first
    // character does not match, which hands back the whole remaining input
    // instead of an empty run.
    if (preg_match('#^[' . $char_class . ']+#', $rest, $run) === 1) {
        return $run[0];
    }

    return '';
}
Add some data

◆ characters() [2/2]

HTML5::characters (   $char_class,
  $start 
)
private

Definition at line 499 of file PH5P.php.

References $start, and data.

Referenced by bogusCommentState(), closeTagOpenState(), and entity().

{
    /* Returns the longest run of input characters, beginning at offset
       $start, that all belong to the PCRE character-class body $char_class
       (for example 'A-Za-z', '0-9' or '^>').
       An empty string is returned when the character at $start is not in
       the class, or when nothing is left of the input at $start. */
    $rest = substr($this->data, $start);

    // substr() gives false (older PHP versions) or '' when $start is at or
    // past the end of the input.
    if ($rest === false || $rest === '') {
        return '';
    }

    // Match only the leading run. A replace-based pattern that requires at
    // least one class character leaves the subject untouched when the first
    // character does not match, which hands back the whole remaining input
    // instead of an empty run.
    if (preg_match('#^[' . $char_class . ']+#', $rest, $run) === 1) {
        return $run[0];
    }

    return '';
}
Add some data
+ Here is the caller graph for this function:

◆ closeTagOpenState() [1/2]

HTML5::closeTagOpenState ( )
private

Definition at line 314 of file PH5P.php.

References $char, array, char(), character(), characters(), emitToken(), and EOF().

{
    /* Close tag open state. Decides whether the input at the cursor starts
       an end tag token, has to be emitted as the literal text "</", or
       begins a bogus comment. The candidate tag name is read starting at
       offset $this->char + 1. */

    // Letters of the candidate tag name, lower-cased, compared with the
    // node name of the last element on the tree constructor's stack.
    $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
    $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;

    $restricted = ($this->content_model === self::RCDATA || $this->content_model === self::CDATA);

    /* In the RCDATA and CDATA content models an end tag is only recognised
       when its name matches and is immediately followed by one of:
       * U+0009 CHARACTER TABULATION
       * U+000A LINE FEED (LF)
       * U+000B LINE TABULATION
       * U+000C FORM FEED (FF)
       * U+0020 SPACE
       * U+003E GREATER-THAN SIGN (>)
       * U+002F SOLIDUS (/)
       * EOF */
    $closes = false;
    if ($restricted && $the_same) {
        $after = $this->char + 1 + strlen($next_node);

        // No character exists at offsets >= EOF, so reaching EOF right
        // after the name counts as a valid terminator rather than an error.
        $closes = ($after >= $this->EOF)
            || preg_match('/[\t\n\x0b\x0c >\/]/', $this->character($after)) === 1;
    }

    if ($restricted && !$closes) {
        /* Parse error. Emit a U+003C LESS-THAN SIGN character token and a
        U+002F SOLIDUS character token, and switch to the data state to
        process the next input character. */
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => '</'
            )
        );

        $this->state = 'data';

    } else {
        /* Otherwise, if the content model flag is set to the PCDATA state,
        or if the next few characters do match that tag name, consume the
        next input character: */
        $this->char++;
        $char = $this->char();

        if (preg_match('/^[A-Za-z]$/', $char)) {
            /* ASCII letter: create a new end tag token whose name starts
            with the lower-cased letter, then switch to the tag name state.
            The token is not emitted yet; further details are filled in
            before it is emitted. */
            $this->token = array(
                'name' => strtolower($char),
                'type' => self::ENDTAG
            );

            $this->state = 'tagName';

        } elseif ($char === '>') {
            /* U+003E GREATER-THAN SIGN (>)
            Parse error. Switch to the data state. */
            $this->state = 'data';

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
            SOLIDUS character token. Reconsume the EOF character in the data state. */
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => '</'
                )
            );

            $this->char--;
            $this->state = 'data';

        } else {
            /* Parse error. Switch to the bogus comment state. */
            $this->state = 'bogusComment';
        }
    }
}
character($s, $l=0)
Definition: PH5P.php:488
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
characters($char_class, $start)
Definition: PH5P.php:499
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.
+ Here is the call graph for this function:

◆ closeTagOpenState() [2/2]

HTML5::closeTagOpenState ( )
private

Definition at line 727 of file PH5P.php.

References array, and EOF.

{
    /* Close tag open state. Decides whether the input at the cursor starts
       an end tag token, has to be emitted as the literal text "</", or
       begins a bogus comment. The candidate tag name is read starting at
       offset $this->char + 1. */

    // Letters of the candidate tag name, lower-cased, compared with the
    // node name of the last element on the tree constructor's stack.
    $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
    $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;

    $restricted = ($this->content_model === self::RCDATA || $this->content_model === self::CDATA);

    /* In the RCDATA and CDATA content models an end tag is only recognised
       when its name matches and is immediately followed by one of:
       * U+0009 CHARACTER TABULATION
       * U+000A LINE FEED (LF)
       * U+000B LINE TABULATION
       * U+000C FORM FEED (FF)
       * U+0020 SPACE
       * U+003E GREATER-THAN SIGN (>)
       * U+002F SOLIDUS (/)
       * EOF */
    $closes = false;
    if ($restricted && $the_same) {
        $after = $this->char + 1 + strlen($next_node);

        // No character exists at offsets >= EOF, so reaching EOF right
        // after the name counts as a valid terminator rather than an error.
        $closes = ($after >= $this->EOF)
            || preg_match('/[\t\n\x0b\x0c >\/]/', $this->character($after)) === 1;
    }

    if ($restricted && !$closes) {
        /* Parse error. Emit a U+003C LESS-THAN SIGN character token and a
        U+002F SOLIDUS character token, and switch to the data state to
        process the next input character. */
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => '</'
            )
        );

        $this->state = 'data';

    } else {
        /* Otherwise, if the content model flag is set to the PCDATA state,
        or if the next few characters do match that tag name, consume the
        next input character: */
        $this->char++;
        $char = $this->char();

        if (preg_match('/^[A-Za-z]$/', $char)) {
            /* ASCII letter: create a new end tag token whose name starts
            with the lower-cased letter, then switch to the tag name state.
            The token is not emitted yet; further details are filled in
            before it is emitted. */
            $this->token = array(
                'name' => strtolower($char),
                'type' => self::ENDTAG
            );

            $this->state = 'tagName';

        } elseif ($char === '>') {
            /* U+003E GREATER-THAN SIGN (>)
            Parse error. Switch to the data state. */
            $this->state = 'data';

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
            SOLIDUS character token. Reconsume the EOF character in the data state. */
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => '</'
                )
            );

            $this->char--;
            $this->state = 'data';

        } else {
            /* Parse error. Switch to the bogus comment state. */
            $this->state = 'bogusComment';
        }
    }
}
character($s, $l=0)
Definition: PH5P.php:488
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
characters($char_class, $start)
Definition: PH5P.php:499
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.

◆ commentDashState() [1/2]

HTML5::commentDashState ( )
private

Definition at line 846 of file PH5P.php.

References $char, char(), emitToken(), and EOF().

{
    /* Comment dash state: a single "-" has been seen inside a comment.
       The next input character decides whether the comment may be ending. */
    $this->char++;
    $next = $this->char();

    if ($next === '-') {
        // Second dash in a row: switch to the comment end state.
        $this->state = 'commentEnd';
        return;
    }

    if ($this->char === $this->EOF) {
        // Parse error: emit the comment token as it stands and reconsume
        // the EOF in the data state.
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    // Anything else: the pending dash and this character are ordinary
    // comment text; go back to the comment state.
    $this->token['data'] .= '-' . $next;
    $this->state = 'comment';
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ commentDashState() [2/2]

HTML5::commentDashState ( )
private

Definition at line 1269 of file PH5P.php.

References EOF.

{
    /* Comment dash state: a single "-" has been seen inside a comment.
       The next input character decides whether the comment may be ending. */
    $this->char++;
    $next = $this->char();

    if ($next === '-') {
        // Second dash in a row: switch to the comment end state.
        $this->state = 'commentEnd';
        return;
    }

    if ($this->char === $this->EOF) {
        // Parse error: emit the comment token as it stands and reconsume
        // the EOF in the data state.
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    // Anything else: the pending dash and this character are ordinary
    // comment text; go back to the comment state.
    $this->token['data'] .= '-' . $next;
    $this->state = 'comment';
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73

◆ commentEndState() [1/2]

HTML5::commentEndState ( )
private

Definition at line 874 of file PH5P.php.

References $char, char(), emitToken(), and EOF().

{
    /* Comment end state: two dashes have been seen inside a comment.
       ">" closes the comment; anything else turns the dashes back into
       comment text. */
    $this->char++;
    $next = $this->char();

    if ($next === '-') {
        // One more dash: add a dash to the comment text and stay here.
        $this->token['data'] .= '-';
        return;
    }

    if ($next === '>' || $this->char === $this->EOF) {
        // The comment is finished, either properly (">") or by the end of
        // the input.
        $this->emitToken($this->token);

        if ($next !== '>') {
            // End of input: reconsume the EOF in the data state.
            $this->char--;
        }

        $this->state = 'data';
        return;
    }

    // Anything else: the two pending dashes and this character are
    // comment text; go back to the comment state.
    $this->token['data'] .= '--' . $next;
    $this->state = 'comment';
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ commentEndState() [2/2]

HTML5::commentEndState ( )
private

Definition at line 1297 of file PH5P.php.

References EOF.

{
    /* Comment end state: two dashes have been seen inside a comment.
       ">" closes the comment; anything else turns the dashes back into
       comment text. */
    $this->char++;
    $next = $this->char();

    if ($next === '-') {
        // One more dash: add a dash to the comment text and stay here.
        $this->token['data'] .= '-';
        return;
    }

    if ($next === '>' || $this->char === $this->EOF) {
        // The comment is finished, either properly (">") or by the end of
        // the input.
        $this->emitToken($this->token);

        if ($next !== '>') {
            // End of input: reconsume the EOF in the data state.
            $this->char--;
        }

        $this->state = 'data';
        return;
    }

    // Anything else: the two pending dashes and this character are
    // comment text; go back to the comment state.
    $this->token['data'] .= '--' . $next;
    $this->state = 'comment';
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73

◆ commentState() [1/2]

HTML5::commentState ( )
private

Definition at line 819 of file PH5P.php.

References $char, char(), emitToken(), and EOF().

{
    /* Comment state: collects comment text one character at a time until
       a dash or the end of the input shows up. */
    $this->char++;
    $next = $this->char();

    if ($next === '-') {
        // U+002D HYPHEN-MINUS: possible start of the comment terminator.
        $this->state = 'commentDash';

    } elseif ($this->char !== $this->EOF) {
        // Anything else: append the character to the comment token's data
        // and stay in the comment state.
        $this->token['data'] .= $next;

    } else {
        // EOF: parse error. Emit the comment token and reconsume the EOF
        // in the data state.
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
    }
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ commentState() [2/2]

HTML5::commentState ( )
private

Definition at line 1242 of file PH5P.php.

References EOF.

{
    /* Comment state: collects comment text one character at a time until
       a dash or the end of the input shows up. */
    $this->char++;
    $next = $this->char();

    if ($next === '-') {
        // U+002D HYPHEN-MINUS: possible start of the comment terminator.
        $this->state = 'commentDash';

    } elseif ($this->char !== $this->EOF) {
        // Anything else: append the character to the comment token's data
        // and stay in the comment state.
        $this->token['data'] .= $next;

    } else {
        // EOF: parse error. Emit the comment token and reconsume the EOF
        // in the data state.
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
    }
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73

◆ dataState() [1/2]

HTML5::dataState ( )
private

Definition at line 113 of file PH5P.php.

References $char, array, char(), character(), data, emitToken(), and EOF().

{
    /* Data state: consumes the next input character and, depending on the
       character and the current content model, either switches to another
       tokenizer state or emits character tokens. Also maintains the escape
       flag that tracks "<!--" ... "-->" sections inside RCDATA/CDATA
       content. */

    // Consume the next input character
    $this->char++;
    $char = $this->char();

    $rcdataOrCdata = ($this->content_model === self::RCDATA
        || $this->content_model === self::CDATA);

    if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
        /* U+0026 AMPERSAND (&)
        When the content model flag is set to one of the PCDATA or RCDATA
        states: switch to the entity data state. Otherwise: treat it as per
        the "anything else" entry below. */
        $this->state = 'entityData';

    } elseif ($char === '-') {
        /* If the content model flag is set to either the RCDATA state or
        the CDATA state, and the escape flag is false, and there are at
        least three characters before this one in the input stream, and the
        last four characters in the input stream, including this one, are
        U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
        and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
        // "Including this one" means the four characters END at the cursor,
        // so the window starts three positions back.
        if ($rcdataOrCdata && $this->escape === false &&
            $this->char >= 3 && substr($this->data, $this->char - 3, 4) === '<!--'
        ) {
            $this->escape = true;
        }

        /* In any case, emit the input character as a character token. Stay
        in the data state. */
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

        /* U+003C LESS-THAN SIGN (<) */
    } elseif ($char === '<' && ($this->content_model === self::PCDATA ||
            ($rcdataOrCdata && $this->escape === false))
    ) {
        /* When the content model flag is set to the PCDATA state: switch
        to the tag open state.

        When the content model flag is set to either the RCDATA state or
        the CDATA state and the escape flag is false: switch to the tag
        open state.

        Otherwise: treat it as per the "anything else" entry below. */
        $this->state = 'tagOpen';

        /* U+003E GREATER-THAN SIGN (>) */
    } elseif ($char === '>') {
        /* If the content model flag is set to either the RCDATA state or
        the CDATA state, and the escape flag is true, and the last three
        characters in the input stream including this one are U+002D
        HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
        set the escape flag to false. */
        // The three characters END at the cursor, so the window starts two
        // positions back.
        if ($rcdataOrCdata && $this->escape === true &&
            $this->char >= 2 && substr($this->data, $this->char - 2, 3) === '-->'
        ) {
            $this->escape = false;
        }

        /* In any case, emit the input character as a character token.
        Stay in the data state. */
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

    } elseif ($this->char === $this->EOF) {
        /* EOF
        Emit an end-of-file token. */
        $this->EOF();

    } elseif ($this->content_model === self::PLAINTEXT) {
        /* When the content model flag is set to the PLAINTEXT state
        THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
        the text and emit it as a character token. */
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => substr($this->data, $this->char)
            )
        );

        $this->EOF();

    } else {
        /* Anything else
        THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters that
        otherwise would also be treated as a character token and emit them
        as a single character token. Stay in the data state. */
        // The current character is always part of the run, even when it is
        // a "<" or "&" that fell through to this branch (escaped "<", or
        // "&" in CDATA content). Scanning for the next "<" / "&" therefore
        // starts one position later; otherwise the run would be empty and
        // the cursor would never advance.
        $len = 1 + strcspn($this->data, '<&', $this->char + 1);
        $char = substr($this->data, $this->char, $len);
        $this->char += $len - 1;

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

        $this->state = 'data';
    }
}
Add some data
character($s, $l=0)
Definition: PH5P.php:488
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.
+ Here is the call graph for this function:

◆ dataState() [2/2]

HTML5::dataState ( )
private

Definition at line 504 of file PH5P.php.

References array, data, and EOF.

{
    /* Data state: consumes the next input character and, depending on the
       character and the current content model, either switches to another
       tokenizer state or emits character tokens. Also maintains the escape
       flag that tracks "<!--" ... "-->" sections inside RCDATA/CDATA
       content. */

    // Consume the next input character
    $this->char++;
    $char = $this->char();

    $rcdataOrCdata = ($this->content_model === self::RCDATA
        || $this->content_model === self::CDATA);

    if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
        /* U+0026 AMPERSAND (&)
        When the content model flag is set to one of the PCDATA or RCDATA
        states: switch to the entity data state. Otherwise: treat it as per
        the "anything else" entry below. */
        $this->state = 'entityData';

    } elseif ($char === '-') {
        /* If the content model flag is set to either the RCDATA state or
        the CDATA state, and the escape flag is false, and there are at
        least three characters before this one in the input stream, and the
        last four characters in the input stream, including this one, are
        U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
        and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
        // "Including this one" means the four characters END at the cursor,
        // so the window starts three positions back.
        if ($rcdataOrCdata && $this->escape === false &&
            $this->char >= 3 && substr($this->data, $this->char - 3, 4) === '<!--'
        ) {
            $this->escape = true;
        }

        /* In any case, emit the input character as a character token. Stay
        in the data state. */
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

        /* U+003C LESS-THAN SIGN (<) */
    } elseif ($char === '<' && ($this->content_model === self::PCDATA ||
            ($rcdataOrCdata && $this->escape === false))
    ) {
        /* When the content model flag is set to the PCDATA state: switch
        to the tag open state.

        When the content model flag is set to either the RCDATA state or
        the CDATA state and the escape flag is false: switch to the tag
        open state.

        Otherwise: treat it as per the "anything else" entry below. */
        $this->state = 'tagOpen';

        /* U+003E GREATER-THAN SIGN (>) */
    } elseif ($char === '>') {
        /* If the content model flag is set to either the RCDATA state or
        the CDATA state, and the escape flag is true, and the last three
        characters in the input stream including this one are U+002D
        HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
        set the escape flag to false. */
        // The three characters END at the cursor, so the window starts two
        // positions back.
        if ($rcdataOrCdata && $this->escape === true &&
            $this->char >= 2 && substr($this->data, $this->char - 2, 3) === '-->'
        ) {
            $this->escape = false;
        }

        /* In any case, emit the input character as a character token.
        Stay in the data state. */
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

    } elseif ($this->char === $this->EOF) {
        /* EOF
        Emit an end-of-file token. */
        $this->EOF();

    } elseif ($this->content_model === self::PLAINTEXT) {
        /* When the content model flag is set to the PLAINTEXT state
        THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
        the text and emit it as a character token. */
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => substr($this->data, $this->char)
            )
        );

        $this->EOF();

    } else {
        /* Anything else
        THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters that
        otherwise would also be treated as a character token and emit them
        as a single character token. Stay in the data state. */
        // The current character is always part of the run, even when it is
        // a "<" or "&" that fell through to this branch (escaped "<", or
        // "&" in CDATA content). Scanning for the next "<" / "&" therefore
        // starts one position later; otherwise the run would be empty and
        // the cursor would never advance.
        $len = 1 + strcspn($this->data, '<&', $this->char + 1);
        $char = substr($this->data, $this->char, $len);
        $this->char += $len - 1;

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

        $this->state = 'data';
    }
}
Add some data
character($s, $l=0)
Definition: PH5P.php:488
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.

◆ doctypeNameState() [1/2]

HTML5::doctypeNameState ( )
private

Definition at line 961 of file PH5P.php.

References $char, char(), emitToken(), and EOF().

{
    /* DOCTYPE name state: appends characters to the DOCTYPE token's name
       until whitespace, ">" or the end of the input is reached. After every
       step the token's error flag is recomputed: it is false only while the
       collected name is exactly "HTML". */
    $this->char++;
    $next = $this->char();

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $next)) {
        // Whitespace ends the name.
        $this->state = 'AfterDoctypeName';

    } elseif ($next === '>' || $this->char === $this->EOF) {
        // The DOCTYPE is finished, either by ">" or by the end of the input.
        $this->emitToken($this->token);

        if ($next !== '>') {
            // End of input: reconsume the EOF in the data state.
            $this->char--;
        }

        $this->state = 'data';

    } else {
        // Name character: lowercase ASCII letters are stored upper-cased,
        // everything else is appended unchanged.
        $this->token['name'] .= preg_match('/^[a-z]$/', $next)
            ? strtoupper($next)
            : $next;
    }

    // Runs on every path, including after the token has been emitted.
    $this->token['error'] = ($this->token['name'] !== 'HTML');
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ doctypeNameState() [2/2]

HTML5::doctypeNameState ( )
private

Definition at line 1388 of file PH5P.php.

References EOF.

{
    /* DOCTYPE name state: appends characters to the DOCTYPE token's name
       until whitespace, ">" or the end of the input is reached. After every
       step the token's error flag is recomputed: it is false only while the
       collected name is exactly "HTML". */
    $this->char++;
    $next = $this->char();

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $next)) {
        // Whitespace ends the name.
        $this->state = 'AfterDoctypeName';

    } elseif ($next === '>' || $this->char === $this->EOF) {
        // The DOCTYPE is finished, either by ">" or by the end of the input.
        $this->emitToken($this->token);

        if ($next !== '>') {
            // End of input: reconsume the EOF in the data state.
            $this->char--;
        }

        $this->state = 'data';

    } else {
        // Name character: lowercase ASCII letters are stored upper-cased,
        // everything else is appended unchanged.
        $this->token['name'] .= preg_match('/^[a-z]$/', $next)
            ? strtoupper($next)
            : $next;
    }

    // Runs on every path, including after the token has been emitted.
    $this->token['error'] = ($this->token['name'] !== 'HTML');
}
char()
Definition: PH5P.php:481
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73

◆ doctypeState() [1/2]

HTML5::doctypeState ( )
private

Definition at line 898 of file PH5P.php.

References $char, and char().

{
    /* DOCTYPE state: always hands over to the before DOCTYPE name state.
       A whitespace character is consumed on the way; any other character
       is left in place to be looked at again by the next state. */
    $this->char++;

    if (!preg_match('/^[\t\n\x0b\x0c ]$/', $this->char())) {
        // Not whitespace: undo the advance so the character is reconsumed.
        $this->char--;
    }

    $this->state = 'beforeDoctypeName';
}
char()
Definition: PH5P.php:481
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ doctypeState() [2/2]

HTML5::doctypeState ( )
private

Definition at line 1321 of file PH5P.php.

{
    /* DOCTYPE state: always hands over to the before DOCTYPE name state.
       A whitespace character is consumed on the way; any other character
       is left in place to be looked at again by the next state. */
    $this->char++;

    if (!preg_match('/^[\t\n\x0b\x0c ]$/', $this->char())) {
        // Not whitespace: undo the advance so the character is reconsumed.
        $this->char--;
    }

    $this->state = 'beforeDoctypeName';
}
char()
Definition: PH5P.php:481
$char
Definition: PH5P.php:73

◆ emitToken() [1/2]

HTML5::emitToken (   $token)
private

Definition at line 1121 of file PH5P.php.

References $token.

{
    /* Passes $token on to the tree constructor ($this->tree) and adjusts
       the tokenizer's content model afterwards:
       - an integer result from the tree constructor becomes the new
         content model;
       - otherwise an end tag token resets the content model to PCDATA. */
    $result = $this->tree->emitToken($token);

    if (is_int($result)) {
        $this->content_model = $result;
        return;
    }

    if ($token['type'] === self::ENDTAG) {
        $this->content_model = self::PCDATA;
    }
}
$token
Definition: PH5P.php:77

◆ emitToken() [2/2]

HTML5::emitToken (   $token)
private

Definition at line 1553 of file PH5P.php.

Referenced by afterAttributeNameState(), afterDoctypeNameState(), attributeNameState(), attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), attributeValueUnquotedState(), beforeAttributeNameState(), beforeAttributeValueState(), beforeDoctypeNameState(), bogusCommentState(), bogusDoctypeState(), closeTagOpenState(), commentDashState(), commentEndState(), commentState(), dataState(), doctypeNameState(), entityDataState(), entityInAttributeValueState(), HTML5TreeConstructer\inBody(), tagNameState(), and tagOpenState().

{
    /* Passes $token on to the tree constructor ($this->tree) and adjusts
       the tokenizer's content model afterwards:
       - an integer result from the tree constructor becomes the new
         content model;
       - otherwise an end tag token resets the content model to PCDATA. */
    $result = $this->tree->emitToken($token);

    if (is_int($result)) {
        $this->content_model = $result;
        return;
    }

    if ($token['type'] === self::ENDTAG) {
        $this->content_model = self::PCDATA;
    }
}
$token
Definition: PH5P.php:77
+ Here is the caller graph for this function:

◆ entity() [1/2]

HTML5::entity ( )
private

Definition at line 1035 of file PH5P.php.

References $char, $start, character(), and characters().

1036  {
1037  $start = $this->char;
1038 
1039  // This section defines how to consume an entity. This definition is
1040  // used when parsing entities in text and in attributes.
1041 
1042  // The behaviour depends on the identity of the next character (the
1043  // one immediately after the U+0026 AMPERSAND character):
1044 
1045  switch($this->character($this->char + 1)) {
1046  // U+0023 NUMBER SIGN (#)
1047  case '#':
1048 
1049  // The behaviour further depends on the character after the
1050  // U+0023 NUMBER SIGN:
1051  switch($this->character($this->char + 1)) {
1052  // U+0078 LATIN SMALL LETTER X
1053  // U+0058 LATIN CAPITAL LETTER X
1054  case 'x':
1055  case 'X':
1056  // Follow the steps below, but using the range of
1057  // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1058  // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
1059  // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
1060  // A, through to U+0046 LATIN CAPITAL LETTER F (in other
1061  // words, 0-9, A-F, a-f).
1062  $char = 1;
1063  $char_class = '0-9A-Fa-f';
1064  break;
1065 
1066  // Anything else
1067  default:
1068  // Follow the steps below, but using the range of
1069  // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1070  // NINE (i.e. just 0-9).
1071  $char = 0;
1072  $char_class = '0-9';
1073  break;
1074  }
1075 
1076  // Consume as many characters as match the range of characters
1077  // given above.
1078  $this->char++;
1079  $e_name = $this->characters($char_class, $this->char + $char + 1);
1080  $entity = $this->character($start, $this->char);
1081  $cond = strlen($e_name) > 0;
1082 
1083  // The rest of the parsing happens below.
1084  break;
1085 
1086  // Anything else
1087  default:
1088  // Consume the maximum number of characters possible, with the
1089  // consumed characters case-sensitively matching one of the
1090  // identifiers in the first column of the entities table.
1091  $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
1092  $len = strlen($e_name);
1093 
1094  for($c = 1; $c <= $len; $c++) {
1095  $id = substr($e_name, 0, $c);
1096  $this->char++;
1097 
1098  if(in_array($id, $this->entities)) {
1099  $entity = $id;
1100  break;
1101  }
1102  }
1103 
1104  $cond = isset($entity);
1105  // The rest of the parsing happens below.
1106  break;
1107  }
1108 
1109  if(!$cond) {
1110  // If no match can be made, then this is a parse error. No
1111  // characters are consumed, and nothing is returned.
1112  $this->char = $start;
1113  return false;
1114  }
1115 
1116  // Return a character token for the character corresponding to the
1117  // entity name (as given by the second column of the entities table).
1118  return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
1119  }
character($s, $l=0)
Definition: PH5P.php:488
characters($char_class, $start)
Definition: PH5P.php:499
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ entity() [2/2]

HTML5::entity ( )
private

Definition at line 1462 of file PH5P.php.

References $start.

Referenced by entityDataState(), and entityInAttributeValueState().

1463  {
1464  $start = $this->char;
1465 
1466  // This section defines how to consume an entity. This definition is
1467  // used when parsing entities in text and in attributes.
1468 
1469  // The behaviour depends on the identity of the next character (the
1470  // one immediately after the U+0026 AMPERSAND character):
1471 
1472  switch ($this->character($this->char + 1)) {
1473  // U+0023 NUMBER SIGN (#)
1474  case '#':
1475 
1476  // The behaviour further depends on the character after the
1477  // U+0023 NUMBER SIGN:
1478  switch ($this->character($this->char + 1)) {
1479  // U+0078 LATIN SMALL LETTER X
1480  // U+0058 LATIN CAPITAL LETTER X
1481  case 'x':
1482  case 'X':
1483  // Follow the steps below, but using the range of
1484  // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1485  // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
1486  // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
1487  // A, through to U+0046 LATIN CAPITAL LETTER F (in other
1488  // words, 0-9, A-F, a-f).
1489  $char = 1;
1490  $char_class = '0-9A-Fa-f';
1491  break;
1492 
1493  // Anything else
1494  default:
1495  // Follow the steps below, but using the range of
1496  // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
1497  // NINE (i.e. just 0-9).
1498  $char = 0;
1499  $char_class = '0-9';
1500  break;
1501  }
1502 
1503  // Consume as many characters as match the range of characters
1504  // given above.
1505  $this->char++;
1506  $e_name = $this->characters($char_class, $this->char + $char + 1);
1507  $entity = $this->character($start, $this->char);
1508  $cond = strlen($e_name) > 0;
1509 
1510  // The rest of the parsing happens below.
1511  break;
1512 
1513  // Anything else
1514  default:
1515  // Consume the maximum number of characters possible, with the
1516  // consumed characters case-sensitively matching one of the
1517  // identifiers in the first column of the entities table.
1518  $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
1519  $len = strlen($e_name);
1520 
1521  for ($c = 1; $c <= $len; $c++) {
1522  $id = substr($e_name, 0, $c);
1523  $this->char++;
1524 
1525  if (in_array($id, $this->entities)) {
1526  if ($e_name[$c - 1] !== ';') {
1527  if ($c < $len && $e_name[$c] == ';') {
1528  $this->char++; // consume extra semicolon
1529  }
1530  }
1531  $entity = $id;
1532  break;
1533  }
1534  }
1535 
1536  $cond = isset($entity);
1537  // The rest of the parsing happens below.
1538  break;
1539  }
1540 
1541  if (!$cond) {
1542  // If no match can be made, then this is a parse error. No
1543  // characters are consumed, and nothing is returned.
1544  $this->char = $start;
1545  return false;
1546  }
1547 
1548  // Return a character token for the character corresponding to the
1549  // entity name (as given by the second column of the entities table).
1550  return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
1551  }
character($s, $l=0)
Definition: PH5P.php:488
characters($char_class, $start)
Definition: PH5P.php:499
$char
Definition: PH5P.php:73
+ Here is the caller graph for this function:

◆ entityDataState() [1/2]

HTML5::entityDataState ( )
private

Definition at line 214 of file PH5P.php.

References $char, emitToken(), and entity().

215  {
216  // Attempt to consume an entity.
217  $entity = $this->entity();
218 
219  // If nothing is returned, emit a U+0026 AMPERSAND character token.
220  // Otherwise, emit the character token that was returned.
221  $char = (!$entity) ? '&' : $entity;
222  $this->emitToken($char);
223 
224  // Finally, switch to the data state.
225  $this->state = 'data';
226  }
entity()
Definition: PH5P.php:1462
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ entityDataState() [2/2]

HTML5::entityDataState ( )
private

Definition at line 616 of file PH5P.php.

References array.

617  {
618  // Attempt to consume an entity.
619  $entity = $this->entity();
620 
621  // If nothing is returned, emit a U+0026 AMPERSAND character token.
622  // Otherwise, emit the character token that was returned.
623  $char = (!$entity) ? '&' : $entity;
624  $this->emitToken(
625  array(
626  'type' => self::CHARACTR,
627  'data' => $char
628  )
629  );
630 
631  // Finally, switch to the data state.
632  $this->state = 'data';
633  }
entity()
Definition: PH5P.php:1462
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.

◆ entityInAttributeValueState() [1/2]

HTML5::entityInAttributeValueState ( )
private

Definition at line 748 of file PH5P.php.

References $char, emitToken(), and entity().

749  {
750  // Attempt to consume an entity.
751  $entity = $this->entity();
752 
753  // If nothing is returned, append a U+0026 AMPERSAND character to the
754  // current attribute's value. Otherwise, emit the character token that
755  // was returned.
756  $char = (!$entity)
757  ? '&'
758  : $entity;
759 
760  $this->emitToken($char);
761  }
entity()
Definition: PH5P.php:1462
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ entityInAttributeValueState() [2/2]

HTML5::entityInAttributeValueState ( )
private

Definition at line 1168 of file PH5P.php.

Referenced by attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), and attributeValueUnquotedState().

1169  {
1170  // Attempt to consume an entity.
1171  $entity = $this->entity();
1172 
1173  // If nothing is returned, append a U+0026 AMPERSAND character to the
1174  // current attribute's value. Otherwise, emit the character token that
1175  // was returned.
1176  $char = (!$entity)
1177  ? '&'
1178  : $entity;
1179 
1180  $last = count($this->token['attr']) - 1;
1181  $this->token['attr'][$last]['value'] .= $char;
1182  }
entity()
Definition: PH5P.php:1462
$char
Definition: PH5P.php:73
+ Here is the caller graph for this function:

◆ EOF() [1/2]

HTML5::EOF ( )
private

Definition at line 1133 of file PH5P.php.

References array, and EOF.

1134  {
1135  $this->state = null;
1136  $this->tree->emitToken(array(
1137  'type' => self::EOF
1138  ));
1139  }
Create styles array
The data for the language used.
const EOF
How fgetc() reports an End Of File.
Definition: JSMin_lib.php:92

◆ EOF() [2/2]

HTML5::EOF ( )
private

Definition at line 1565 of file PH5P.php.

References array, and EOF.

Referenced by __construct(), afterAttributeNameState(), afterDoctypeNameState(), attributeNameState(), attributeValueDoubleQuotedState(), attributeValueSingleQuotedState(), beforeAttributeNameState(), beforeDoctypeNameState(), bogusCommentState(), bogusDoctypeState(), char(), character(), closeTagOpenState(), commentDashState(), commentEndState(), commentState(), dataState(), doctypeNameState(), HTML5TreeConstructer\initPhase(), HTML5TreeConstructer\mainPhase(), HTML5TreeConstructer\rootElementPhase(), tagNameState(), and HTML5TreeConstructer\trailingEndPhase().

1566  {
1567  $this->state = null;
1568  $this->tree->emitToken(
1569  array(
1570  'type' => self::EOF
1571  )
1572  );
1573  }
Create styles array
The data for the language used.
const EOF
How fgetc() reports an End Of File.
Definition: JSMin_lib.php:92
+ Here is the caller graph for this function:

◆ markupDeclarationOpenState() [1/2]

HTML5::markupDeclarationOpenState ( )
private

Definition at line 790 of file PH5P.php.

References array, and character().

791  {
792  /* If the next two characters are both U+002D HYPHEN-MINUS (-)
793  characters, consume those two characters, create a comment token whose
794  data is the empty string, and switch to the comment state. */
795  if($this->character($this->char + 1, 2) === '--') {
796  $this->char += 2;
797  $this->state = 'comment';
798  $this->token = array(
799  'data' => null,
800  'type' => self::COMMENT
801  );
802 
803  /* Otherwise if the next seven characters are a case-insensitive match
804  for the word "DOCTYPE", then consume those characters and switch to the
805  DOCTYPE state. */
806  } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
807  $this->char += 7;
808  $this->state = 'doctype';
809 
810  /* Otherwise, this is a parse error. Switch to the bogus comment state.
811  The next character that is consumed, if any, is the first character
812  that will be in the comment. */
813  } else {
814  $this->char++;
815  $this->state = 'bogusComment';
816  }
817  }
character($s, $l=0)
Definition: PH5P.php:488
Create styles array
The data for the language used.
+ Here is the call graph for this function:

◆ markupDeclarationOpenState() [2/2]

HTML5::markupDeclarationOpenState ( )
private

Definition at line 1213 of file PH5P.php.

References array.

1214  {
1215  /* If the next two characters are both U+002D HYPHEN-MINUS (-)
1216  characters, consume those two characters, create a comment token whose
1217  data is the empty string, and switch to the comment state. */
1218  if ($this->character($this->char + 1, 2) === '--') {
1219  $this->char += 2;
1220  $this->state = 'comment';
1221  $this->token = array(
1222  'data' => null,
1223  'type' => self::COMMENT
1224  );
1225 
1226  /* Otherwise if the next seven characters are a case-insensitive match
1227  for the word "DOCTYPE", then consume those characters and switch to the
1228  DOCTYPE state. */
1229  } elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
1230  $this->char += 7;
1231  $this->state = 'doctype';
1232 
1233  /* Otherwise, this is a parse error. Switch to the bogus comment state.
1234  The next character that is consumed, if any, is the first character
1235  that will be in the comment. */
1236  } else {
1237  $this->char++;
1238  $this->state = 'bogusComment';
1239  }
1240  }
character($s, $l=0)
Definition: PH5P.php:488
Create styles array
The data for the language used.

◆ save() [1/2]

HTML5::save ( )

Definition at line 85 of file PH5P.php.

86  {
87  return $this->tree->save();
88  }

◆ save() [2/2]

HTML5::save ( )

Definition at line 476 of file PH5P.php.

477  {
478  return $this->tree->save();
479  }

◆ tagNameState() [1/2]

HTML5::tagNameState ( )
private

Definition at line 388 of file PH5P.php.

References $char, character(), emitToken(), and EOF().

389  {
390  // Consume the next input character:
391  $this->char++;
392  $char = $this->character($this->char);
393 
394  if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
395  /* U+0009 CHARACTER TABULATION
396  U+000A LINE FEED (LF)
397  U+000B LINE TABULATION
398  U+000C FORM FEED (FF)
399  U+0020 SPACE
400  Switch to the before attribute name state. */
401  $this->state = 'beforeAttributeName';
402 
403  } elseif($char === '>') {
404  /* U+003E GREATER-THAN SIGN (>)
405  Emit the current tag token. Switch to the data state. */
406  $this->emitToken($this->token);
407  $this->state = 'data';
408 
409  } elseif($this->char === $this->EOF) {
410  /* EOF
411  Parse error. Emit the current tag token. Reconsume the EOF
412  character in the data state. */
413  $this->emitToken($this->token);
414 
415  $this->char--;
416  $this->state = 'data';
417 
418  } elseif($char === '/') {
419  /* U+002F SOLIDUS (/)
420  Parse error unless this is a permitted slash. Switch to the before
421  attribute name state. */
422  $this->state = 'beforeAttributeName';
423 
424  } else {
425  /* Anything else
426  Append the current input character to the current tag token's tag name.
427  Stay in the tag name state. */
428  $this->token['name'] .= strtolower($char);
429  $this->state = 'tagName';
430  }
431  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
+ Here is the call graph for this function:

◆ tagNameState() [2/2]

HTML5::tagNameState ( )
private

Definition at line 808 of file PH5P.php.

References EOF.

809  {
810  // Consume the next input character:
811  $this->char++;
812  $char = $this->character($this->char);
813 
814  if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
815  /* U+0009 CHARACTER TABULATION
816  U+000A LINE FEED (LF)
817  U+000B LINE TABULATION
818  U+000C FORM FEED (FF)
819  U+0020 SPACE
820  Switch to the before attribute name state. */
821  $this->state = 'beforeAttributeName';
822 
823  } elseif ($char === '>') {
824  /* U+003E GREATER-THAN SIGN (>)
825  Emit the current tag token. Switch to the data state. */
826  $this->emitToken($this->token);
827  $this->state = 'data';
828 
829  } elseif ($this->char === $this->EOF) {
830  /* EOF
831  Parse error. Emit the current tag token. Reconsume the EOF
832  character in the data state. */
833  $this->emitToken($this->token);
834 
835  $this->char--;
836  $this->state = 'data';
837 
838  } elseif ($char === '/') {
839  /* U+002F SOLIDUS (/)
840  Parse error unless this is a permitted slash. Switch to the before
841  attribute name state. */
842  $this->state = 'beforeAttributeName';
843 
844  } else {
845  /* Anything else
846  Append the current input character to the current tag token's tag name.
847  Stay in the tag name state. */
848  $this->token['name'] .= strtolower($char);
849  $this->state = 'tagName';
850  }
851  }
character($s, $l=0)
Definition: PH5P.php:488
EOF()
Definition: PH5P.php:1565
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73

◆ tagOpenState() [1/2]

HTML5::tagOpenState ( )
private

Definition at line 228 of file PH5P.php.

References $char, array, char(), character(), and emitToken().

229  {
230  switch($this->content_model) {
231  case self::RCDATA:
232  case self::CDATA:
233  /* If the next input character is a U+002F SOLIDUS (/) character,
234  consume it and switch to the close tag open state. If the next
235  input character is not a U+002F SOLIDUS (/) character, emit a
236  U+003C LESS-THAN SIGN character token and switch to the data
237  state to process the next input character. */
238  if($this->character($this->char + 1) === '/') {
239  $this->char++;
240  $this->state = 'closeTagOpen';
241 
242  } else {
243  $this->emitToken(array(
244  'type' => self::CHARACTR,
245  'data' => '<'
246  ));
247 
248  $this->state = 'data';
249  }
250  break;
251 
252  case self::PCDATA:
253  // If the content model flag is set to the PCDATA state
254  // Consume the next input character:
255  $this->char++;
256  $char = $this->char();
257 
258  if($char === '!') {
259  /* U+0021 EXCLAMATION MARK (!)
260  Switch to the markup declaration open state. */
261  $this->state = 'markupDeclarationOpen';
262 
263  } elseif($char === '/') {
264  /* U+002F SOLIDUS (/)
265  Switch to the close tag open state. */
266  $this->state = 'closeTagOpen';
267 
268  } elseif(preg_match('/^[A-Za-z]$/', $char)) {
269  /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
270  Create a new start tag token, set its tag name to the lowercase
271  version of the input character (add 0x0020 to the character's code
272  point), then switch to the tag name state. (Don't emit the token
273  yet; further details will be filled in before it is emitted.) */
274  $this->token = array(
275  'name' => strtolower($char),
276  'type' => self::STARTTAG,
277  'attr' => array()
278  );
279 
280  $this->state = 'tagName';
281 
282  } elseif($char === '>') {
283  /* U+003E GREATER-THAN SIGN (>)
284  Parse error. Emit a U+003C LESS-THAN SIGN character token and a
285  U+003E GREATER-THAN SIGN character token. Switch to the data state. */
286  $this->emitToken(array(
287  'type' => self::CHARACTR,
288  'data' => '<>'
289  ));
290 
291  $this->state = 'data';
292 
293  } elseif($char === '?') {
294  /* U+003F QUESTION MARK (?)
295  Parse error. Switch to the bogus comment state. */
296  $this->state = 'bogusComment';
297 
298  } else {
299  /* Anything else
300  Parse error. Emit a U+003C LESS-THAN SIGN character token and
301  reconsume the current input character in the data state. */
302  $this->emitToken(array(
303  'type' => self::CHARACTR,
304  'data' => '<'
305  ));
306 
307  $this->char--;
308  $this->state = 'data';
309  }
310  break;
311  }
312  }
character($s, $l=0)
Definition: PH5P.php:488
char()
Definition: PH5P.php:481
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.
+ Here is the call graph for this function:

◆ tagOpenState() [2/2]

HTML5::tagOpenState ( )
private

Definition at line 635 of file PH5P.php.

References array.

636  {
637  switch ($this->content_model) {
638  case self::RCDATA:
639  case self::CDATA:
640  /* If the next input character is a U+002F SOLIDUS (/) character,
641  consume it and switch to the close tag open state. If the next
642  input character is not a U+002F SOLIDUS (/) character, emit a
643  U+003C LESS-THAN SIGN character token and switch to the data
644  state to process the next input character. */
645  if ($this->character($this->char + 1) === '/') {
646  $this->char++;
647  $this->state = 'closeTagOpen';
648 
649  } else {
650  $this->emitToken(
651  array(
652  'type' => self::CHARACTR,
653  'data' => '<'
654  )
655  );
656 
657  $this->state = 'data';
658  }
659  break;
660 
661  case self::PCDATA:
662  // If the content model flag is set to the PCDATA state
663  // Consume the next input character:
664  $this->char++;
665  $char = $this->char();
666 
667  if ($char === '!') {
668  /* U+0021 EXCLAMATION MARK (!)
669  Switch to the markup declaration open state. */
670  $this->state = 'markupDeclarationOpen';
671 
672  } elseif ($char === '/') {
673  /* U+002F SOLIDUS (/)
674  Switch to the close tag open state. */
675  $this->state = 'closeTagOpen';
676 
677  } elseif (preg_match('/^[A-Za-z]$/', $char)) {
678  /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
679  Create a new start tag token, set its tag name to the lowercase
680  version of the input character (add 0x0020 to the character's code
681  point), then switch to the tag name state. (Don't emit the token
682  yet; further details will be filled in before it is emitted.) */
683  $this->token = array(
684  'name' => strtolower($char),
685  'type' => self::STARTTAG,
686  'attr' => array()
687  );
688 
689  $this->state = 'tagName';
690 
691  } elseif ($char === '>') {
692  /* U+003E GREATER-THAN SIGN (>)
693  Parse error. Emit a U+003C LESS-THAN SIGN character token and a
694  U+003E GREATER-THAN SIGN character token. Switch to the data state. */
695  $this->emitToken(
696  array(
697  'type' => self::CHARACTR,
698  'data' => '<>'
699  )
700  );
701 
702  $this->state = 'data';
703 
704  } elseif ($char === '?') {
705  /* U+003F QUESTION MARK (?)
706  Parse error. Switch to the bogus comment state. */
707  $this->state = 'bogusComment';
708 
709  } else {
710  /* Anything else
711  Parse error. Emit a U+003C LESS-THAN SIGN character token and
712  reconsume the current input character in the data state. */
713  $this->emitToken(
714  array(
715  'type' => self::CHARACTR,
716  'data' => '<'
717  )
718  );
719 
720  $this->char--;
721  $this->state = 'data';
722  }
723  break;
724  }
725  }
character($s, $l=0)
Definition: PH5P.php:488
char()
Definition: PH5P.php:481
emitToken($token)
Definition: PH5P.php:1553
$char
Definition: PH5P.php:73
Create styles array
The data for the language used.

Field Documentation

◆ $char

◆ $content_model

HTML5::$content_model
private

Definition at line 78 of file PH5P.php.

◆ $data

HTML5::$data
private

◆ $entities

HTML5::$entities
private

Definition at line 80 of file PH5P.php.

◆ $EOF

HTML5::$EOF
private

Definition at line 74 of file PH5P.php.

◆ $escape

HTML5::$escape = false
private

Definition at line 79 of file PH5P.php.

◆ $state

HTML5::$state
private

Definition at line 75 of file PH5P.php.

◆ $token

◆ $tree

HTML5::$tree
private

Definition at line 76 of file PH5P.php.

◆ CDATA

const HTML5::CDATA = 2

Definition at line 451 of file PH5P.php.

Referenced by HTML5TreeConstructer\inBody(), and HTML5TreeConstructer\inHead().

◆ CHARACTR

◆ COMMENT

◆ DOCTYPE

◆ ENDTAG

◆ EOF

const HTML5::EOF = 5

Definition at line 459 of file PH5P.php.

◆ PCDATA

const HTML5::PCDATA = 0

Definition at line 449 of file PH5P.php.

Referenced by HTML5TreeConstructer\inHead().

◆ PLAINTEXT

const HTML5::PLAINTEXT = 3

Definition at line 452 of file PH5P.php.

Referenced by HTML5TreeConstructer\inBody().

◆ RCDATA

const HTML5::RCDATA = 1

Definition at line 450 of file PH5P.php.

Referenced by HTML5TreeConstructer\inBody(), and HTML5TreeConstructer\inHead().

◆ STARTTAG


The documentation for this class was generated from the following file: