33        return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 
'UTF-8') . $matches[3];
 
   47        if (
$config->get(
'HTML.Trusted')) {
 
   48            $html = preg_replace_callback(
 
   49                '#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
 
   50                array($this, 
'scriptCallback'),
 
   62        $maintain_line_numbers = 
$config->get(
'Core.MaintainLineNumbers');
 
   64        if ($maintain_line_numbers === 
null) {
 
   67            $maintain_line_numbers = 
$config->get(
'Core.CollectErrors');
 
   70        if ($maintain_line_numbers) {
 
   73            $length = strlen(
$html);
 
   75            $current_line = 
false;
 
   79        $context->register(
'CurrentLine', $current_line);
 
   80        $context->register(
'CurrentCol', $current_col);
 
   84        $synchronize_interval = 
$config->get(
'Core.DirectLexLineNumberSyncInterval');
 
   87        if (
$config->get(
'Core.CollectErrors')) {
 
   88            $e =& 
$context->get(
'ErrorCollector');
 
   99            if ($maintain_line_numbers) {
 
  101                $rcursor = $cursor - (int)$inside_tag;
 
  107                $nl_pos = strrpos(
$html, $nl, $rcursor - $length);
 
  108                $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
 
  111                if ($synchronize_interval && 
 
  113                    $loops % $synchronize_interval === 0) { 
 
  118            $position_next_lt = strpos(
$html, 
'<', $cursor);
 
  119            $position_next_gt = strpos(
$html, 
'>', $cursor);
 
  123            if ($position_next_lt === $cursor) {
 
  128            if (!$inside_tag && $position_next_lt !== 
false) {
 
  136                            $position_next_lt - $cursor
 
  140                if ($maintain_line_numbers) {
 
  141                    $token->rawPosition($current_line, $current_col);
 
  142                    $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_lt - $cursor);
 
  145                $cursor = $position_next_lt + 1;
 
  148            } elseif (!$inside_tag) {
 
  151                if ($cursor === strlen(
$html)) {
 
  164                if ($maintain_line_numbers) {
 
  165                    $token->rawPosition($current_line, $current_col);
 
  169            } elseif ($inside_tag && $position_next_gt !== 
false) {
 
  172                $strlen_segment = $position_next_gt - $cursor;
 
  174                if ($strlen_segment < 1) {
 
  181                $segment = substr(
$html, $cursor, $strlen_segment);
 
  183                if ($segment === 
false) {
 
  190                if (substr($segment, 0, 3) === 
'!--') {
 
  192                    $position_comment_end = strpos(
$html, 
'-->', $cursor);
 
  193                    if ($position_comment_end === 
false) {
 
  198                            $e->send(E_WARNING, 
'Lexer: Unclosed comment');
 
  200                        $position_comment_end = strlen(
$html);
 
  205                    $strlen_segment = $position_comment_end - $cursor;
 
  206                    $segment = substr(
$html, $cursor, $strlen_segment);
 
  215                    if ($maintain_line_numbers) {
 
  216                        $token->rawPosition($current_line, $current_col);
 
  220                    $cursor = 
$end ? $position_comment_end : $position_comment_end + 3;
 
  226                $is_end_tag = (strpos($segment, 
'/') === 0);
 
  228                    $type = substr($segment, 1);
 
  230                    if ($maintain_line_numbers) {
 
  231                        $token->rawPosition($current_line, $current_col);
 
  232                        $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
 
  236                    $cursor = $position_next_gt + 1;
 
  243                if (!ctype_alpha($segment[0])) {
 
  246                        $e->send(E_NOTICE, 
'Lexer: Unescaped lt');
 
  249                    if ($maintain_line_numbers) {
 
  250                        $token->rawPosition($current_line, $current_col);
 
  251                        $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
 
  262                $is_self_closing = (strrpos($segment, 
'/') === $strlen_segment - 1);
 
  263                if ($is_self_closing) {
 
  265                    $segment = substr($segment, 0, $strlen_segment);
 
  269                $position_first_space = strcspn($segment, $this->_whitespace);
 
  271                if ($position_first_space >= $strlen_segment) {
 
  272                    if ($is_self_closing) {
 
  277                    if ($maintain_line_numbers) {
 
  278                        $token->rawPosition($current_line, $current_col);
 
  279                        $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
 
  283                    $cursor = $position_next_gt + 1;
 
  288                $type = substr($segment, 0, $position_first_space);
 
  293                            $position_first_space
 
  296                if ($attribute_string) {
 
  306                if ($is_self_closing) {
 
  311                if ($maintain_line_numbers) {
 
  312                    $token->rawPosition($current_line, $current_col);
 
  313                    $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
 
  316                $cursor = $position_next_gt + 1;
 
  322                    $e->send(E_WARNING, 
'Lexer: Missing gt');
 
  331                if ($maintain_line_numbers) {
 
  332                    $token->rawPosition($current_line, $current_col);
 
  354    protected function substrCount($haystack, $needle, $offset, $length)
 
  357        if ($oldVersion === 
null) {
 
  358            $oldVersion = version_compare(PHP_VERSION, 
'5.1', 
'<');
 
  361            $haystack = substr($haystack, $offset, $length);
 
  362            return substr_count($haystack, $needle);
 
  364            return substr_count($haystack, $needle, $offset, $length);
 
  378        $string = (string)$string; 
 
  385        if (
$config->get(
'Core.CollectErrors')) {
 
  386            $e =& 
$context->get(
'ErrorCollector');
 
  391        $num_equal = substr_count($string, 
'=');
 
  392        $has_space = strpos($string, 
' ');
 
  393        if ($num_equal === 0 && !$has_space) {
 
  395            return array($string => $string);
 
  396        } elseif ($num_equal === 1 && !$has_space) {
 
  398            list(
$key, $quoted_value) = explode(
'=', $string);
 
  399            $quoted_value = trim($quoted_value);
 
  402                    $e->send(E_ERROR, 
'Lexer: Missing attribute key');
 
  406            if (!$quoted_value) {
 
  407                return array(
$key => 
'');
 
  409            $first_char = @$quoted_value[0];
 
  410            $last_char = @$quoted_value[strlen($quoted_value) - 1];
 
  412            $same_quote = ($first_char == $last_char);
 
  413            $open_quote = ($first_char == 
'"' || $first_char == 
"'");
 
  415            if ($same_quote && $open_quote) {
 
  417                $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
 
  422                        $e->send(E_ERROR, 
'Lexer: Missing end quote');
 
  424                    $value = substr($quoted_value, 1);
 
  426                    $value = $quoted_value;
 
  429            if ($value === 
false) {
 
  438        $size = strlen($string); 
 
  445        while ($cursor < 
$size) {
 
  446            if ($old_cursor >= $cursor) {
 
  447                throw new Exception(
"Infinite loop detected");
 
  449            $old_cursor = $cursor;
 
  451            $cursor += ($value = strspn($string, $this->_whitespace, $cursor));
 
  454            $key_begin = $cursor; 
 
  457            $cursor += strcspn($string, $this->_whitespace . 
'=', $cursor);
 
  461            $key = substr($string, $key_begin, $key_end - $key_begin);
 
  465                    $e->send(E_ERROR, 
'Lexer: Missing attribute key');
 
  467                $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1); 
 
  472            $cursor += strspn($string, $this->_whitespace, $cursor);
 
  474            if ($cursor >= 
$size) {
 
  481            $first_char = @$string[$cursor];
 
  483            if ($first_char == 
'=') {
 
  487                $cursor += strspn($string, $this->_whitespace, $cursor);
 
  489                if ($cursor === 
false) {
 
  496                $char = @$string[$cursor];
 
  498                if ($char == 
'"' || $char == 
"'") {
 
  501                    $value_begin = $cursor;
 
  502                    $cursor = strpos($string, $char, $cursor);
 
  503                    $value_end = $cursor;
 
  506                    $value_begin = $cursor;
 
  507                    $cursor += strcspn($string, $this->_whitespace, $cursor);
 
  508                    $value_end = $cursor;
 
  512                if ($cursor === 
false) {
 
  514                    $value_end = $cursor;
 
  517                $value = substr($string, $value_begin, $value_end - $value_begin);
 
  518                if ($value === 
false) {
 
  530                        $e->send(E_ERROR, 
'Lexer: Missing attribute key');
 
An exception for terminating execution or to throw for unit testing.
Our in-house implementation of a parser.
$_whitespace
Whitespace characters for str(c)spn.
$tracksLineNumbers
@type bool
parseAttributeString($string, $config, $context)
Takes the inside of an HTML tag and makes an assoc array of attributes.
substrCount($haystack, $needle, $offset, $length)
PHP 5.0.x compatible substr_count that implements offset and length.
scriptCallback($matches)
Callback function for script CDATA fudge.
tokenizeHTML($html, $config, $context)
Forgivingly lexes HTML (SGML-style) markup into tokens.
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits,...
parseText($string, $config)
parseAttr($string, $config)
Concrete empty token class.
Concrete end token class.
Concrete start token class.
Concrete text token class.