33 return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT,
'UTF-8') . $matches[3];
47 if (
$config->get(
'HTML.Trusted')) {
48 $html = preg_replace_callback(
49 '#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
50 array($this,
'scriptCallback'),
62 $maintain_line_numbers =
$config->get(
'Core.MaintainLineNumbers');
64 if ($maintain_line_numbers === null) {
67 $maintain_line_numbers =
$config->get(
'Core.CollectErrors');
70 if ($maintain_line_numbers) {
73 $length = strlen(
$html);
75 $current_line =
false;
79 $context->register(
'CurrentLine', $current_line);
80 $context->register(
'CurrentCol', $current_col);
84 $synchronize_interval =
$config->get(
'Core.DirectLexLineNumberSyncInterval');
87 if (
$config->get(
'Core.CollectErrors')) {
88 $e =& $context->get(
'ErrorCollector');
99 if ($maintain_line_numbers) {
101 $rcursor = $cursor - (int)$inside_tag;
107 $nl_pos = strrpos(
$html, $nl, $rcursor - $length);
108 $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
111 if ($synchronize_interval &&
113 $loops % $synchronize_interval === 0) {
118 $position_next_lt = strpos(
$html,
'<', $cursor);
119 $position_next_gt = strpos(
$html,
'>', $cursor);
123 if ($position_next_lt === $cursor) {
128 if (!$inside_tag && $position_next_lt !==
false) {
136 $position_next_lt - $cursor
140 if ($maintain_line_numbers) {
141 $token->rawPosition($current_line, $current_col);
142 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_lt - $cursor);
145 $cursor = $position_next_lt + 1;
148 } elseif (!$inside_tag) {
151 if ($cursor === strlen(
$html)) {
164 if ($maintain_line_numbers) {
165 $token->rawPosition($current_line, $current_col);
169 } elseif ($inside_tag && $position_next_gt !==
false) {
172 $strlen_segment = $position_next_gt - $cursor;
174 if ($strlen_segment < 1) {
181 $segment = substr(
$html, $cursor, $strlen_segment);
183 if ($segment ===
false) {
190 if (substr($segment, 0, 3) ===
'!--') {
192 $position_comment_end = strpos(
$html,
'-->', $cursor);
193 if ($position_comment_end ===
false) {
198 $e->send(E_WARNING,
'Lexer: Unclosed comment');
200 $position_comment_end = strlen(
$html);
205 $strlen_segment = $position_comment_end - $cursor;
206 $segment = substr(
$html, $cursor, $strlen_segment);
215 if ($maintain_line_numbers) {
216 $token->rawPosition($current_line, $current_col);
220 $cursor =
$end ? $position_comment_end : $position_comment_end + 3;
226 $is_end_tag = (strpos($segment,
'/') === 0);
228 $type = substr($segment, 1);
230 if ($maintain_line_numbers) {
231 $token->rawPosition($current_line, $current_col);
232 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
236 $cursor = $position_next_gt + 1;
243 if (!ctype_alpha($segment[0])) {
246 $e->send(E_NOTICE,
'Lexer: Unescaped lt');
249 if ($maintain_line_numbers) {
250 $token->rawPosition($current_line, $current_col);
251 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
262 $is_self_closing = (strrpos($segment,
'/') === $strlen_segment - 1);
263 if ($is_self_closing) {
265 $segment = substr($segment, 0, $strlen_segment);
269 $position_first_space = strcspn($segment, $this->_whitespace);
271 if ($position_first_space >= $strlen_segment) {
272 if ($is_self_closing) {
277 if ($maintain_line_numbers) {
278 $token->rawPosition($current_line, $current_col);
279 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
283 $cursor = $position_next_gt + 1;
288 $type = substr($segment, 0, $position_first_space);
293 $position_first_space
296 if ($attribute_string) {
306 if ($is_self_closing) {
311 if ($maintain_line_numbers) {
312 $token->rawPosition($current_line, $current_col);
313 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
316 $cursor = $position_next_gt + 1;
322 $e->send(E_WARNING,
'Lexer: Missing gt');
331 if ($maintain_line_numbers) {
332 $token->rawPosition($current_line, $current_col);
341 $context->destroy(
'CurrentLine');
342 $context->destroy(
'CurrentCol');
354 protected function substrCount($haystack, $needle, $offset, $length)
357 if ($oldVersion === null) {
358 $oldVersion = version_compare(PHP_VERSION,
'5.1',
'<');
361 $haystack = substr($haystack, $offset, $length);
362 return substr_count($haystack, $needle);
364 return substr_count($haystack, $needle, $offset, $length);
378 $string = (
string)$string;
385 if (
$config->get(
'Core.CollectErrors')) {
386 $e =& $context->get(
'ErrorCollector');
391 $num_equal = substr_count($string,
'=');
392 $has_space = strpos($string,
' ');
393 if ($num_equal === 0 && !$has_space) {
395 return array($string => $string);
396 } elseif ($num_equal === 1 && !$has_space) {
398 list(
$key, $quoted_value) = explode(
'=', $string);
399 $quoted_value = trim($quoted_value);
402 $e->send(E_ERROR,
'Lexer: Missing attribute key');
406 if (!$quoted_value) {
409 $first_char = @$quoted_value[0];
410 $last_char = @$quoted_value[strlen($quoted_value) - 1];
412 $same_quote = ($first_char == $last_char);
413 $open_quote = ($first_char ==
'"' || $first_char ==
"'");
415 if ($same_quote && $open_quote) {
417 $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
422 $e->send(E_ERROR,
'Lexer: Missing end quote');
424 $value = substr($quoted_value, 1);
426 $value = $quoted_value;
429 if ($value ===
false) {
438 $size = strlen($string);
445 while ($cursor <
$size) {
446 if ($old_cursor >= $cursor) {
447 throw new Exception(
"Infinite loop detected");
449 $old_cursor = $cursor;
451 $cursor += ($value = strspn($string, $this->_whitespace, $cursor));
454 $key_begin = $cursor;
457 $cursor += strcspn($string, $this->_whitespace .
'=', $cursor);
461 $key = substr($string, $key_begin, $key_end - $key_begin);
465 $e->send(E_ERROR,
'Lexer: Missing attribute key');
467 $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1);
472 $cursor += strspn($string, $this->_whitespace, $cursor);
474 if ($cursor >=
$size) {
481 $first_char = @$string[$cursor];
483 if ($first_char ==
'=') {
487 $cursor += strspn($string, $this->_whitespace, $cursor);
489 if ($cursor ===
false) {
496 $char = @$string[$cursor];
498 if ($char ==
'"' || $char ==
"'") {
501 $value_begin = $cursor;
502 $cursor = strpos($string, $char, $cursor);
503 $value_end = $cursor;
506 $value_begin = $cursor;
507 $cursor += strcspn($string, $this->_whitespace, $cursor);
508 $value_end = $cursor;
512 if ($cursor ===
false) {
514 $value_end = $cursor;
517 $value = substr($string, $value_begin, $value_end - $value_begin);
518 if ($value ===
false) {
530 $e->send(E_ERROR,
'Lexer: Missing attribute key');
$_whitespace
Whitespace characters for str(c)spn.
Concrete end token class.
parseText($string, $config)
Forgivingly lexes HTML (SGML-style) markup into tokens.
Concrete start token class.
Our in-house implementation of a parser.
substrCount($haystack, $needle, $offset, $length)
PHP 5.0.x compatible substr_count that implements offset and length.
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits...
parseAttr($string, $config)
Create styles array
The data for the language used.
Concrete empty token class.
scriptCallback($matches)
Callback function for script CDATA fudge.
Concrete text token class.
tokenizeHTML($html, $config, $context)
parseAttributeString($string, $config, $context)
Takes the inside of an HTML tag and makes an assoc array of attributes.