47 if (
$config->get(
'HTML.Trusted')) {
48 $html = preg_replace_callback(
49 '#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
50 array($this,
'scriptCallback'),
62 $maintain_line_numbers =
$config->get(
'Core.MaintainLineNumbers');
64 if ($maintain_line_numbers === null) {
67 $maintain_line_numbers =
$config->get(
'Core.CollectErrors');
70 if ($maintain_line_numbers) {
73 $length = strlen(
$html);
75 $current_line =
false;
79 $context->register(
'CurrentLine', $current_line);
80 $context->register(
'CurrentCol', $current_col);
84 $synchronize_interval =
$config->get(
'Core.DirectLexLineNumberSyncInterval');
87 if (
$config->get(
'Core.CollectErrors')) {
88 $e =&
$context->get(
'ErrorCollector');
99 if ($maintain_line_numbers) {
101 $rcursor = $cursor - (int)$inside_tag;
107 $nl_pos = strrpos(
$html, $nl, $rcursor - $length);
108 $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
111 if ($synchronize_interval &&
113 $loops % $synchronize_interval === 0) {
118 $position_next_lt = strpos(
$html,
'<', $cursor);
119 $position_next_gt = strpos(
$html,
'>', $cursor);
123 if ($position_next_lt === $cursor) {
128 if (!$inside_tag && $position_next_lt !==
false) {
136 $position_next_lt - $cursor
140 if ($maintain_line_numbers) {
141 $token->rawPosition($current_line, $current_col);
142 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_lt - $cursor);
145 $cursor = $position_next_lt + 1;
148 } elseif (!$inside_tag) {
151 if ($cursor === strlen(
$html)) {
164 if ($maintain_line_numbers) {
165 $token->rawPosition($current_line, $current_col);
169 } elseif ($inside_tag && $position_next_gt !==
false) {
172 $strlen_segment = $position_next_gt - $cursor;
174 if ($strlen_segment < 1) {
181 $segment = substr(
$html, $cursor, $strlen_segment);
183 if ($segment ===
false) {
190 if (substr($segment, 0, 3) ===
'!--') {
192 $position_comment_end = strpos(
$html,
'-->', $cursor);
193 if ($position_comment_end ===
false) {
198 $e->send(E_WARNING,
'Lexer: Unclosed comment');
200 $position_comment_end = strlen(
$html);
205 $strlen_segment = $position_comment_end - $cursor;
206 $segment = substr(
$html, $cursor, $strlen_segment);
215 if ($maintain_line_numbers) {
216 $token->rawPosition($current_line, $current_col);
220 $cursor =
$end ? $position_comment_end : $position_comment_end + 3;
226 $is_end_tag = (strpos($segment,
'/') === 0);
228 $type = substr($segment, 1);
230 if ($maintain_line_numbers) {
231 $token->rawPosition($current_line, $current_col);
232 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
236 $cursor = $position_next_gt + 1;
243 if (!ctype_alpha($segment[0])) {
246 $e->send(E_NOTICE,
'Lexer: Unescaped lt');
249 if ($maintain_line_numbers) {
250 $token->rawPosition($current_line, $current_col);
251 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
262 $is_self_closing = (strrpos($segment,
'/') === $strlen_segment - 1);
263 if ($is_self_closing) {
265 $segment = substr($segment, 0, $strlen_segment);
269 $position_first_space = strcspn($segment, $this->_whitespace);
271 if ($position_first_space >= $strlen_segment) {
272 if ($is_self_closing) {
277 if ($maintain_line_numbers) {
278 $token->rawPosition($current_line, $current_col);
279 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
283 $cursor = $position_next_gt + 1;
288 $type = substr($segment, 0, $position_first_space);
293 $position_first_space
296 if ($attribute_string) {
306 if ($is_self_closing) {
311 if ($maintain_line_numbers) {
312 $token->rawPosition($current_line, $current_col);
313 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
316 $cursor = $position_next_gt + 1;
322 $e->send(E_WARNING,
'Lexer: Missing gt');
331 if ($maintain_line_numbers) {
332 $token->rawPosition($current_line, $current_col);
Concrete end token class.
parseText($string, $config)
Concrete start token class.
substrCount($haystack, $needle, $offset, $length)
PHP 5.0.x compatible substr_count that implements offset and length.
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits...
Concrete empty token class.
Concrete text token class.
parseAttributeString($string, $config, $context)
Takes the inside of an HTML tag and makes an assoc array of attributes.