47 if (
$config->get(
'HTML.Trusted')) {
48 $html = preg_replace_callback(
49 '#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
50 array($this,
'scriptCallback'),
62 $maintain_line_numbers =
$config->get(
'Core.MaintainLineNumbers');
64 if ($maintain_line_numbers === null) {
67 $maintain_line_numbers =
$config->get(
'Core.CollectErrors');
70 if ($maintain_line_numbers) {
73 $length = strlen(
$html);
75 $current_line =
false;
79 $context->register(
'CurrentLine', $current_line);
80 $context->register(
'CurrentCol', $current_col);
84 $synchronize_interval =
$config->get(
'Core.DirectLexLineNumberSyncInterval');
87 if (
$config->get(
'Core.CollectErrors')) {
88 $e =& $context->get(
'ErrorCollector');
99 if ($maintain_line_numbers) {
101 $rcursor = $cursor - (int)$inside_tag;
107 $nl_pos = strrpos(
$html, $nl, $rcursor - $length);
108 $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
111 if ($synchronize_interval &&
113 $loops % $synchronize_interval === 0) {
118 $position_next_lt = strpos(
$html,
'<', $cursor);
119 $position_next_gt = strpos(
$html,
'>', $cursor);
123 if ($position_next_lt === $cursor) {
128 if (!$inside_tag && $position_next_lt !==
false) {
136 $position_next_lt - $cursor
140 if ($maintain_line_numbers) {
141 $token->rawPosition($current_line, $current_col);
142 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_lt - $cursor);
145 $cursor = $position_next_lt + 1;
148 } elseif (!$inside_tag) {
151 if ($cursor === strlen(
$html)) {
164 if ($maintain_line_numbers) {
165 $token->rawPosition($current_line, $current_col);
169 } elseif ($inside_tag && $position_next_gt !==
false) {
172 $strlen_segment = $position_next_gt - $cursor;
174 if ($strlen_segment < 1) {
181 $segment = substr(
$html, $cursor, $strlen_segment);
183 if ($segment ===
false) {
190 if (substr($segment, 0, 3) ===
'!--') {
192 $position_comment_end = strpos(
$html,
'-->', $cursor);
193 if ($position_comment_end ===
false) {
198 $e->send(E_WARNING,
'Lexer: Unclosed comment');
200 $position_comment_end = strlen(
$html);
205 $strlen_segment = $position_comment_end - $cursor;
206 $segment = substr(
$html, $cursor, $strlen_segment);
215 if ($maintain_line_numbers) {
216 $token->rawPosition($current_line, $current_col);
220 $cursor =
$end ? $position_comment_end : $position_comment_end + 3;
226 $is_end_tag = (strpos($segment,
'/') === 0);
228 $type = substr($segment, 1);
230 if ($maintain_line_numbers) {
231 $token->rawPosition($current_line, $current_col);
232 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
236 $cursor = $position_next_gt + 1;
243 if (!ctype_alpha($segment[0])) {
246 $e->send(E_NOTICE,
'Lexer: Unescaped lt');
249 if ($maintain_line_numbers) {
250 $token->rawPosition($current_line, $current_col);
251 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
262 $is_self_closing = (strrpos($segment,
'/') === $strlen_segment - 1);
263 if ($is_self_closing) {
265 $segment = substr($segment, 0, $strlen_segment);
269 $position_first_space = strcspn($segment, $this->_whitespace);
271 if ($position_first_space >= $strlen_segment) {
272 if ($is_self_closing) {
277 if ($maintain_line_numbers) {
278 $token->rawPosition($current_line, $current_col);
279 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
283 $cursor = $position_next_gt + 1;
288 $type = substr($segment, 0, $position_first_space);
293 $position_first_space
296 if ($attribute_string) {
306 if ($is_self_closing) {
311 if ($maintain_line_numbers) {
312 $token->rawPosition($current_line, $current_col);
313 $current_line += $this->
substrCount(
$html, $nl, $cursor, $position_next_gt - $cursor);
316 $cursor = $position_next_gt + 1;
322 $e->send(E_WARNING,
'Lexer: Missing gt');
331 if ($maintain_line_numbers) {
332 $token->rawPosition($current_line, $current_col);
341 $context->destroy(
'CurrentLine');
342 $context->destroy(
'CurrentCol');
Concrete end token class.
parseText($string, $config)
Concrete start token class.
substrCount($haystack, $needle, $offset, $length)
PHP 5.0.x compatible substr_count that implements offset and length.
normalize($html, $config, $context)
Takes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits...
Create styles array
The data for the language used.
Concrete empty token class.
Concrete text token class.
parseAttributeString($string, $config, $context)
Takes the inside of an HTML tag and makes an assoc array of attributes.