69 public static function create($config)
74 "Passing a prototype to
75 HTMLPurifier_Lexer::create() is deprecated, please instead
80 $lexer = $config->get(
'Core.LexerImpl');
84 $config->get(
'Core.MaintainLineNumbers') ||
85 $config->get(
'Core.CollectErrors');
88 if (is_object($lexer)) {
91 if (is_null($lexer)) {
94 if ($needs_tracking) {
99 if (class_exists(
'DOMDocument') &&
100 method_exists(
'DOMDocument',
'loadHTML') &&
101 !extension_loaded(
'domxml')
109 $lexer =
'DirectLex';
127 "Cannot instantiate unrecognized Lexer type " .
128 htmlspecialchars($lexer)
139 if ($needs_tracking && !$inst->tracksLineNumbers) {
141 'Cannot use lexer that does not support line numbers with ' .
142 'Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'
189 if ($string ===
'') {
194 $num_amp = substr_count($string,
'&') - substr_count($string,
'& ') -
195 ($string[strlen($string) - 1] ===
'&' ? 1 : 0);
200 $num_esc_amp = substr_count($string,
'&');
201 $string = strtr($string, $this->_special_entity2str);
204 $num_amp_2 = substr_count($string,
'&') - substr_count($string,
'& ') -
205 ($string[strlen($string) - 1] ===
'&' ? 1 : 0);
207 if ($num_amp_2 <= $num_esc_amp) {
212 $string = $this->_entity_parser->substituteSpecialEntities($string);
225 trigger_error(
'Call to abstract class', E_USER_ERROR);
235 return preg_replace_callback(
236 '/<!\[CDATA\[(.+?)\]\]>/s',
237 array(
'HTMLPurifier_Lexer',
'CDATACallback'),
249 return preg_replace_callback(
250 '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
251 array(
'HTMLPurifier_Lexer',
'CDATACallback'),
264 '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si',
282 return htmlspecialchars($matches[1], ENT_COMPAT,
'UTF-8');
297 if ($config->get(
'Core.NormalizeNewlines')) {
298 $html = str_replace(
"\r\n",
"\n", $html);
299 $html = str_replace(
"\r",
"\n", $html);
302 if ($config->get(
'HTML.Trusted')) {
313 if ($config->get(
'Core.ConvertDocumentToFragment')) {
315 if ($config->get(
'Core.CollectErrors')) {
316 $e =& $context->get(
'ErrorCollector');
319 if ($e && $new_html != $html) {
320 $e->send(E_WARNING,
'Lexer: Extracted body');
326 $html = $this->_entity_parser->substituteNonSpecialEntities($html);
334 if ($config->get(
'Core.RemoveProcessingInstructions')) {
335 $html = preg_replace(
'#<\?.+?\?>#s',
'', $html);
348 $result = preg_match(
'!<body[^>]*>(.*)</body>!is', $html, $matches);