72 trigger_error(
"Passing a prototype to
73 HTMLPurifier_Lexer::create() is deprecated, please instead
74 use %Core.LexerImpl", E_USER_WARNING);
76 $lexer =
$config->get(
'Core.LexerImpl');
80 $config->get(
'Core.MaintainLineNumbers') ||
81 $config->get(
'Core.CollectErrors');
84 if (is_object($lexer)) {
88 if (is_null($lexer)) {
do {
91 if ($needs_tracking) {
97 class_exists(
'DOMDocument') &&
98 method_exists(
'DOMDocument',
'loadHTML') &&
99 !extension_loaded(
'domxml')
107 $lexer =
'DirectLex';
124 throw new HTMLPurifier_Exception(
"Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer));
132 if ($needs_tracking && !$inst->tracksLineNumbers) {
133 throw new HTMLPurifier_Exception(
'Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)');
177 if ($string ===
'')
return '';
180 $num_amp = substr_count($string,
'&') - substr_count($string,
'& ') -
181 ($string[strlen($string)-1] ===
'&' ? 1 : 0);
183 if (!$num_amp)
return $string;
184 $num_esc_amp = substr_count($string,
'&');
185 $string = strtr($string, $this->_special_entity2str);
188 $num_amp_2 = substr_count($string,
'&') - substr_count($string,
'& ') -
189 ($string[strlen($string)-1] ===
'&' ? 1 : 0);
191 if ($num_amp_2 <= $num_esc_amp)
return $string;
194 $string = $this->_entity_parser->substituteSpecialEntities($string);
205 trigger_error(
'Call to abstract class', E_USER_ERROR);
215 return preg_replace_callback(
216 '/<!\[CDATA\[(.+?)\]\]>/s',
217 array(
'HTMLPurifier_Lexer',
'CDATACallback'),
226 return preg_replace_callback(
227 '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
228 array(
'HTMLPurifier_Lexer',
'CDATACallback'),
238 '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si',
255 return htmlspecialchars($matches[1], ENT_COMPAT,
'UTF-8');
266 if (
$config->get(
'Core.NormalizeNewlines')) {
267 $html = str_replace(
"\r\n",
"\n", $html);
268 $html = str_replace(
"\r",
"\n", $html);
271 if (
$config->get(
'HTML.Trusted')) {
282 if (
$config->get(
'Core.ConvertDocumentToFragment')) {
284 if (
$config->get(
'Core.CollectErrors')) {
285 $e =& $context->get(
'ErrorCollector');
288 if ($e && $new_html != $html) {
289 $e->send(E_WARNING,
'Lexer: Extracted body');
295 $html = $this->_entity_parser->substituteNonSpecialEntities($html);
303 if (
$config->get(
'Core.RemoveProcessingInstructions')) {
304 $html = preg_replace(
'#<\?.+?\?>#s',
'', $html);
316 $result = preg_match(
'!<body[^>]*>(.*)</body>!is', $html, $matches);