24 '/&([A-Za-z0-9\x80-\xff]+); 37 $space =
'[\x09\x0a\x0d\x20]';
40 "/(?:^|$space)($attrib+) 43 # The attribute value: quoted or alone 46 | ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+) 47 | (\#[0-9a-fA-F]+) # Technically wrong, but lots of 48 # colors are specified like this. 49 # We'll be normalizing it. 325 if ($codepoint < 0x80) {
326 return chr($codepoint);
328 if ($codepoint < 0x800) {
329 return chr($codepoint >> 6 & 0x3f | 0xc0) .
330 chr($codepoint & 0x3f | 0x80);
332 if ($codepoint < 0x10000) {
333 return chr($codepoint >> 12 & 0x0f | 0xe0) .
334 chr($codepoint >> 6 & 0x3f | 0x80) .
335 chr($codepoint & 0x3f | 0x80);
337 if ($codepoint < 0x110000) {
338 return chr($codepoint >> 18 & 0x07 | 0xf0) .
339 chr($codepoint >> 12 & 0x3f | 0x80) .
340 chr($codepoint >> 6 & 0x3f | 0x80) .
341 chr($codepoint & 0x3f | 0x80);
360 return ($codepoint == 0x09)
361 || ($codepoint == 0x0a)
362 || ($codepoint == 0x0d)
363 || ($codepoint >= 0x20 && $codepoint <= 0xd7ff)
364 || ($codepoint >= 0xe000 && $codepoint <= 0xfffd)
365 || ($codepoint >= 0x10000 && $codepoint <= 0x10ffff);
379 return preg_replace_callback(
381 array(
'Sanitizer',
'decodeCharReferencesCallback' ),
392 if ($matches[1] !=
'') {
394 } elseif ($matches[2] !=
'') {
396 } elseif ($matches[3] !=
'') {
398 } elseif ($matches[4] !=
'') {
401 # Last case should be an ampersand by itself 418 return UTF8_REPLACEMENT;
434 if (isset($wgHtmlEntityAliases[$name])) {
435 $name = $wgHtmlEntityAliases[$name];
437 if (isset($wgHtmlEntities[$name])) {
global $wgHtmlEntities
List of all named character entities defined in HTML 4.01 http://www.w3.org/TR/html4/sgml/entities.html.
global $wgHtmlEntityAliases
Character entity aliases accepted by MediaWiki.
static decodeCharReferencesCallback($matches)
const MW_CHAR_REFS_REGEX
This file is part of ILIAS, a powerful learning management system published by ILIAS open source e-Learning e.V.
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
static validateCodepoint($codepoint)
Returns true if a given Unicode codepoint is a valid character in XML.
static decodeChar($codepoint)
Return UTF-8 string for a codepoint if that is a valid character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
codepointToUtf8($codepoint)
static decodeEntity($name)
If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD, return the UTF-8 encoding of that character...
$attrib
Regular expression to match HTML/XML attribute pairs within a tag.