32     '/&([A-Za-z0-9\x80-\xff]+);    45 $space = 
'[\x09\x0a\x0d\x20]';
    48     "/(?:^|$space)($attrib+)    51                  # The attribute value: quoted or alone    54                  |  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)    55                  |  (\#[0-9a-fA-F]+) # Technically wrong, but lots of    56                                                          # colors are specified like this.    57                                                          # We'll be normalizing it.   333     if ($codepoint < 0x80) {
   334         return chr($codepoint);
   336     if ($codepoint < 0x800) {
   337         return chr($codepoint >> 6 & 0x3f | 0xc0) .
   338             chr($codepoint & 0x3f | 0x80);
   340     if ($codepoint < 0x10000) {
   341         return chr($codepoint >> 12 & 0x0f | 0xe0) .
   342             chr($codepoint >> 6 & 0x3f | 0x80) .
   343             chr($codepoint & 0x3f | 0x80);
   345     if ($codepoint < 0x110000) {
   346         return chr($codepoint >> 18 & 0x07 | 0xf0) .
   347             chr($codepoint >> 12 & 0x3f | 0x80) .
   348             chr($codepoint >> 6 & 0x3f | 0x80) .
   349             chr($codepoint & 0x3f | 0x80);
   368         return ($codepoint == 0x09)
   369             || ($codepoint == 0x0a)
   370             || ($codepoint == 0x0d)
   371             || ($codepoint >= 0x20 && $codepoint <= 0xd7ff)
   372             || ($codepoint >= 0xe000 && $codepoint <= 0xfffd)
   373             || ($codepoint >= 0x10000 && $codepoint <= 0x10ffff);
   387         return preg_replace_callback(
   389             array( 
'Sanitizer', 
'decodeCharReferencesCallback' ),
   400         if ($matches[1] != 
'') {
   402         } elseif ($matches[2] != 
'') {
   404         } elseif ($matches[3] != 
'') {
   406         } elseif ($matches[4] != 
'') {
   409         # Last case should be an ampersand by itself   426             return UTF8_REPLACEMENT;
   442         if (isset($wgHtmlEntityAliases[$name])) {
   443             $name = $wgHtmlEntityAliases[$name];
   445         if (isset($wgHtmlEntities[$name])) {
 global $wgHtmlEntities
List of all named character entities defined in HTML 4.01 (see http://www.w3.org/TR/html4/sgml/entities.html).
 
global $wgHtmlEntityAliases
Character entity aliases accepted by MediaWiki. 
 
static decodeCharReferencesCallback($matches)
 
const MW_CHAR_REFS_REGEX
Regular expression to match various types of character references in Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences.
 
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
 
static validateCodepoint($codepoint)
Returns true if a given Unicode codepoint is a valid character in XML. 
 
static decodeChar($codepoint)
Return UTF-8 string for a codepoint if that is a valid character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
 
codepointToUtf8($codepoint)
 
static decodeEntity($name)
If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD, return the UTF-8 encoding of that character.
 
$attrib
Regular expression to match HTML/XML attribute pairs within a tag.