39 $semi_optional =
"quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";
43 $this->_semiOptionalPrefixRegex =
"/&()()()($semi_optional)/";
45 $this->_textEntitiesRegex =
48 '[#]x([a-fA-F0-9]+);?|'.
53 '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
58 $this->_attrEntitiesRegex =
61 '[#]x([a-fA-F0-9]+);?|'.
66 '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
70 "($semi_optional)(?![=;A-Za-z0-9])".
84 return preg_replace_callback(
85 $this->_textEntitiesRegex,
86 array($this,
'entityCallback'),
100 return preg_replace_callback(
101 $this->_attrEntitiesRegex,
102 array($this,
'entityCallback'),
118 $entity = $matches[0];
119 $hex_part = @$matches[1];
120 $dec_part = @$matches[2];
121 $named_part = empty($matches[3]) ? (empty($matches[4]) ?
"" : $matches[4]) : $matches[3];
122 if ($hex_part !== NULL && $hex_part !==
"") {
124 } elseif ($dec_part !== NULL && $dec_part !==
"") {
127 if (!$this->_entity_lookup) {
130 if (isset($this->_entity_lookup->table[$named_part])) {
131 return $this->_entity_lookup->table[$named_part];
137 if (!empty($matches[3])) {
138 return preg_replace_callback(
139 $this->_semiOptionalPrefixRegex,
140 array($this,
'entityCallback'),
156 '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
195 return preg_replace_callback(
196 $this->_substituteEntitiesRegex,
197 array($this,
'nonSpecialEntityCallback'),
214 $entity = $matches[0];
215 $is_num = (@$matches[0][1] ===
'#');
217 $is_hex = (@$entity[2] ===
'x');
218 $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
220 if (isset($this->_special_dec2str[
$code])) {
225 if (isset($this->_special_ent2dec[$matches[3]])) {
228 if (!$this->_entity_lookup) {
231 if (isset($this->_entity_lookup->table[$matches[3]])) {
232 return $this->_entity_lookup->table[$matches[3]];
250 return preg_replace_callback(
251 $this->_substituteEntitiesRegex,
252 array($this,
'specialEntityCallback'),
269 $entity = $matches[0];
270 $is_num = (@$matches[0][1] ===
'#');
272 $is_hex = (@$entity[2] ===
'x');
273 $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
274 return isset($this->_special_dec2str[
$int]) ?
275 $this->_special_dec2str[
$int] :
278 return isset($this->_special_ent2dec[$matches[3]]) ?
279 $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
$_special_dec2str
Decimal to parsed string conversion table for special entities.
entityCallback($matches)
Callback function for substituteTextEntities() and substituteAttrEntities() that does the work.
static unichr($code)
Translates a Unicode codepoint into its corresponding UTF-8 character.
$_substituteEntitiesRegex
Callback regex string for parsing entities.
$_textEntitiesRegex
Callback regex string for entities in text.
$_semiOptionalPrefixRegex
Regex that matches a semi-optional entity prefix: an ampersand followed by one of the entity names whose trailing semicolon may be omitted.
specialEntityCallback($matches)
Callback function for substituteSpecialEntities() that does the work.
$_special_ent2dec
Stripped entity names to decimal conversion table for special entities.
static instance($prototype=false)
Retrieves sole instance of the object.
substituteAttrEntities($string)
Substitute entities with the parsed equivalents.
substituteSpecialEntities($string)
Substitutes only special entities with their parsed equivalents.
Handles referencing and dereferencing character entities.
nonSpecialEntityCallback($matches)
Callback function for substituteNonSpecialEntities() that does the work.
$_entity_lookup
Reference to entity lookup table.
substituteTextEntities($string)
Substitute entities with the parsed equivalents.
substituteNonSpecialEntities($string)
Substitutes non-special entities with their parsed equivalents.
$_attrEntitiesRegex
Callback regex string for entities in attributes.