14         trigger_error(
'Cannot instantiate encoder, call methods statically', E_USER_ERROR);
 
   47     public static function cleanUTF8($str, $force_php = 
false) {
 
   54         if (preg_match(
'/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
 
   73         for($i = 0; $i < $len; $i++) {
 
   79                 if (0 == (0x80 & (
$in))) {
 
   81                     if ((
$in <= 31 || 
$in == 127) &&
 
   91                 } elseif (0xC0 == (0xE0 & (
$in))) {
 
   94                     $mUcs4 = ($mUcs4 & 0x1F) << 6;
 
   97                 } elseif (0xE0 == (0xF0 & (
$in))) {
 
  100                     $mUcs4 = ($mUcs4 & 0x0F) << 12;
 
  103                 } elseif (0xF0 == (0xF8 & (
$in))) {
 
  106                     $mUcs4 = ($mUcs4 & 0x07) << 18;
 
  109                 } elseif (0xF8 == (0xFC & (
$in))) {
 
  120                     $mUcs4 = ($mUcs4 & 0x03) << 24;
 
  123                 } elseif (0xFC == (0xFE & (
$in))) {
 
  127                     $mUcs4 = ($mUcs4 & 1) << 30;
 
  141                 if (0x80 == (0xC0 & (
$in))) {
 
  143                     $shift = ($mState - 1) * 6;
 
  145                     $tmp = ($tmp & 0x0000003F) << $shift;
 
  148                     if (0 == --$mState) {
 
  155                         if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
 
  156                             ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
 
  157                             ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
 
  160                             (($mUcs4 & 0xFFFFF800) == 0xD800) ||
 
  165                         } elseif (0xFEFF != $mUcs4 && 
 
  171                                 (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
 
  174                                 (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
 
  175                                 (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
 
  227         if($code > 1114111 or $code < 0 or
 
  228           ($code >= 55296 and $code <= 57343) ) {
 
  234         $x = $y = $z = $w = 0;
 
  240             $x = ($code & 63) | 128;
 
  242                 $y = (($code & 2047) >> 6) | 192;
 
  244                 $y = (($code & 4032) >> 6) | 128;
 
  246                     $z = (($code >> 12) & 15) | 224;
 
  248                     $z = (($code >> 12) & 63) | 128;
 
  249                     $w = (($code >> 18) & 7)  | 240;
 
  255         if($w) 
$ret .= chr($w);
 
  256         if($z) 
$ret .= chr($z);
 
  257         if($y) 
$ret .= chr($y);
 
  267         $encoding = 
$config->get(
'Core.Encoding');
 
  268         if ($encoding === 
'utf-8') 
return $str;
 
  269         static $iconv = null;
 
  270         if ($iconv === null) $iconv = function_exists(
'iconv');
 
  271         set_error_handler(array(
'HTMLPurifier_Encoder', 
'muteErrorHandler'));
 
  272         if ($iconv && !
$config->get(
'Test.ForceNoIconv')) {
 
  273             $str = iconv($encoding, 
'utf-8//IGNORE', $str);
 
  274             if ($str === 
false) {
 
  276                 restore_error_handler();
 
  277                 trigger_error(
'Invalid encoding ' . $encoding, E_USER_ERROR);
 
  284             restore_error_handler();
 
  286         } elseif ($encoding === 
'iso-8859-1') {
 
  287             $str = utf8_encode($str);
 
  288             restore_error_handler();
 
  291         trigger_error(
'Encoding not supported, please install iconv', E_USER_ERROR);
 
  300         $encoding = 
$config->get(
'Core.Encoding');
 
  301         if ($encoding === 
'utf-8') 
return $str;
 
  302         static $iconv = null;
 
  303         if ($iconv === null) $iconv = function_exists(
'iconv');
 
  304         if ($escape = 
$config->get(
'Core.EscapeNonASCIICharacters')) {
 
  307         set_error_handler(array(
'HTMLPurifier_Encoder', 
'muteErrorHandler'));
 
  308         if ($iconv && !
$config->get(
'Test.ForceNoIconv')) {
 
  311             if (!$escape && !empty($ascii_fix)) {
 
  312                 $clear_fix = array();
 
  313                 foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = 
'';
 
  314                 $str = strtr($str, $clear_fix);
 
  316             $str = strtr($str, array_flip($ascii_fix));
 
  318             $str = iconv(
'utf-8', $encoding . 
'//IGNORE', $str);
 
  319             restore_error_handler();
 
  321         } elseif ($encoding === 
'iso-8859-1') {
 
  322             $str = utf8_decode($str);
 
  323             restore_error_handler();
 
  326         trigger_error(
'Encoding not supported', E_USER_ERROR);
 
  350         for( $i = 0; $i < $len; $i++ ) {
 
  351             $bytevalue = ord( $str[$i] );
 
  352             if( $bytevalue <= 0x7F ) { 
 
  355             } elseif( $bytevalue <= 0xBF ) { 
 
  356                 $working = $working << 6;
 
  357                 $working += ($bytevalue & 0x3F);
 
  359                 if( $bytesleft <= 0 ) {
 
  360                     $result .= 
"&#" . $working . 
";";
 
  362             } elseif( $bytevalue <= 0xDF ) { 
 
  363                 $working = $bytevalue & 0x1F;
 
  365             } elseif( $bytevalue <= 0xEF ) { 
 
  366                 $working = $bytevalue & 0x0F;
 
  369                 $working = $bytevalue & 0x07;
 
  388         static $encodings = array();
 
  390             if (isset($encodings[$encoding])) 
return $encodings[$encoding];
 
  391             $lenc = strtolower($encoding);
 
  394                     return array(
"\xC2\xA5" => 
'\\', 
"\xE2\x80\xBE" => 
'~');
 
  396                     return array(
"\xE2\x82\xA9" => 
'\\');
 
  398             if (strpos($lenc, 
'iso-8859-') === 0) 
return array();
 
  401         set_error_handler(array(
'HTMLPurifier_Encoder', 
'muteErrorHandler'));
 
  402         if (iconv(
'UTF-8', $encoding, 
'a') === 
false) 
return false;
 
  403         for ($i = 0x20; $i <= 0x7E; $i++) { 
 
  405             $r = iconv(
'UTF-8', 
"$encoding//IGNORE", $c); 
 
  410                 ($r === $c && iconv($encoding, 
'UTF-8//IGNORE', $r) !== $c)
 
  415                 $ret[iconv($encoding, 
'UTF-8//IGNORE', $c)] = $c;
 
  418         restore_error_handler();
 
  419         $encodings[$encoding] = 
$ret;