15        trigger_error(
'Cannot instantiate encoder, call methods statically', E_USER_ERROR);
 
   34        set_error_handler(array(
'HTMLPurifier_Encoder', 
'muteErrorHandler'));
 
   36        restore_error_handler();
 
   51        if (
$code == self::ICONV_OK) {
 
   53        } elseif (
$code == self::ICONV_TRUNCATES) {
 
   57                if ($max_chunk_size < 4) {
 
   58                    trigger_error(
'max_chunk_size is too small', E_USER_WARNING);
 
   63                if (($c = strlen(
$text)) <= $max_chunk_size) {
 
   69                    if (
$i + $max_chunk_size >= $c) {
 
   74                    if (0x80 != (0xC0 & ord(
$text[
$i + $max_chunk_size]))) {
 
   75                        $chunk_size = $max_chunk_size;
 
   76                    } elseif (0x80 != (0xC0 & ord(
$text[
$i + $max_chunk_size - 1]))) {
 
   77                        $chunk_size = $max_chunk_size - 1;
 
   78                    } elseif (0x80 != (0xC0 & ord(
$text[
$i + $max_chunk_size - 2]))) {
 
   79                        $chunk_size = $max_chunk_size - 2;
 
   80                    } elseif (0x80 != (0xC0 & ord(
$text[
$i + $max_chunk_size - 3]))) {
 
   81                        $chunk_size = $max_chunk_size - 3;
 
   85                    $chunk = substr(
$text, 
$i, $chunk_size); 
 
  134    public static function cleanUTF8($str, $force_php = 
false)
 
  140            '/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
 
  161        for (
$i = 0; 
$i < $len; 
$i++) {
 
  167                if (0 == (0x80 & (
$in))) {
 
  169                    if ((
$in <= 31 || 
$in == 127) &&
 
  179                } elseif (0xC0 == (0xE0 & (
$in))) {
 
  182                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
 
  185                } elseif (0xE0 == (0xF0 & (
$in))) {
 
  188                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
 
  191                } elseif (0xF0 == (0xF8 & (
$in))) {
 
  194                    $mUcs4 = ($mUcs4 & 0x07) << 18;
 
  197                } elseif (0xF8 == (0xFC & (
$in))) {
 
  208                    $mUcs4 = ($mUcs4 & 0x03) << 24;
 
  211                } elseif (0xFC == (0xFE & (
$in))) {
 
  215                    $mUcs4 = ($mUcs4 & 1) << 30;
 
  229                if (0x80 == (0xC0 & (
$in))) {
 
  231                    $shift = ($mState - 1) * 6;
 
  233                    $tmp = ($tmp & 0x0000003F) << $shift;
 
  236                    if (0 == --$mState) {
 
  243                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
 
  244                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
 
  245                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
 
  248                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
 
  253                        } elseif (0xFEFF != $mUcs4 && 
 
  259                                (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
 
  262                                (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
 
  263                                (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
 
  264                                (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
 
  332                $y = ((
$code & 2047) >> 6) | 192;
 
  334                $y = ((
$code & 4032) >> 6) | 128;
 
  336                    $z = ((
$code >> 12) & 15) | 224;
 
  338                    $z = ((
$code >> 12) & 63) | 128;
 
  339                    $w = ((
$code >> 18) & 7)  | 240;
 
  364        static $iconv = 
null;
 
  365        if ($iconv === 
null) {
 
  380        $encoding = 
$config->get(
'Core.Encoding');
 
  381        if ($encoding === 
'utf-8') {
 
  384        static $iconv = 
null;
 
  385        if ($iconv === 
null) {
 
  388        if ($iconv && !
$config->get(
'Test.ForceNoIconv')) {
 
  391            if ($str === 
false) {
 
  393                trigger_error(
'Invalid encoding ' . $encoding, E_USER_ERROR);
 
  399            $str = strtr($str, self::testEncodingSupportsASCII($encoding));
 
  401        } elseif ($encoding === 
'iso-8859-1') {
 
  402            $str = utf8_encode($str);
 
  406        if ($bug == self::ICONV_OK) {
 
  407            trigger_error(
'Encoding not supported, please install iconv', E_USER_ERROR);
 
  410                'You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 ' .
 
  411                'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541',
 
  428        $encoding = 
$config->get(
'Core.Encoding');
 
  429        if ($escape = 
$config->get(
'Core.EscapeNonASCIICharacters')) {
 
  432        if ($encoding === 
'utf-8') {
 
  435        static $iconv = 
null;
 
  436        if ($iconv === 
null) {
 
  439        if ($iconv && !
$config->get(
'Test.ForceNoIconv')) {
 
  442            if (!$escape && !empty($ascii_fix)) {
 
  443                $clear_fix = array();
 
  444                foreach ($ascii_fix as $utf8 => $native) {
 
  445                    $clear_fix[$utf8] = 
'';
 
  447                $str = strtr($str, $clear_fix);
 
  449            $str = strtr($str, array_flip($ascii_fix));
 
  451            $str = 
self::iconv(
'utf-8', $encoding . 
'//IGNORE', $str);
 
  453        } elseif ($encoding === 
'iso-8859-1') {
 
  454            $str = utf8_decode($str);
 
  457        trigger_error(
'Encoding not supported', E_USER_ERROR);
 
  486        for (
$i = 0; 
$i < $len; 
$i++) {
 
  487            $bytevalue = ord($str[
$i]);
 
  488            if ($bytevalue <= 0x7F) { 
 
  491            } elseif ($bytevalue <= 0xBF) { 
 
  492                $working = $working << 6;
 
  493                $working += ($bytevalue & 0x3F);
 
  495                if ($bytesleft <= 0) {
 
  496                    $result .= 
"&#" . $working . 
";";
 
  498            } elseif ($bytevalue <= 0xDF) { 
 
  499                $working = $bytevalue & 0x1F;
 
  501            } elseif ($bytevalue <= 0xEF) { 
 
  502                $working = $bytevalue & 0x0F;
 
  505                $working = $bytevalue & 0x07;
 
  540        if (
$code === 
null) {
 
  545            } elseif (($c = strlen(
$r)) < 9000) {
 
  547            } elseif ($c > 9000) {
 
  549                    'Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: ' .
 
  550                    'include your iconv version as per phpversion()',
 
  578        static $encodings = array();
 
  580            if (isset($encodings[$encoding])) {
 
  581                return $encodings[$encoding];
 
  583            $lenc = strtolower($encoding);
 
  586                    return array(
"\xC2\xA5" => 
'\\', 
"\xE2\x80\xBE" => 
'~');
 
  588                    return array(
"\xE2\x82\xA9" => 
'\\');
 
  590            if (strpos($lenc, 
'iso-8859-') === 0) {
 
  595        if (self::unsafeIconv(
'UTF-8', $encoding, 
'a') === 
false) {
 
  598        for (
$i = 0x20; 
$i <= 0x7E; 
$i++) { 
 
  604                (
$r === $c && self::unsafeIconv($encoding, 
'UTF-8//IGNORE', 
$r) !== $c)
 
  612        $encodings[$encoding] = 
$ret;
 
if(php_sapi_name() !='cli') $in
An exception for terminating execution or to throw for unit testing.
A UTF-8 specific character encoder that handles cleaning and transforming.
static iconv($in, $out, $text, $max_chunk_size=8000)
iconv wrapper which mutes errors and works around bugs.
static unichr($code)
Translates a Unicode codepoint into its corresponding UTF-8 character.
const ICONV_TRUNCATES
Iconv truncates output if converting from UTF-8 to another character set with //IGNORE,...
__construct()
Constructor throws fatal error if you attempt to instantiate class.
static convertFromUTF8($str, $config, $context)
Converts a string from UTF-8 based on configuration.
static cleanUTF8($str, $force_php=false)
Cleans a UTF-8 string for well-formedness and SGML validity.
static convertToASCIIDumbLossless($str)
Lossless (character-wise) conversion of HTML to ASCII.
static convertToUTF8($str, $config, $context)
Convert a string to UTF-8 based on configuration.
static testEncodingSupportsASCII($encoding, $bypass=false)
This expensive function tests whether or not a given character encoding supports ASCII.
static muteErrorHandler()
Error-handler that mutes errors, alternative to shut-up operator.
const ICONV_UNUSABLE
Iconv does not support //IGNORE, making it unusable for transcoding purposes.
const ICONV_OK
No bugs detected in iconv.
static testIconvTruncateBug()
glibc iconv has a known bug where it doesn't handle the magic //IGNORE stanza correctly.
static unsafeIconv($in, $out, $text)
iconv wrapper which mutes errors, but doesn't work around bugs.