# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
   21if (php_sapi_name() != 
'cli') {
 
   22    die(
"Run me from the command line please.\n");
 
   27    dl(
'php_utfnormal.so');
 
   30#ini_set( 'memory_limit', '40M' ); 
   32require_once 
'PHPUnit/Framework.php';
 
   33require_once 
'PHPUnit/TextUI/TestRunner.php';
 
   35require_once 
'include/Unicode/UtfNormal.php';
 
   60        $text = 
'This is plain ASCII text.';
 
   67        $text = 
"a \x00 null";
 
   68        $expect = 
"a \xef\xbf\xbd null";
 
   78        $text = 
"L'\xc3\xa9cole";
 
   85        $text = 
"L'e\xcc\x81cole";
 
   86        $expect = 
"L'\xc3\xa9cole";
 
  101            $x = sprintf(
"%04X", 
$i);
 
  102            if (
$i % 0x1000 == 0) {
 
  116                        "U+$x should be decomposed" 
  122                        "U+$x should be intact" 
  126                $this->assertEquals(bin2hex($rep), bin2hex($clean), 
$x);
 
  143        for (
$i = 0x0; 
$i < 256; 
$i++) {
 
  144            $char = $head . chr(
$i) . 
$tail;
 
  146            $x = sprintf(
"%02X", 
$i);
 
  150                (
$i > 0x001f && 
$i < 0x80)) {
 
  154                    "ASCII byte $x should be intact" 
  156                if ($char != $clean) {
 
  164                    "Forbidden byte $x should be rejected" 
  166                if ($norm != $clean) {
 
  187        for ($first = 0xc0; $first < 0x100; $first++) {
 
  188            for ($second = 0x80; $second < 0x100; $second++) {
 
  189                $char = $head . chr($first) . chr($second) . 
$tail;
 
  191                $x = sprintf(
"%02X,%02X", $first, $second);
 
  199                        "Pair $x should be intact" 
  201                    if ($norm != $clean) {
 
  204                } elseif ($first > 0xfd || $second > 0xbf) {
 
  205                    # fe and ff are not legal head bytes -- expect two replacement chars 
  210                        "Forbidden pair $x should be rejected" 
  212                    if ($norm != $clean) {
 
  220                        "Forbidden pair $x should be rejected" 
  222                    if ($norm != $clean) {
 
  242        for ($first = 0xc0; $first < 0x100; $first++) {
 
  243            for ($second = 0x80; $second < 0x100; $second++) {
 
  244                #for( $third = 0x80; $third < 0x100; $third++ ) { 
  245                for ($third = 0x80; $third < 0x81; $third++) {
 
  246                    $char = $head . chr($first) . chr($second) . chr($third) . 
$tail;
 
  248                    $x = sprintf(
"%02X,%02X,%02X", $first, $second, $third);
 
  249                    if ($first >= 0xe0 &&
 
  253                        if ($first == 0xe0 && $second < 0xa0) {
 
  257                                "Overlong triplet $x should be rejected" 
  259                        } elseif ($first == 0xed &&
 
  264                                "Surrogate triplet $x should be rejected" 
  270                                "Triplet $x should be intact" 
  273                    } elseif ($first > 0xc1 && $first < 0xe0 && $second < 0xc0) {
 
  277                            "Valid 2-byte $x + broken tail" 
  279                    } elseif ($second > 0xc1 && $second < 0xe0 && $third < 0xc0) {
 
  283                            "Broken head + valid 2-byte $x" 
  285                    } elseif (($first > 0xfd || $second > 0xfd) &&
 
  286                                (($second > 0xbf && $third > 0xbf) ||
 
  287                                  ($second < 0xc0 && $third < 0xc0) ||
 
  290                        # fe and ff are not legal head bytes -- expect three replacement chars 
  294                            "Forbidden triplet $x should be rejected" 
  296                    } elseif ($first > 0xc2 && $second < 0xc0 && $third < 0xc0) {
 
  300                            "Forbidden triplet $x should be rejected" 
  306                            "Forbidden triplet $x should be rejected" 
  317        # Check for regression against a chunking bug 
  318        $text = 
"\x46\x55\xb8" .
 
  325        $expect = 
"\x46\x55\xef\xbf\xbd" .
 
  356        $expect = 
"\x4e\x30" .
 
  380                  "\x1a" . # forbidden ascii
 
  382                  "\xc1\xa6" . # overlong sequence
 
  384                  "\x1c" . # forbidden ascii
 
  406        $text = 
"\xed\xb4\x96" . # surrogate 0xDD16
 
  410        $expect = 
"\xef\xbf\xbd" .
 
  423        $text = 
"\xef\xbf\xbe" . # U+FFFE, illegal 
char 
  427        $expect = 
"\xef\xbf\xbd" .
 
  440        $text = 
"\xef\xbf\xbf"; # U+FFFF, illegal 
char 
  441        $expect = 
"\xef\xbf\xbd";
 
  451        $text = 
"\xed\x9c\xaf" . # Hangul 
char 
  452                "\xe1\x87\x81";  # followed by another 
final jamo
 
  453        $expect = 
$text;         # Should *not* change.
 
  465if (!
$result->wasSuccessful()) {
 
codepointToUtf8($codepoint)
Return UTF-8 sequence for a given Unicode code point.
const UNICODE_SURROGATE_FIRST
const UTF8_SURROGATE_FIRST
global $utfCanonicalDecomp
const UNICODE_SURROGATE_LAST
An exception for terminating execution or to throw for unit testing.
XtestAllChars()
This test is very expensive!
doTestDoubleBytes($head, $tail)
testSurrogateRegression()
doTestBytes($head, $tail)
testForbiddenRegression()
testInterposeRegression()
doTestTripleBytes($head, $tail)
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C,...
if((!isset($_SERVER['DOCUMENT_ROOT'])) OR(empty($_SERVER['DOCUMENT_ROOT']))) $_SERVER['DOCUMENT_ROOT']