d9/d3d/CleanUpTest_8php_source.html

<?php

# Copyright (C) 2004 Brion Vibber <brion@pobox.com>

# http://www.mediawiki.org/

#

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

#

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

#

# You should have received a copy of the GNU General Public License along

# with this program; if not, write to the Free Software Foundation, Inc.,

# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html


# This file is a self-running test script: refuse to execute under any
# SAPI other than the command line.
if (php_sapi_name() !== 'cli') {
    die("Run me from the command line please.\n");
}

# When "--icu" is passed on the command line, load the php_utfnormal
# extension before the tests run.
# NOTE(review): presumably this makes UtfNormal use a native (ICU-backed)
# implementation -- confirm against include/Unicode/UtfNormal.php.
if (isset($_SERVER['argv']) && in_array('--icu', $_SERVER['argv'], true)) {
    # dl() returns false when the extension cannot be loaded. Stop here
    # instead of silently running the whole suite without the extension
    # that was explicitly requested.
    if (!dl('php_utfnormal.so')) {
        fwrite(STDERR, "Could not load php_utfnormal.so (requested with --icu).\n");
        exit(1);
    }
}


#ini_set( 'memory_limit', '40M' );


require_once 'PHPUnit/Framework.php';

require_once 'PHPUnit/TextUI/TestRunner.php';


require_once 'include/Unicode/UtfNormal.php';


/**
 * Tests for UtfNormal::cleanUp().
 *
 * The cases below feed ASCII, valid UTF-8, and a variety of malformed byte
 * sequences to UtfNormal::cleanUp() and compare the result against the
 * expected output, in which each rejected sequence appears as
 * UTF8_REPLACEMENT (written out as "\xef\xbf\xbd" in the regression cases).
 *
 * Most comparisons are done on bin2hex() output so that a failure message
 * shows readable hex rather than raw, possibly invalid, bytes.
 */
class CleanUpTest extends PHPUnit_Framework_TestCase
{
    /**
     * No fixture is required; kept as an explicit no-op hook.
     */
    public function setUp()
    {
    }

    /**
     * Nothing to release; kept as an explicit no-op hook.
     */
    public function tearDown()
    {
    }

    /**
     * Assert that two byte strings are equal, comparing their hex dumps.
     *
     * A failed assertion throws, so callers never need to test the values
     * again (or bail out of a loop by hand) after calling this.
     *
     * @param string $expected Expected bytes.
     * @param string $actual   Bytes produced by the code under test.
     * @param string $message  Optional failure message.
     */
    private function assertHexEquals($expected, $actual, $message = '')
    {
        $this->assertEquals(bin2hex($expected), bin2hex($actual), $message);
    }

    /**
     * Plain ASCII text must come back unchanged.
     */
    public function testAscii()
    {
        $text = 'This is plain ASCII text.';
        $this->assertEquals($text, UtfNormal::cleanUp($text));
    }

    /**
     * A NUL byte is expected to be replaced by U+FFFD.
     */
    public function testNull()
    {
        $text = "a \x00 null";
        $expect = "a \xef\xbf\xbd null";
        $this->assertHexEquals($expect, UtfNormal::cleanUp($text));
    }

    /**
     * Text containing a precomposed U+00E9 must come back unchanged.
     */
    public function testLatin()
    {
        $text = "L'\xc3\xa9cole";
        $this->assertEquals($text, UtfNormal::cleanUp($text));
    }

    /**
     * "e" followed by combining acute (U+0301) is expected to be composed
     * into the precomposed U+00E9.
     */
    public function testLatinNormal()
    {
        $text = "L'e\xcc\x81cole";
        $expect = "L'\xc3\xa9cole";
        $this->assertEquals($expect, UtfNormal::cleanUp($text));
    }

    /**
     * Walk the code points from 0 up to (but not including) UNICODE_MAX and
     * check how cleanUp() treats each one on its own.
     *
     * This test is very expensive! The leading "X" in the name keeps the
     * runner from picking it up, since only methods named test* are run.
     *
     * NOTE(review): the loop stops before UNICODE_MAX although the validity
     * check below uses "<= UNICODE_MAX" -- confirm whether the last code
     * point was meant to be exercised as well.
     */
    public function XtestAllChars()
    {
        global $utfCanonicalComp, $utfCanonicalDecomp;

        $rep = UTF8_REPLACEMENT;
        for ($i = 0x0; $i < UNICODE_MAX; $i++) {
            $char = codepointToUtf8($i);
            $clean = UtfNormal::cleanUp($char);
            $x = sprintf("%04X", $i);

            # Progress marker every 0x1000 code points.
            if ($i % 0x1000 === 0) {
                echo "U+$x\n";
            }

            # Code points expected to survive: tab, LF, CR, and everything
            # from U+0020 upwards except the surrogate range and
            # U+FFFE / U+FFFF.
            $isAllowed = $i === 0x0009
                || $i === 0x000a
                || $i === 0x000d
                || ($i > 0x001f && $i < UNICODE_SURROGATE_FIRST)
                || ($i > UNICODE_SURROGATE_LAST && $i < 0xfffe)
                || ($i > 0xffff && $i <= UNICODE_MAX);

            if (!$isAllowed) {
                $this->assertHexEquals($rep, $clean, $x);
            } elseif (isset($utfCanonicalComp[$char]) || isset($utfCanonicalDecomp[$char])) {
                # Listed in a normalization table: the result must match
                # what UtfNormal::NFC() produces for the same character.
                $comp = UtfNormal::NFC($char);
                $this->assertHexEquals($comp, $clean, "U+$x should be decomposed");
            } else {
                $this->assertHexEquals($char, $clean, "U+$x should be intact");
            }
        }
    }

    /**
     * Run the single-byte sweep bare, and with an ASCII byte before and/or
     * after the byte under test.
     */
    public function testAllBytes()
    {
        $this->doTestBytes('', '');
        $this->doTestBytes('x', '');
        $this->doTestBytes('', 'x');
        $this->doTestBytes('x', 'x');
    }

    /**
     * Check every byte value 0x00-0xFF, wrapped in $head and $tail.
     *
     * Tab, LF, CR and 0x20-0x7F are expected unchanged; any other byte is
     * expected to be replaced by UTF8_REPLACEMENT.
     *
     * @param string $head Bytes placed before the byte under test.
     * @param string $tail Bytes placed after the byte under test.
     */
    public function doTestBytes($head, $tail)
    {
        for ($i = 0x0; $i < 256; $i++) {
            $char = $head . chr($i) . $tail;
            $clean = UtfNormal::cleanUp($char);
            $x = sprintf("%02X", $i);

            $isAllowedAscii = $i === 0x0009
                || $i === 0x000a
                || $i === 0x000d
                || ($i > 0x001f && $i < 0x80);

            if ($isAllowedAscii) {
                $this->assertHexEquals($char, $clean, "ASCII byte $x should be intact");
            } else {
                $norm = $head . UTF8_REPLACEMENT . $tail;
                $this->assertHexEquals($norm, $clean, "Forbidden byte $x should be rejected");
            }
        }
    }

    /**
     * Run the two-byte sweep bare, and with an ASCII byte before and/or
     * after the pair under test.
     */
    public function testDoubleBytes()
    {
        $this->doTestDoubleBytes('', '');
        $this->doTestDoubleBytes('x', '');
        $this->doTestDoubleBytes('', 'x');
        $this->doTestDoubleBytes('x', 'x');
    }

    /**
     * Check every pair of a byte in 0xC0-0xFF followed by a byte in
     * 0x80-0xFF, wrapped in $head and $tail.
     *
     * @param string $head Bytes placed before the pair under test.
     * @param string $tail Bytes placed after the pair under test.
     */
    public function doTestDoubleBytes($head, $tail)
    {
        for ($first = 0xc0; $first < 0x100; $first++) {
            for ($second = 0x80; $second < 0x100; $second++) {
                $char = $head . chr($first) . chr($second) . $tail;
                $clean = UtfNormal::cleanUp($char);
                $x = sprintf("%02X,%02X", $first, $second);

                if ($first > 0xc1 && $first < 0xe0 && $second < 0xc0) {
                    # Lead byte C2-DF plus a continuation byte: a valid
                    # two-byte sequence, subject only to normalization.
                    $norm = UtfNormal::NFC($char);
                    $message = "Pair $x should be intact";
                } elseif ($first > 0xfd || $second > 0xbf) {
                    # fe and ff are not legal head bytes -- expect two replacement chars
                    $norm = $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail;
                    $message = "Forbidden pair $x should be rejected";
                } else {
                    # Remaining pairs are expected to collapse into a
                    # single replacement char.
                    $norm = $head . UTF8_REPLACEMENT . $tail;
                    $message = "Forbidden pair $x should be rejected";
                }

                $this->assertHexEquals($norm, $clean, $message);
            }
        }
    }

    /**
     * Run the three-byte sweep bare, and with an ASCII byte before and/or
     * after the triplet under test.
     */
    public function testTripleBytes()
    {
        $this->doTestTripleBytes('', '');
        $this->doTestTripleBytes('x', '');
        $this->doTestTripleBytes('', 'x');
        $this->doTestTripleBytes('x', 'x');
    }

    /**
     * Check three-byte combinations wrapped in $head and $tail.
     *
     * The first byte ranges over 0xC0-0xFF and the second over 0x80-0xFF.
     * The third byte is currently pinned to 0x80: the loop over the full
     * 0x80-0xFF range is commented out below.
     *
     * @param string $head Bytes placed before the triplet under test.
     * @param string $tail Bytes placed after the triplet under test.
     */
    public function doTestTripleBytes($head, $tail)
    {
        for ($first = 0xc0; $first < 0x100; $first++) {
            for ($second = 0x80; $second < 0x100; $second++) {
                #for( $third = 0x80; $third < 0x100; $third++ ) {
                for ($third = 0x80; $third < 0x81; $third++) {
                    $sequence = chr($first) . chr($second) . chr($third);
                    $char = $head . $sequence . $tail;
                    $clean = UtfNormal::cleanUp($char);
                    $x = sprintf("%02X,%02X,%02X", $first, $second, $third);

                    if ($first >= 0xe0 && $first < 0xf0 && $second < 0xc0 && $third < 0xc0) {
                        # Structurally a three-byte sequence: E0-EF lead
                        # byte followed by two continuation bytes.
                        if ($first == 0xe0 && $second < 0xa0) {
                            $expect = $head . UTF8_REPLACEMENT . $tail;
                            $message = "Overlong triplet $x should be rejected";
                        } elseif ($first == 0xed && $sequence >= UTF8_SURROGATE_FIRST) {
                            # Byte-wise string comparison against the first
                            # encoded surrogate.
                            $expect = $head . UTF8_REPLACEMENT . $tail;
                            $message = "Surrogate triplet $x should be rejected";
                        } else {
                            $expect = UtfNormal::NFC($char);
                            $message = "Triplet $x should be intact";
                        }
                    } elseif ($first > 0xc1 && $first < 0xe0 && $second < 0xc0) {
                        # Bytes one and two form a valid two-byte sequence;
                        # the third byte is a stray.
                        $expect = UtfNormal::NFC($head . chr($first) . chr($second))
                            . UTF8_REPLACEMENT . $tail;
                        $message = "Valid 2-byte $x + broken tail";
                    } elseif ($second > 0xc1 && $second < 0xe0 && $third < 0xc0) {
                        # The first byte is a stray; bytes two and three
                        # form a valid two-byte sequence.
                        $expect = $head . UTF8_REPLACEMENT
                            . UtfNormal::NFC(chr($second) . chr($third) . $tail);
                        $message = "Broken head + valid 2-byte $x";
                    } elseif (($first > 0xfd || $second > 0xfd) &&
                        (($second > 0xbf && $third > 0xbf) ||
                            ($second < 0xc0 && $third < 0xc0) ||
                            ($second > 0xfd) ||
                            ($third > 0xfd))) {
                        # fe and ff are not legal head bytes -- expect three replacement chars
                        $expect = $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail;
                        $message = "Forbidden triplet $x should be rejected";
                    } elseif ($first > 0xc2 && $second < 0xc0 && $third < 0xc0) {
                        # A lead byte followed by two continuation bytes
                        # that did not match any case above: expected to
                        # collapse into one replacement char.
                        $expect = $head . UTF8_REPLACEMENT . $tail;
                        $message = "Forbidden triplet $x should be rejected";
                    } else {
                        $expect = $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail;
                        $message = "Forbidden triplet $x should be rejected";
                    }

                    $this->assertHexEquals($expect, $clean, $message);
                }
            }
        }
    }

    /**
     * Fixed input that guards against a regression of a chunking bug.
     */
    public function testChunkRegression()
    {
        # Check for regression against a chunking bug
        $text   = "\x46\x55\xb8" .
                  "\xdc\x96" .
                  "\xee" .
                  "\xe7" .
                  "\x44" .
                  "\xaa" .
                  "\x2f\x25";
        $expect = "\x46\x55\xef\xbf\xbd" .
                  "\xdc\x96" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\x44" .
                  "\xef\xbf\xbd" .
                  "\x2f\x25";

        $this->assertHexEquals($expect, UtfNormal::cleanUp($text));
    }

    /**
     * Stray bytes interleaved with ASCII: each stray byte is expected to
     * become its own replacement char while the ASCII bytes and the final
     * valid two-byte sequence are kept.
     */
    public function testInterposeRegression()
    {
        $text   = "\x4e\x30" .
                  "\xb1" .              # bad tail
                  "\x3a" .
                  "\x92" .              # bad tail
                  "\x62\x3a" .
                  "\x84" .              # bad tail
                  "\x43" .
                  "\xc6" .              # bad head
                  "\x3f" .
                  "\x92" .              # bad tail
                  "\xad" .              # bad tail
                  "\x7d" .
                  "\xd9\x95";

        $expect = "\x4e\x30" .
                  "\xef\xbf\xbd" .
                  "\x3a" .
                  "\xef\xbf\xbd" .
                  "\x62\x3a" .
                  "\xef\xbf\xbd" .
                  "\x43" .
                  "\xef\xbf\xbd" .
                  "\x3f" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\x7d" .
                  "\xd9\x95";

        $this->assertHexEquals($expect, UtfNormal::cleanUp($text));
    }

    /**
     * Mix of forbidden control characters, an overlong sequence and stray
     * head/tail bytes.
     */
    public function testOverlongRegression()
    {
        $text   = "\x67" .
                  "\x1a" . # forbidden ascii
                  "\xea" . # bad head
                  "\xc1\xa6" . # overlong sequence
                  "\xad" . # bad tail
                  "\x1c" . # forbidden ascii
                  "\xb0" . # bad tail
                  "\x3c" .
                  "\x9e";  # bad tail
        $expect = "\x67" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\x3c" .
                  "\xef\xbf\xbd";

        $this->assertHexEquals($expect, UtfNormal::cleanUp($text));
    }

    /**
     * An encoded surrogate followed by three stray continuation bytes:
     * four replacement chars are expected.
     */
    public function testSurrogateRegression()
    {
        $text   = "\xed\xb4\x96" . # surrogate 0xDD16
                  "\x83" . # bad tail
                  "\xb4" . # bad tail
                  "\xac";  # bad tail
        $expect = "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd";

        $this->assertHexEquals($expect, UtfNormal::cleanUp($text));
    }

    /**
     * U+FFFE followed by a stray tail byte and a head byte with no tail.
     */
    public function testBomRegression()
    {
        $text   = "\xef\xbf\xbe" . # U+FFFE, illegal char
                  "\xb2" . # bad tail
                  "\xef" . # bad head
                  "\x59";
        $expect = "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\xef\xbf\xbd" .
                  "\x59";

        $this->assertHexEquals($expect, UtfNormal::cleanUp($text));
    }

    /**
     * U+FFFF on its own is expected to be replaced.
     */
    public function testForbiddenRegression()
    {
        $text   = "\xef\xbf\xbf"; # U+FFFF, illegal char
        $expect = "\xef\xbf\xbd";

        $this->assertHexEquals($expect, UtfNormal::cleanUp($text));
    }

    /**
     * A Hangul character followed by a final jamo must be left alone.
     */
    public function testHangulRegression()
    {
        $text = "\xed\x9c\xaf" . # Hangul char
                "\xe1\x87\x81";  # followed by another final jamo
        $expect = $text;         # Should *not* change.

        $this->assertHexEquals($expect, UtfNormal::cleanUp($text));
    }
}


# Collect the test* methods of CleanUpTest into a suite and run it through
# the text UI runner.
$suite = new PHPUnit_Framework_TestSuite('CleanUpTest');
$result = PHPUnit_TextUI_TestRunner::run($suite);

# Report the outcome through the process exit status: 0 when every test
# passed, -1 otherwise.
exit($result->wasSuccessful() ? 0 : -1);

sprintf
sprintf('%.4f', $callTime)
Definition: 01pharSimple.php:87

$result
$result
Definition: CleanUpTest.php:463

$suite
$suite
Definition: CleanUpTest.php:462

codepointToUtf8
codepointToUtf8($codepoint)
Return UTF-8 sequence for a given Unicode code point.
Definition: UtfNormalUtil.php:38

UNICODE_SURROGATE_FIRST
const UNICODE_SURROGATE_FIRST
Definition: UtfNormal.php:48

UTF8_SURROGATE_FIRST
const UTF8_SURROGATE_FIRST
Definition: UtfNormal.php:65

UNICODE_MAX
const UNICODE_MAX
Definition: UtfNormal.php:50

UTF8_REPLACEMENT
const UTF8_REPLACEMENT
Definition: UtfNormal.php:68

$utfCanonicalDecomp
global $utfCanonicalDecomp
Definition: UtfNormal.php:23

$utfCanonicalComp
global $utfCanonicalComp
Definition: UtfNormal.php:23

UNICODE_SURROGATE_LAST
const UNICODE_SURROGATE_LAST
Definition: UtfNormal.php:49

php
An exception for terminating execution or to throw for unit testing.

CleanUpTest
Definition: CleanUpTest.php:46

CleanUpTest\XtestAllChars
XtestAllChars()
This test is very expensive!
Definition: CleanUpTest.php:94

CleanUpTest\testDoubleBytes
testDoubleBytes()
Definition: CleanUpTest.php:174

CleanUpTest\testOverlongRegression
testOverlongRegression()
Definition: CleanUpTest.php:377

CleanUpTest\testNull
testNull()
Definition: CleanUpTest.php:65

CleanUpTest\doTestDoubleBytes
doTestDoubleBytes($head, $tail)
Definition: CleanUpTest.php:185

CleanUpTest\testAllBytes
testAllBytes()
Definition: CleanUpTest.php:132

CleanUpTest\testLatin
testLatin()
Definition: CleanUpTest.php:76

CleanUpTest\testSurrogateRegression
testSurrogateRegression()
Definition: CleanUpTest.php:404

CleanUpTest\doTestBytes
doTestBytes($head, $tail)
Definition: CleanUpTest.php:141

CleanUpTest\testForbiddenRegression
testForbiddenRegression()
Definition: CleanUpTest.php:438

CleanUpTest\testBomRegression
testBomRegression()
Definition: CleanUpTest.php:421

CleanUpTest\testInterposeRegression
testInterposeRegression()
Definition: CleanUpTest.php:340

CleanUpTest\doTestTripleBytes
doTestTripleBytes($head, $tail)
Definition: CleanUpTest.php:240

CleanUpTest\testHangulRegression
testHangulRegression()
Definition: CleanUpTest.php:449

CleanUpTest\testChunkRegression
testChunkRegression()
Definition: CleanUpTest.php:315

CleanUpTest\tearDown
tearDown()
Definition: CleanUpTest.php:53

CleanUpTest\setUp
setUp()
Definition: CleanUpTest.php:48

CleanUpTest\testTripleBytes
testTripleBytes()
Definition: CleanUpTest.php:231

CleanUpTest\testAscii
testAscii()
Definition: CleanUpTest.php:58

CleanUpTest\testLatinNormal
testLatinNormal()
Definition: CleanUpTest.php:83

PHPUnit_Framework_TestCase

PHPUnit_Framework_TestSuite

UtfNormal\cleanUp
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C,...
Definition: UtfNormal.php:125

UtfNormal\NFC
static NFC($string)
Definition: UtfNormal.php:517

$i
$i
Definition: disco.tpl.php:19

$x
$x
Definition: example_009.php:98

exit
exit
Definition: old-extract-schema.php:9

$_SERVER
if((!isset($_SERVER['DOCUMENT_ROOT'])) OR(empty($_SERVER['DOCUMENT_ROOT']))) $_SERVER['DOCUMENT_ROOT']
Definition: tcpdf_autoconfig.php:54

$text
$text
Definition: errorreport.php:18