d9/d3d/CleanUpTest_8php_source.html

<?php

# Copyright (C) 2004 Brion Vibber <brion@pobox.com>

# http://www.mediawiki.org/

#

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

#

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

#

# You should have received a copy of the GNU General Public License along

# with this program; if not, write to the Free Software Foundation, Inc.,

# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html


# This script drives the test runner itself, so it is only meaningful under
# the command-line SAPI; anywhere else, print a hint and stop.
if ( PHP_SAPI !== 'cli' ) {
        exit( "Run me from the command line please.\n" );
}

# An "--icu" argument asks for the php_utfnormal.so extension to be loaded
# with dl() before UtfNormal.php is included further down.
if ( isset( $_SERVER['argv'] ) ) {
        if ( in_array( '--icu', $_SERVER['argv'] ) ) {
                dl( 'php_utfnormal.so' );
        }
}


#ini_set( 'memory_limit', '40M' );


require_once 'PHPUnit/Framework.php';

require_once 'PHPUnit/TextUI/TestRunner.php';


require_once 'include/Unicode/UtfNormal.php';


/**
 * Tests for UtfNormal::cleanUp().
 *
 * Each test feeds a byte string to cleanUp() and checks the result either
 * against a hand-written expectation or against UtfNormal::NFC() of the same
 * input. Results are compared as bin2hex() strings so that a failure message
 * shows the raw bytes, and with assertSame()/!== so that PHP's loose
 * comparison of numeric-looking strings (e.g. "0e30" == "0e31") can never
 * hide a mismatch.
 */
class CleanUpTest extends PHPUnit_Framework_TestCase {
        /** No fixture is required; kept as an explicit no-op. */
        public function setUp() {
        }

        /** Nothing to release; kept as an explicit no-op. */
        public function tearDown() {
        }

        /** Plain ASCII text must come back unchanged. */
        public function testAscii() {
                $text = 'This is plain ASCII text.';
                $this->assertSame( $text, UtfNormal::cleanUp( $text ) );
        }

        /** A NUL byte must be replaced by U+FFFD (ef bf bd). */
        public function testNull() {
                $text = "a \x00 null";
                $expect = "a \xef\xbf\xbd null";
                $this->assertSame(
                        bin2hex( $expect ),
                        bin2hex( UtfNormal::cleanUp( $text ) ) );
        }

        /** Text containing a precomposed U+00E9 must come back unchanged. */
        public function testLatin() {
                $text = "L'\xc3\xa9cole";
                $this->assertSame( $text, UtfNormal::cleanUp( $text ) );
        }

        /** "e" followed by U+0301 must be composed into U+00E9. */
        public function testLatinNormal() {
                $text = "L'e\xcc\x81cole";
                $expect = "L'\xc3\xa9cole";
                $this->assertSame( $expect, UtfNormal::cleanUp( $text ) );
        }

        /**
         * Sweep every code point from 0 through UNICODE_MAX (inclusive).
         *
         * The "X" prefix keeps the method from matching the test* naming
         * pattern, so it only runs when renamed or called by hand.
         * Prints a progress line every 0x1000 code points.
         */
        public function XtestAllChars() {
                global $utfCanonicalComp, $utfCanonicalDecomp;

                $rep = UTF8_REPLACEMENT;
                # Inclusive upper bound: the acceptance condition below
                # explicitly covers $i == UNICODE_MAX.
                for ( $i = 0x0; $i <= UNICODE_MAX; $i++ ) {
                        $char = codepointToUtf8( $i );
                        $clean = UtfNormal::cleanUp( $char );
                        $x = sprintf( "%04X", $i );
                        if ( $i % 0x1000 === 0 ) {
                                echo "U+$x\n";
                        }

                        # Accepted: tab, LF, CR, and everything above the C0
                        # controls except surrogates, U+FFFE and U+FFFF.
                        if ( $i == 0x0009 ||
                            $i == 0x000a ||
                            $i == 0x000d ||
                            ( $i > 0x001f && $i < UNICODE_SURROGATE_FIRST ) ||
                            ( $i > UNICODE_SURROGATE_LAST && $i < 0xfffe ) ||
                            ( $i > 0xffff && $i <= UNICODE_MAX ) ) {
                                if ( isset( $utfCanonicalComp[$char] ) || isset( $utfCanonicalDecomp[$char] ) ) {
                                        # Listed in the normalization tables:
                                        # cleanUp() must agree with NFC().
                                        $comp = UtfNormal::NFC( $char );
                                        $this->assertSame(
                                                bin2hex( $comp ),
                                                bin2hex( $clean ),
                                                "U+$x should be normalized to NFC" );
                                } else {
                                        $this->assertSame(
                                                bin2hex( $char ),
                                                bin2hex( $clean ),
                                                "U+$x should be intact" );
                                }
                        } else {
                                $this->assertSame( bin2hex( $rep ), bin2hex( $clean ), $x );
                        }
                }
        }

        /** Run the single-byte sweep bare and with an "x" before and/or after. */
        public function testAllBytes() {
                $this->doTestBytes( '', '' );
                $this->doTestBytes( 'x', '' );
                $this->doTestBytes( '', 'x' );
                $this->doTestBytes( 'x', 'x' );
        }

        /**
         * Check every single byte value 0x00..0xff in isolation.
         *
         * Tab, LF, CR and 0x20..0x7f must survive; every other byte must
         * become UTF8_REPLACEMENT.
         *
         * @param string $head Text placed before the byte under test
         * @param string $tail Text placed after the byte under test
         */
        public function doTestBytes( $head, $tail ) {
                for ( $i = 0x0; $i < 256; $i++ ) {
                        $char = $head . chr( $i ) . $tail;
                        $clean = UtfNormal::cleanUp( $char );
                        $x = sprintf( "%02X", $i );
                        if ( $i == 0x0009 ||
                            $i == 0x000a ||
                            $i == 0x000d ||
                            ( $i > 0x001f && $i < 0x80 ) ) {
                                $this->assertSame(
                                        bin2hex( $char ),
                                        bin2hex( $clean ),
                                        "ASCII byte $x should be intact" );
                                # Stop the sweep at the first mismatch.
                                if ( $char !== $clean ) {
                                        return;
                                }
                        } else {
                                $norm = $head . UTF8_REPLACEMENT . $tail;
                                $this->assertSame(
                                        bin2hex( $norm ),
                                        bin2hex( $clean ),
                                        "Forbidden byte $x should be rejected" );
                                if ( $norm !== $clean ) {
                                        return;
                                }
                        }
                }
        }

        /** Run the two-byte sweep bare and with an "x" before and/or after. */
        public function testDoubleBytes() {
                $this->doTestDoubleBytes( '', '' );
                $this->doTestDoubleBytes( 'x', '' );
                $this->doTestDoubleBytes( '', 'x' );
                $this->doTestDoubleBytes( 'x', 'x' );
        }

        /**
         * Check every pair of a byte in 0xc0..0xff followed by a byte in 0x80..0xff.
         *
         * @param string $head Text placed before the pair under test
         * @param string $tail Text placed after the pair under test
         */
        public function doTestDoubleBytes( $head, $tail ) {
                for ( $first = 0xc0; $first < 0x100; $first++ ) {
                        for ( $second = 0x80; $second < 0x100; $second++ ) {
                                $char = $head . chr( $first ) . chr( $second ) . $tail;
                                $clean = UtfNormal::cleanUp( $char );
                                $x = sprintf( "%02X,%02X", $first, $second );
                                if ( $first > 0xc1 &&
                                    $first < 0xe0 &&
                                    $second < 0xc0 ) {
                                        # Two-byte head (c2..df) with a continuation
                                        # byte: expect the same result as NFC().
                                        $norm = UtfNormal::NFC( $char );
                                        $this->assertSame(
                                                bin2hex( $norm ),
                                                bin2hex( $clean ),
                                                "Pair $x should be intact" );
                                        if ( $norm !== $clean ) {
                                                return;
                                        }
                                } elseif ( $first > 0xfd || $second > 0xbf ) {
                                        # fe and ff are not legal head bytes, and a second
                                        # byte above bf is not a continuation byte, so the
                                        # two bytes are rejected separately -- expect two
                                        # replacement chars
                                        $norm = $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail;
                                        $this->assertSame(
                                                bin2hex( $norm ),
                                                bin2hex( $clean ),
                                                "Forbidden pair $x should be rejected" );
                                        if ( $norm !== $clean ) {
                                                return;
                                        }
                                } else {
                                        # Remaining pairs are expected to collapse
                                        # into a single replacement char.
                                        $norm = $head . UTF8_REPLACEMENT . $tail;
                                        $this->assertSame(
                                                bin2hex( $norm ),
                                                bin2hex( $clean ),
                                                "Forbidden pair $x should be rejected" );
                                        if ( $norm !== $clean ) {
                                                return;
                                        }
                                }
                        }
                }
        }

        /** Run the three-byte sweep bare and with an "x" before and/or after. */
        public function testTripleBytes() {
                $this->doTestTripleBytes( '', '' );
                $this->doTestTripleBytes( 'x', '' );
                $this->doTestTripleBytes( '', 'x' );
                $this->doTestTripleBytes( 'x', 'x' );
        }

        /**
         * Check three-byte combinations: first byte 0xc0..0xff, second byte
         * 0x80..0xff, third byte fixed at 0x80.
         *
         * @param string $head Text placed before the triplet under test
         * @param string $tail Text placed after the triplet under test
         */
        public function doTestTripleBytes( $head, $tail ) {
                for ( $first = 0xc0; $first < 0x100; $first++ ) {
                        for ( $second = 0x80; $second < 0x100; $second++ ) {
                                # Only the third byte 0x80 is exercised; the full sweep
                                # would be: for( $third = 0x80; $third < 0x100; $third++ )
                                for ( $third = 0x80; $third < 0x81; $third++ ) {
                                        $char = $head . chr( $first ) . chr( $second ) . chr( $third ) . $tail;
                                        $clean = UtfNormal::cleanUp( $char );
                                        $x = sprintf( "%02X,%02X,%02X", $first, $second, $third );
                                        if ( $first >= 0xe0 &&
                                                $first < 0xf0 &&
                                                $second < 0xc0 &&
                                                $third < 0xc0 ) {
                                                # Three-byte head followed by two continuation bytes.
                                                if ( $first == 0xe0 && $second < 0xa0 ) {
                                                        $this->assertSame(
                                                                bin2hex( $head . UTF8_REPLACEMENT . $tail ),
                                                                bin2hex( $clean ),
                                                                "Overlong triplet $x should be rejected" );
                                                } elseif ( $first == 0xed &&
                                                        # Byte-wise string comparison against the
                                                        # first encoded surrogate.
                                                        ( chr( $first ) . chr( $second ) . chr( $third ) ) >= UTF8_SURROGATE_FIRST ) {
                                                        $this->assertSame(
                                                                bin2hex( $head . UTF8_REPLACEMENT . $tail ),
                                                                bin2hex( $clean ),
                                                                "Surrogate triplet $x should be rejected" );
                                                } else {
                                                        $this->assertSame(
                                                                bin2hex( UtfNormal::NFC( $char ) ),
                                                                bin2hex( $clean ),
                                                                "Triplet $x should be intact" );
                                                }
                                        } elseif ( $first > 0xc1 && $first < 0xe0 && $second < 0xc0 ) {
                                                # Bytes one and two form a two-byte sequence;
                                                # the third byte is a stray.
                                                $this->assertSame(
                                                        bin2hex( UtfNormal::NFC( $head . chr( $first ) . chr( $second ) ) . UTF8_REPLACEMENT . $tail ),
                                                        bin2hex( $clean ),
                                                        "Valid 2-byte $x + broken tail" );
                                        } elseif ( $second > 0xc1 && $second < 0xe0 && $third < 0xc0 ) {
                                                # The first byte is a stray; bytes two and three
                                                # form a two-byte sequence.
                                                $this->assertSame(
                                                        bin2hex( $head . UTF8_REPLACEMENT . UtfNormal::NFC( chr( $second ) . chr( $third ) . $tail ) ),
                                                        bin2hex( $clean ),
                                                        "Broken head + valid 2-byte $x" );
                                        } elseif ( ( $first > 0xfd || $second > 0xfd ) &&
                                                    ( ( $second > 0xbf && $third > 0xbf ) ||
                                                      ( $second < 0xc0 && $third < 0xc0 ) ||
                                                      ( $second > 0xfd ) ||
                                                      ( $third > 0xfd ) ) ) {
                                                # fe and ff are not legal head bytes -- expect three replacement chars
                                                $this->assertSame(
                                                        bin2hex( $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail ),
                                                        bin2hex( $clean ),
                                                        "Forbidden triplet $x should be rejected" );
                                        } elseif ( $first > 0xc2 && $second < 0xc0 && $third < 0xc0 ) {
                                                # Expect the whole triplet to collapse into
                                                # one replacement char.
                                                $this->assertSame(
                                                        bin2hex( $head . UTF8_REPLACEMENT . $tail ),
                                                        bin2hex( $clean ),
                                                        "Forbidden triplet $x should be rejected" );
                                        } else {
                                                # Everything else: expect two replacement chars.
                                                $this->assertSame(
                                                        bin2hex( $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail ),
                                                        bin2hex( $clean ),
                                                        "Forbidden triplet $x should be rejected" );
                                        }
                                }
                        }
                }
        }

        /** Check for regression against a chunking bug. */
        public function testChunkRegression() {
                $text   = "\x46\x55\xb8" .
                          "\xdc\x96" .
                          "\xee" .
                          "\xe7" .
                          "\x44" .
                          "\xaa" .
                          "\x2f\x25";
                $expect = "\x46\x55\xef\xbf\xbd" .
                          "\xdc\x96" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\x44" .
                          "\xef\xbf\xbd" .
                          "\x2f\x25";

                $this->assertSame(
                        bin2hex( $expect ),
                        bin2hex( UtfNormal::cleanUp( $text ) ) );
        }

        /** Stray head and tail bytes interleaved with ASCII are each replaced. */
        public function testInterposeRegression() {
                $text   = "\x4e\x30" .
                          "\xb1" .              # bad tail
                          "\x3a" .
                          "\x92" .              # bad tail
                          "\x62\x3a" .
                          "\x84" .              # bad tail
                          "\x43" .
                          "\xc6" .              # bad head
                          "\x3f" .
                          "\x92" .              # bad tail
                          "\xad" .              # bad tail
                          "\x7d" .
                          "\xd9\x95";

                $expect = "\x4e\x30" .
                          "\xef\xbf\xbd" .
                          "\x3a" .
                          "\xef\xbf\xbd" .
                          "\x62\x3a" .
                          "\xef\xbf\xbd" .
                          "\x43" .
                          "\xef\xbf\xbd" .
                          "\x3f" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\x7d" .
                          "\xd9\x95";

                $this->assertSame(
                        bin2hex( $expect ),
                        bin2hex( UtfNormal::cleanUp( $text ) ) );
        }

        /** An overlong sequence surrounded by forbidden and stray bytes. */
        public function testOverlongRegression() {
                $text   = "\x67" .
                          "\x1a" . # forbidden ascii
                          "\xea" . # bad head
                          "\xc1\xa6" . # overlong sequence
                          "\xad" . # bad tail
                          "\x1c" . # forbidden ascii
                          "\xb0" . # bad tail
                          "\x3c" .
                          "\x9e";  # bad tail
                $expect = "\x67" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\x3c" .
                          "\xef\xbf\xbd";
                $this->assertSame(
                        bin2hex( $expect ),
                        bin2hex( UtfNormal::cleanUp( $text ) ) );
        }

        /** An encoded surrogate followed by stray continuation bytes. */
        public function testSurrogateRegression() {
                $text   = "\xed\xb4\x96" . # surrogate 0xDD16
                          "\x83" . # bad tail
                          "\xb4" . # bad tail
                          "\xac";  # bad tail (0xac is a continuation byte)
                $expect = "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd";
                $this->assertSame(
                        bin2hex( $expect ),
                        bin2hex( UtfNormal::cleanUp( $text ) ) );
        }

        /** U+FFFE followed by a stray tail and a head with no continuation. */
        public function testBomRegression() {
                $text   = "\xef\xbf\xbe" . # U+FFFE, illegal char
                          "\xb2" . # bad tail
                          "\xef" . # bad head
                          "\x59";
                $expect = "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\xef\xbf\xbd" .
                          "\x59";
                $this->assertSame(
                        bin2hex( $expect ),
                        bin2hex( UtfNormal::cleanUp( $text ) ) );
        }

        /** U+FFFF on its own must be replaced. */
        public function testForbiddenRegression() {
                $text   = "\xef\xbf\xbf"; # U+FFFF, illegal char
                $expect = "\xef\xbf\xbd";
                $this->assertSame(
                        bin2hex( $expect ),
                        bin2hex( UtfNormal::cleanUp( $text ) ) );
        }

        /** A Hangul char followed by a further final jamo must be left alone. */
        public function testHangulRegression() {
                $text = "\xed\x9c\xaf" . # Hangul char
                        "\xe1\x87\x81";  # followed by another final jamo
                $expect = $text;         # Should *not* change.
                $this->assertSame(
                        bin2hex( $expect ),
                        bin2hex( UtfNormal::cleanUp( $text ) ) );
        }
}


# Build a suite from the CleanUpTest class and hand it to the text UI runner.
$suite = new PHPUnit_Framework_TestSuite( 'CleanUpTest' );
$result = PHPUnit_TextUI_TestRunner::run( $suite );

# Report the outcome through the exit status: 0 when the run was
# successful, -1 otherwise.
exit( $result->wasSuccessful() ? 0 : -1 );

?>

sprintf
sprintf('%.4f', $callTime)
Definition: 01pharSimple.php:87

$result
$result
Definition: CleanUpTest.php:407

$suite
$suite
Definition: CleanUpTest.php:406

codepointToUtf8
codepointToUtf8( $codepoint)
Return UTF-8 sequence for a given Unicode code point.
Definition: UtfNormalUtil.php:38

UNICODE_SURROGATE_FIRST
const UNICODE_SURROGATE_FIRST
Definition: UtfNormal.php:48

UTF8_SURROGATE_FIRST
const UTF8_SURROGATE_FIRST
Definition: UtfNormal.php:65

UNICODE_MAX
const UNICODE_MAX
Definition: UtfNormal.php:50

UTF8_REPLACEMENT
const UTF8_REPLACEMENT
Definition: UtfNormal.php:68

$utfCanonicalDecomp
global $utfCanonicalDecomp
Definition: UtfNormal.php:23

$utfCanonicalComp
global $utfCanonicalComp
Definition: UtfNormal.php:23

UNICODE_SURROGATE_LAST
const UNICODE_SURROGATE_LAST
Definition: UtfNormal.php:49

php
An exception for terminating execution or to throw for unit testing.

CleanUpTest
Definition: CleanUpTest.php:45

CleanUpTest\XtestAllChars
XtestAllChars()
This test is very expensive!
Definition: CleanUpTest.php:86

CleanUpTest\testDoubleBytes
testDoubleBytes()
Definition: CleanUpTest.php:153

CleanUpTest\testOverlongRegression
testOverlongRegression()
Definition: CleanUpTest.php:331

CleanUpTest\testNull
testNull()
Definition: CleanUpTest.php:61

CleanUpTest\testAllBytes
testAllBytes()
Definition: CleanUpTest.php:119

CleanUpTest\testLatin
testLatin()
Definition: CleanUpTest.php:70

CleanUpTest\testSurrogateRegression
testSurrogateRegression()
Definition: CleanUpTest.php:356

CleanUpTest\testForbiddenRegression
testForbiddenRegression()
Definition: CleanUpTest.php:386

CleanUpTest\testBomRegression
testBomRegression()
Definition: CleanUpTest.php:371

CleanUpTest\testInterposeRegression
testInterposeRegression()
Definition: CleanUpTest.php:296

CleanUpTest\doTestBytes
doTestBytes( $head, $tail)
Definition: CleanUpTest.php:127

CleanUpTest\testHangulRegression
testHangulRegression()
Definition: CleanUpTest.php:395

CleanUpTest\testChunkRegression
testChunkRegression()
Definition: CleanUpTest.php:273

CleanUpTest\tearDown
tearDown()
Definition: CleanUpTest.php:51

CleanUpTest\doTestDoubleBytes
doTestDoubleBytes( $head, $tail)
Definition: CleanUpTest.php:163

CleanUpTest\setUp
setUp()
Definition: CleanUpTest.php:47

CleanUpTest\testTripleBytes
testTripleBytes()
Definition: CleanUpTest.php:199

CleanUpTest\doTestTripleBytes
doTestTripleBytes( $head, $tail)
Definition: CleanUpTest.php:207

CleanUpTest\testAscii
testAscii()
Definition: CleanUpTest.php:55

CleanUpTest\testLatinNormal
testLatinNormal()
Definition: CleanUpTest.php:76

PHPUnit_Framework_TestCase

PHPUnit_Framework_TestSuite

UtfNormal\NFC
static NFC( $string)
Definition: UtfNormal.php:496

UtfNormal\cleanUp
static cleanUp( $string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C,...
Definition: UtfNormal.php:124

$x
$x
Definition: example_009.php:98

$text
$text
Definition: example_020.php:127

exit
exit
Definition: old-extract-schema.php:9

$_SERVER
if((!isset($_SERVER['DOCUMENT_ROOT'])) OR(empty($_SERVER['DOCUMENT_ROOT']))) $_SERVER['DOCUMENT_ROOT']
Definition: tcpdf_autoconfig.php:54