d1/d9a/UtfNormalTest_8php_source.html

<?php

# Copyright (C) 2004 Brion Vibber <brion@pobox.com>

# http://www.mediawiki.org/

#

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

#

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

#

# You should have received a copy of the GNU General Public License along

# with this program; if not, write to the Free Software Foundation, Inc.,

# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html


# When true, a failing test case is run a second time with per-check
# reporting switched on (see testNormals() and testInvariant()).
$verbose = true;
#define( 'PRETTY_UTF8', true );

# pretty() renders a string as hexadecimal for diagnostic output. Which of the
# two implementations gets declared depends on whether the PRETTY_UTF8
# constant is defined when this point of the script is reached.
if (!defined('PRETTY_UTF8')) {
    /**
     * Format a string as space-separated hexadecimal values, one value per
     * character matched in UTF-8 mode, each zero-padded to at least 4 digits.
     *
     * @param string $string Text to render.
     * @return string Hex values separated by single spaces, no trailing space.
     */
    function pretty($string)
    {
        # utf8ToCodepoint() turns a single-character UTF-8 sequence into its
        # code point number.
        $toHex = function ($m) {
            return sprintf("%04X ", utf8ToCodepoint($m[1]));
        };
        $spaced = preg_replace_callback('/(.)/us', $toHex, $string);
        return trim($spaced);
    }
} else {
    /**
     * Format a string as a run of two-digit hexadecimal byte values with no
     * separators.
     *
     * @param string $string Bytes to render.
     * @return string Concatenated upper-case hex pairs, one pair per byte.
     */
    function pretty($string)
    {
        $toHex = function ($m) {
            return sprintf("%02X", ord($m[1]));
        };
        return preg_replace_callback('/([\x00-\xff])/', $toHex, $string);
    }
}


# Load the php_utfnormal.so extension when "--icu" is given on the command line.
# Strict comparison so only an exact string argument triggers the load.
if (isset($_SERVER['argv']) && in_array('--icu', $_SERVER['argv'], true)) {
    dl('php_utfnormal.so');
}

require_once 'include/Unicode/UtfNormal.php';

# This script prints plain text and uses exit codes; refuse to run elsewhere.
if (php_sapi_name() !== 'cli') {
    die("Run me from the command line please.\n");
}

# The conformance data is read from the current working directory.
$in = fopen("NormalizationTest.txt", "rt");
if (!$in) {
    print "Couldn't open NormalizationTest.txt -- can't run tests.\n";
    print "If necessary, manually download this file. It can be obtained at\n";
    # Terminate the last line too, so the shell prompt does not end up glued
    # to the URL.
    print "http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";
    exit(-1);
}


$normalizer = new UtfNormal;

# Running counters for the current group of tests; reportResults() resets them.
$total = 0;
$success = 0;
$failure = 0;
# Overall verdict, and-ed with the result of every reported group.
$ok = true;
# Source strings (column 1) that the conformance rows have covered, as keys.
$testedChars = array();
while (false !== ($line = fgets($in))) {
    # Split at the FIRST '#' only, so a comment that itself contains '#' is
    # kept whole. A line without any '#' gets a bare newline as its comment,
    # which keeps $comment defined and the printed output line-terminated.
    $parts = explode('#', $line, 2);
    $data = $parts[0];
    $comment = isset($parts[1]) ? $parts[1] : "\n";

    # Skip comment-only lines and blank lines (whose data is only whitespace).
    if (trim($data) === '') {
        continue;
    }

    $matches = array();
    if (preg_match('/@Part([\d])/', $data, $matches)) {
        # A new part begins: report the part that just ended. Nothing has been
        # run yet when part 0 is announced.
        if ($matches[1] > 0) {
            $ok = reportResults($total, $success, $failure) && $ok;
        }
        print "Part {$matches[1]}: $comment";
        continue;
    }

    $columns = array_map("hexSequenceToUtf8", explode(";", $data));
    # Shift to 1-based indexes so $columns[1]..[5] match the column numbers
    # the test functions read and print.
    array_unshift($columns, '');

    $testedChars[$columns[1]] = true;
    $total++;
    if (testNormals($normalizer, $columns, $comment)) {
        $success++;
    } else {
        $failure++;
        # print "FAILED: $comment";
    }
    # Progress marker every 100 tests.
    if ($total % 100 == 0) {
        print "$total ";
    }
}
fclose($in);

$ok = reportResults($total, $success, $failure) && $ok;


# Second pass: every character listed in UnicodeData.txt that the conformance
# rows did not use as a source string is checked to be unchanged by all
# normalization forms.
$in = fopen("UnicodeData.txt", "rt");
if ($in === false) {
    print "Can't open UnicodeData.txt for reading.\n";
    print "If necessary, fetch this file from the internet:\n";
    print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
    exit(-1);
}
print "Now testing invariants...\n";
for ($line = fgets($in); $line !== false; $line = fgets($in)) {
    $fields = explode(';', $line);
    $char = codepointToUtf8(hexdec($fields[0]));
    $desc = $fields[0] . ": " . $fields[1];

    # Can't check NULL with the ICU plugin, as null bytes fail in C land.
    # Skip other control characters, as we strip them for XML safety.
    $isControl = $char < "\x20";
    # Surrogates are illegal on their own or in UTF-8, ignore.
    $isSurrogate = ($char >= UTF8_SURROGATE_FIRST && $char <= UTF8_SURROGATE_LAST);
    if ($isControl || $isSurrogate) {
        continue;
    }

    # Already exercised by the conformance rows.
    if (!empty($testedChars[$char])) {
        continue;
    }

    $total++;
    if (testInvariant($normalizer, $char, $desc)) {
        $success++;
    } else {
        $failure++;
    }
    # Progress marker every 100 tests.
    if ($total % 100 == 0) {
        print "$total ";
    }
}
fclose($in);

$ok = reportResults($total, $success, $failure) && $ok;

# Final verdict doubles as the process exit status.
if (!$ok) {
    print "TEST FAILED!\n";
    exit(-1);
}
print "TEST SUCCEEDED!\n";
exit(0);


## ------


/**
 * Print a summary of the current group of tests and reset its counters.
 *
 * All three arguments are taken by reference and set back to 0 before
 * returning, so the caller can start counting the next group.
 *
 * @param int $total   Number of tests run in the group.
 * @param int $success Number of tests that passed.
 * @param int $failure Number of tests that failed.
 * @return bool True only if at least one test passed and none failed.
 */
function reportResults(&$total, &$success, &$failure)
{
    # A group with no tests has nothing to divide by; report 0% for both
    # instead of dividing by zero.
    $percSucc = $total > 0 ? intval($success * 100 / $total) : 0;
    $percFail = $total > 0 ? intval($failure * 100 / $total) : 0;
    print "\n";
    print "$success tests successful ($percSucc%)\n";
    print "$failure tests failed ($percFail%)\n\n";
    # An empty group counts as not OK: at least one success is required.
    $ok = ($success > 0 && $failure == 0);
    $total = 0;
    $success = 0;
    $failure = 0;
    return $ok;
}


/**
 * Run every normalization check (NFC, NFD, NFKC, NFKD, cleanUp) on one row
 * of test columns.
 *
 * If the row fails while the global $verbose flag is set and this call was
 * not itself a reporting run, the comment is printed and the row is run once
 * more with reporting enabled so each individual comparison is shown.
 *
 * @param object $u             Normalizer handed on to the individual checks.
 * @param array  $c             Test columns, indexed 1 to 5.
 * @param string $comment       Text printed ahead of the detailed re-run.
 * @param bool   $reportFailure Whether each comparison should be printed.
 * @return bool True if every check passed.
 */
function testNormals(&$u, $c, $comment, $reportFailure = false)
{
    global $verbose;

    # All five checks always run, in this order, regardless of earlier results.
    $outcomes = array(
        testNFC($u, $c, $comment, $reportFailure),
        testNFD($u, $c, $comment, $reportFailure),
        testNFKC($u, $c, $comment, $reportFailure),
        testNFKD($u, $c, $comment, $reportFailure),
        testCleanUp($u, $c, $comment, $reportFailure),
    );
    $result = !in_array(false, $outcomes, true);

    $shouldExplain = $verbose && !$result && !$reportFailure;
    if ($shouldExplain) {
        print $comment;
        testNormals($u, $c, $comment, true);
    }

    return $result;
}


/**
 * Compare an expected string with an actual one, optionally printing a
 * one-line report of the comparison.
 *
 * @param string $a       Expected value.
 * @param string $b       Actual value.
 * @param int    $col     Column number shown in the report.
 * @param string $form    Name of the operation shown in the report.
 * @param bool   $verbose Whether to print the report line.
 * @return bool True if strcmp() considers the two strings equal.
 */
function verbosify($a, $b, $col, $form, $verbose)
{
    # Equality is decided with strcmp() rather than the === operator.
    $result = (0 == strcmp($a, $b));
    if (!$verbose) {
        return $result;
    }

    if ($result) {
        $ok = "succeed";
        $eq = "==";
    } else {
        $ok = " failed";
        $eq = "!=";
    }
    $aa = pretty($a);
    $bb = pretty($b);
    print "  $ok $form c$col '$aa' $eq '$bb'\n";

    return $result;
}


/**
 * Check toNFC() against one row of test columns: columns 1-3 must produce
 * column 2, columns 4-5 must produce column 4.
 *
 * @param object $u       Normalizer providing toNFC().
 * @param array  $c       Test columns, indexed 1 to 5.
 * @param string $comment Unused here.
 * @param bool   $verbose Whether each comparison is printed.
 * @return bool True if all five comparisons matched.
 */
function testNFC(&$u, $c, $comment, $verbose)
{
    # input column => column holding the expected result
    $expectedFor = array(1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4);

    $result = true;
    foreach ($expectedFor as $col => $want) {
        # The comparison runs first, so every column is checked even after
        # an earlier failure.
        $result = verbosify($c[$want], $u->toNFC($c[$col]), $col, 'NFC', $verbose) && $result;
    }
    return $result;
}


/**
 * Check cleanUp() against one row of test columns, using the same
 * expectations as the NFC check: columns 1-3 must produce column 2,
 * columns 4-5 must produce column 4.
 *
 * @param object $u       Normalizer providing cleanUp().
 * @param array  $c       Test columns, indexed 1 to 5.
 * @param string $comment Unused here.
 * @param bool   $verbose Whether each comparison is printed.
 * @return bool True if all five comparisons matched.
 */
function testCleanUp(&$u, $c, $comment, $verbose)
{
    # input column => column holding the expected result
    $expectedFor = array(1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4);

    $result = true;
    foreach ($expectedFor as $col => $want) {
        # cleanUp() is handed a scratch copy rather than the array element.
        $x = $c[$col];
        $matched = verbosify($c[$want], $u->cleanUp($x), $col, 'cleanUp', $verbose);
        $result = $matched && $result;
    }
    return $result;
}


/**
 * Check toNFD() against one row of test columns: columns 1-3 must produce
 * column 3, columns 4-5 must produce column 5.
 *
 * @param object $u       Normalizer providing toNFD().
 * @param array  $c       Test columns, indexed 1 to 5.
 * @param string $comment Unused here.
 * @param bool   $verbose Whether each comparison is printed.
 * @return bool True if all five comparisons matched.
 */
function testNFD(&$u, $c, $comment, $verbose)
{
    $result = true;
    for ($col = 1; $col <= 5; $col++) {
        # Columns 1-3 are compared with column 3, the rest with column 5.
        $want = ($col <= 3) ? 3 : 5;
        $matched = verbosify($c[$want], $u->toNFD($c[$col]), $col, 'NFD', $verbose);
        $result = $matched && $result;
    }
    return $result;
}


/**
 * Check toNFKC() against one row of test columns: every column must
 * produce column 4.
 *
 * @param object $u       Normalizer providing toNFKC().
 * @param array  $c       Test columns, indexed 1 to 5.
 * @param string $comment Unused here.
 * @param bool   $verbose Whether each comparison is printed.
 * @return bool True if all five comparisons matched.
 */
function testNFKC(&$u, $c, $comment, $verbose)
{
    $result = true;
    foreach (array(1, 2, 3, 4, 5) as $col) {
        # The comparison is evaluated before the running result, so no
        # column is skipped after a failure.
        $result = verbosify($c[4], $u->toNFKC($c[$col]), $col, 'NFKC', $verbose) && $result;
    }
    return $result;
}


/**
 * Check toNFKD() against one row of test columns: every column must
 * produce column 5.
 *
 * @param object $u       Normalizer providing toNFKD().
 * @param array  $c       Test columns, indexed 1 to 5.
 * @param string $comment Unused here.
 * @param bool   $verbose Whether each comparison is printed.
 * @return bool True if all five comparisons matched.
 */
function testNFKD(&$u, $c, $comment, $verbose)
{
    $result = true;
    for ($col = 1; $col <= 5; $col++) {
        $matched = verbosify($c[5], $u->toNFKD($c[$col]), $col, 'NFKD', $verbose);
        # All five comparisons run; one mismatch makes the whole row fail.
        $result = $matched && $result;
    }
    return $result;
}


/**
 * Check that a string is left unchanged by toNFC(), toNFD(), toNFKC(),
 * toNFKD() and cleanUp().
 *
 * If a check fails while the global $verbose flag is set and this call was
 * not itself a reporting run, the description is printed on a line of its
 * own and the checks are run once more with reporting enabled.
 *
 * @param object $u             Normalizer providing the five operations.
 * @param string $char          String expected to be invariant.
 * @param string $desc          Label printed ahead of the detailed re-run.
 * @param bool   $reportFailure Whether each comparison should be printed.
 * @return bool True if all five operations returned $char unchanged.
 */
function testInvariant(&$u, $char, $desc, $reportFailure = false)
{
    $result = verbosify($char, $u->toNFC($char), 1, 'NFC', $reportFailure);
    $result = verbosify($char, $u->toNFD($char), 1, 'NFD', $reportFailure) && $result;
    $result = verbosify($char, $u->toNFKC($char), 1, 'NFKC', $reportFailure) && $result;
    $result = verbosify($char, $u->toNFKD($char), 1, 'NFKD', $reportFailure) && $result;
    $result = verbosify($char, $u->cleanUp($char), 1, 'cleanUp', $reportFailure) && $result;
    global $verbose;
    if ($verbose && !$result && !$reportFailure) {
        # The label may arrive without a line ending; always finish the line
        # so the first per-check report does not run onto it. Any newline
        # already present is dropped first to avoid a blank line.
        print rtrim($desc, "\r\n") . "\n";
        testInvariant($u, $char, $desc, true);
    }
    return $result;
}

sprintf
sprintf('%.4f', $callTime)
Definition: 01pharSimple.php:87

$result
$result
Definition: CleanUpTest.php:463

$columns
if(! $in) $columns
Definition: Utf8Test.php:45

$ok
$ok
Definition: UtfNormalTest.php:80

verbosify
verbosify($a, $b, $col, $form, $verbose)
Definition: UtfNormalTest.php:189

$in
if(php_sapi_name() !='cli') $in
Definition: UtfNormalTest.php:67

$failure
$failure
Definition: UtfNormalTest.php:79

$total
$total
Definition: UtfNormalTest.php:77

$verbose
$verbose
Definition: UtfNormalTest.php:27

testNFC
testNFC(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:203

testNormals
testNormals(&$u, $c, $comment, $reportFailure=false)
Definition: UtfNormalTest.php:173

testCleanUp
testCleanUp(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:213

testNFKC
testNFKC(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:238

testNFD
testNFD(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:228

$success
$success
Definition: UtfNormalTest.php:78

testNFKD
testNFKD(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:248

print
if(! $in) print
Definition: UtfNormalTest.php:116

testInvariant
testInvariant(&$u, $char, $desc, $reportFailure=false)
Definition: UtfNormalTest.php:258

$testedChars
$testedChars
Definition: UtfNormalTest.php:81

reportResults
reportResults(&$total, &$success, &$failure)
Definition: UtfNormalTest.php:159

$normalizer
if(! $in) $normalizer
Definition: UtfNormalTest.php:75

codepointToUtf8
codepointToUtf8($codepoint)
Return UTF-8 sequence for a given Unicode code point.
Definition: UtfNormalUtil.php:38

utf8ToCodepoint
utf8ToCodepoint($char)
Determine the Unicode codepoint of a single-character UTF-8 sequence.
Definition: UtfNormalUtil.php:109

UTF8_SURROGATE_FIRST
const UTF8_SURROGATE_FIRST
Definition: UtfNormal.php:65

UTF8_SURROGATE_LAST
const UTF8_SURROGATE_LAST
Definition: UtfNormal.php:66

$comment
$comment
Definition: buildRTE.php:83

php
An exception for terminating execution or to throw for unit testing.

UtfNormal
Definition: UtfNormal.php:113

$x
$x
Definition: example_009.php:98

exit
exit
Definition: old-extract-schema.php:9

defined
defined( 'APPLICATION_ENV')||define( 'APPLICATION_ENV'
Definition: bootstrap.php:27

$form
if(isset($_POST['submit'])) $form
Definition: registry.edit.php:63

$_SERVER
if((!isset($_SERVER['DOCUMENT_ROOT'])) OR(empty($_SERVER['DOCUMENT_ROOT']))) $_SERVER['DOCUMENT_ROOT']
Definition: tcpdf_autoconfig.php:54

$data
$data
Definition: test-settings.sample.php:14

$cols
$cols
Definition: xhr_table.php:11