ILIAS  release_5-3 Revision v5.3.23-19-g915713cf615
UtfNormalTest.php
Go to the documentation of this file.
<?php
# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

// When true, a failing test case is re-run with per-comparison output
// (see the "global $verbose" checks in testNormals() and testInvariant()).
$verbose = true;
#define( 'PRETTY_UTF8', true );

if (defined('PRETTY_UTF8')) {
    /**
     * Render a string for diagnostics as upper-case hex, two digits per byte.
     *
     * @param string $string Raw bytes to display.
     * @return string Hex dump of $string.
     */
    function pretty($string)
    {
        return preg_replace_callback(
            '/([\x00-\xff])/',
            function ($hit) {
                return sprintf("%02X", ord($hit[1]));
            },
            $string
        );
    }
} else {
    /**
     * Render a string for diagnostics as space-separated code points,
     * each printed as at least four upper-case hex digits.
     *
     * @param string $string UTF-8 text to display.
     * @return string Code point list, or a byte-wise hex dump when $string
     *                is not valid UTF-8.
     */
    function pretty($string)
    {
        $rendered = preg_replace_callback(
            '/(.)/us',
            function ($hit) {
                return sprintf("%04X ", utf8ToCodepoint($hit[1]));
            },
            $string
        );
        if ($rendered === null) {
            // The /u modifier makes PCRE reject malformed UTF-8 and return
            // null; show the raw bytes instead of an empty string so the
            // failing value stays visible in the report.
            return strtoupper(bin2hex($string));
        }
        return trim($rendered);
    }
}
56
// Passing "--icu" on the command line loads the php_utfnormal extension
// before the normalizer library is included.
if (isset($_SERVER['argv']) && in_array('--icu', $_SERVER['argv'], true)) {
    dl('php_utfnormal.so');
}

require_once 'include/Unicode/UtfNormal.php';

// The script reads data files from the working directory and prints progress
// to stdout, so it is only meant to be run from the command line.
if (php_sapi_name() !== 'cli') {
    die("Run me from the command line please.\n");
}
66
// --- Part 1: run every line of the Unicode NormalizationTest.txt data file ---
$in = fopen("NormalizationTest.txt", "rt");
if (!$in) {
    print "Couldn't open NormalizationTest.txt -- can't run tests.\n";
    print "If necessary, manually download this file. It can be obtained at\n";
    print "http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt";
    exit(-1);
}

// NOTE(review): the listing this file was recovered from dropped the
// initialisation lines here; they are restored from the later uses of
// $normalizer, $total, $success and $failure -- confirm against upstream.
$normalizer = new UtfNormal;

$total = 0;
$success = 0;
$failure = 0;

$ok = true;
// Maps every first-column sequence seen in this file to true, so the
// invariants pass can skip characters that were already exercised here.
$testedChars = array();
while (false !== ($line = fgets($in))) {
    // Everything after the first '#' is a comment. Pad the result so a line
    // without '#' does not leave $comment undefined.
    list($data, $comment) = array_pad(explode('#', $line, 2), 2, '');
    if (trim($data) === '') {
        // Comment-only or blank line: nothing to test.
        continue;
    }
    $matches = array();
    if (preg_match('/@Part([\d])/', $data, $matches)) {
        // A new part begins: report the previous one (there is none before part 0).
        if ($matches[1] > 0) {
            $ok = reportResults($total, $success, $failure) && $ok;
        }
        print "Part {$matches[1]}: $comment";
        continue;
    }

    // Convert the semicolon-separated hex sequences to UTF-8 and shift them
    // so that $columns[1]..$columns[5] match the c1..c5 naming in the tests.
    $columns = array_map("hexSequenceToUtf8", explode(";", $data));
    array_unshift($columns, '');

    $testedChars[$columns[1]] = true;
    $total++;
    if (testNormals($normalizer, $columns, $comment)) {
        $success++;
    } else {
        $failure++;
        # print "FAILED: $comment";
    }
    if ($total % 100 == 0) {
        // Progress indicator.
        print "$total ";
    }
}
fclose($in);

$ok = reportResults($total, $success, $failure) && $ok;

// --- Part 2: characters not covered above must be unchanged by every form ---
$in = fopen("UnicodeData.txt", "rt");
if (!$in) {
    print "Can't open UnicodeData.txt for reading.\n";
    print "If necessary, fetch this file from the internet:\n";
    print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
    exit(-1);
}
print "Now testing invariants...\n";
while (false !== ($line = fgets($in))) {
    $cols = explode(';', $line);
    if (count($cols) < 2) {
        // Blank or malformed line: there is no "code point;name" pair to test.
        continue;
    }
    $char = codepointToUtf8(hexdec($cols[0]));
    $desc = $cols[0] . ": " . $cols[1];
    if ($char < "\x20" || ($char >= UTF8_SURROGATE_FIRST && $char <= UTF8_SURROGATE_LAST)) {
        # Can't check NULL with the ICU plugin, as null bytes fail in C land.
        # Skip other control characters, as we strip them for XML safety.
        # Surrogates are illegal on their own or in UTF-8, ignore.
        continue;
    }
    if (empty($testedChars[$char])) {
        $total++;
        if (testInvariant($normalizer, $char, $desc)) {
            $success++;
        } else {
            $failure++;
        }
        if ($total % 100 == 0) {
            // Progress indicator.
            print "$total ";
        }
    }
}
fclose($in);

// NOTE(review): this summary line was dropped by the listing this file was
// recovered from; restored to mirror the first test pass -- confirm upstream.
$ok = reportResults($total, $success, $failure) && $ok;

// Overall verdict: $ok is true only if every reported batch passed.
if ($ok) {
    print "TEST SUCCEEDED!\n";
    exit(0);
} else {
    print "TEST FAILED!\n";
    exit(-1);
}
156
157## ------
158
/**
 * Print a success/failure summary for the current batch and reset the counters.
 *
 * All three counters are passed by reference and are set back to 0 so the
 * caller can start counting the next batch.
 *
 * @param int $total   Number of tests run in this batch.
 * @param int $success Number of tests that passed.
 * @param int $failure Number of tests that failed.
 * @return bool True when at least one test passed and none failed.
 */
function reportResults(&$total, &$success, &$failure)
{
    // An empty batch would otherwise divide by zero; report 0% for both.
    $percSucc = ($total > 0) ? intval($success * 100 / $total) : 0;
    $percFail = ($total > 0) ? intval($failure * 100 / $total) : 0;
    print "\n";
    print "$success tests successful ($percSucc%)\n";
    print "$failure tests failed ($percFail%)\n\n";
    $ok = ($success > 0 && $failure == 0);
    $total = 0;
    $success = 0;
    $failure = 0;
    return $ok;
}
172
/**
 * Run all normalization checks (NFC, NFD, NFKC, NFKD, cleanUp) on one test row.
 *
 * Every check is always executed, even after an earlier one has failed. When
 * the global $verbose flag is set and the row fails, the comment is printed
 * and the row is run a second time with per-comparison reporting enabled.
 *
 * @param object $u             Normalizer exposing toNFC/toNFD/toNFKC/toNFKD/cleanUp.
 * @param array  $c             Test columns, indexed 1..5.
 * @param string $comment       Comment text of the test row.
 * @param bool   $reportFailure Whether each comparison prints its outcome.
 * @return bool True when every check passed.
 */
function testNormals(&$u, $c, $comment, $reportFailure = false)
{
    $result = testNFC($u, $c, $comment, $reportFailure);
    $result = testNFD($u, $c, $comment, $reportFailure) && $result;
    $result = testNFKC($u, $c, $comment, $reportFailure) && $result;
    $result = testNFKD($u, $c, $comment, $reportFailure) && $result;
    $result = testCleanUp($u, $c, $comment, $reportFailure) && $result;

    global $verbose;
    if ($verbose && !$result && !$reportFailure) {
        // NOTE(review): this print was dropped by the listing the file was
        // recovered from; restored to mirror "print $desc" in testInvariant().
        print $comment;
        // Re-run with reporting on; $reportFailure = true stops further recursion.
        testNormals($u, $c, $comment, true);
    }
    return $result;
}
188
/**
 * Compare an expected and an actual string, optionally printing the outcome.
 *
 * @param string $a       Expected value.
 * @param string $b       Actual value.
 * @param int    $col     Test column number, used in the printed line only.
 * @param string $form    Label of the operation, used in the printed line only.
 * @param bool   $verbose Whether to print a line describing the comparison.
 * @return bool True when the two strings are equal according to strcmp().
 */
function verbosify($a, $b, $col, $form, $verbose)
{
    #$result = ($a === $b);
    $result = (strcmp($a, $b) === 0);
    if ($verbose) {
        $aa = pretty($a);
        $bb = pretty($b);
        // " failed" carries a leading space so both labels have the same width.
        $ok = $result ? "succeed" : " failed";
        $eq = $result ? "==" : "!=";
        print " $ok $form c$col '$aa' $eq '$bb'\n";
    }
    return $result;
}
202
/**
 * Check the NFC expectations for one test row:
 *   c2 == toNFC(c1) == toNFC(c2) == toNFC(c3)
 *   c4 == toNFC(c4) == toNFC(c5)
 *
 * All five comparisons are always evaluated so that verbose output is complete.
 *
 * @param object $u       Normalizer exposing toNFC().
 * @param array  $c       Test columns, indexed 1..5.
 * @param string $comment Comment text of the test row (unused here).
 * @param bool   $verbose Whether each comparison prints its outcome.
 * @return bool True when all comparisons matched.
 */
function testNFC(&$u, $c, $comment, $verbose)
{
    // input column => column holding the expected NFC form
    $expectedColumn = array(1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4);

    $result = true;
    foreach ($expectedColumn as $col => $want) {
        // verbosify() is on the left so it runs even after an earlier failure.
        $result = verbosify($c[$want], $u->toNFC($c[$col]), $col, 'NFC', $verbose) && $result;
    }
    return $result;
}
212
/**
 * Check that cleanUp() yields the same results expected of NFC:
 *   c2 == cleanUp(c1) == cleanUp(c2) == cleanUp(c3)
 *   c4 == cleanUp(c4) == cleanUp(c5)
 *
 * All five comparisons are always evaluated so that verbose output is complete.
 *
 * @param object $u       Normalizer exposing cleanUp().
 * @param array  $c       Test columns, indexed 1..5.
 * @param string $comment Comment text of the test row (unused here).
 * @param bool   $verbose Whether each comparison prints its outcome.
 * @return bool True when all comparisons matched.
 */
function testCleanUp(&$u, $c, $comment, $verbose)
{
    // input column => column holding the expected cleaned-up form
    $expectedColumn = array(1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4);

    $result = true;
    foreach ($expectedColumn as $col => $want) {
        // Work on a copy, as the original code did -- presumably because
        // cleanUp() may modify its argument; confirm against UtfNormal.
        $x = $c[$col];
        $result = verbosify($c[$want], $u->cleanUp($x), $col, 'cleanUp', $verbose) && $result;
    }
    return $result;
}
227
/**
 * Check the NFD expectations for one test row:
 *   c3 == toNFD(c1) == toNFD(c2) == toNFD(c3)
 *   c5 == toNFD(c4) == toNFD(c5)
 *
 * All five comparisons are always evaluated so that verbose output is complete.
 *
 * @param object $u       Normalizer exposing toNFD().
 * @param array  $c       Test columns, indexed 1..5.
 * @param string $comment Comment text of the test row (unused here).
 * @param bool   $verbose Whether each comparison prints its outcome.
 * @return bool True when all comparisons matched.
 */
function testNFD(&$u, $c, $comment, $verbose)
{
    // input column => column holding the expected NFD form
    $expectedColumn = array(1 => 3, 2 => 3, 3 => 3, 4 => 5, 5 => 5);

    $result = true;
    foreach ($expectedColumn as $col => $want) {
        // verbosify() is on the left so it runs even after an earlier failure.
        $result = verbosify($c[$want], $u->toNFD($c[$col]), $col, 'NFD', $verbose) && $result;
    }
    return $result;
}
237
/**
 * Check the NFKC expectations for one test row:
 *   c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5)
 *
 * All five comparisons are always evaluated so that verbose output is complete.
 *
 * @param object $u       Normalizer exposing toNFKC().
 * @param array  $c       Test columns, indexed 1..5.
 * @param string $comment Comment text of the test row (unused here).
 * @param bool   $verbose Whether each comparison prints its outcome.
 * @return bool True when all comparisons matched.
 */
function testNFKC(&$u, $c, $comment, $verbose)
{
    $result = true;
    for ($col = 1; $col <= 5; $col++) {
        // Every column must normalize to column 4. verbosify() is on the
        // left so it runs even after an earlier failure.
        $result = verbosify($c[4], $u->toNFKC($c[$col]), $col, 'NFKC', $verbose) && $result;
    }
    return $result;
}
247
/**
 * Check the NFKD expectations for one test row:
 *   c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5)
 *
 * All five comparisons are always evaluated so that verbose output is complete.
 *
 * @param object $u       Normalizer exposing toNFKD().
 * @param array  $c       Test columns, indexed 1..5.
 * @param string $comment Comment text of the test row (unused here).
 * @param bool   $verbose Whether each comparison prints its outcome.
 * @return bool True when all comparisons matched.
 */
function testNFKD(&$u, $c, $comment, $verbose)
{
    $result = true;
    for ($col = 1; $col <= 5; $col++) {
        // Every column must normalize to column 5. verbosify() is on the
        // left so it runs even after an earlier failure.
        $result = verbosify($c[5], $u->toNFKD($c[$col]), $col, 'NFKD', $verbose) && $result;
    }
    return $result;
}
257
/**
 * Check that a character is left unchanged by every normalization form and
 * by cleanUp().
 *
 * All five comparisons are always evaluated. When the global $verbose flag is
 * set and the character fails, its description is printed and the checks are
 * run a second time with per-comparison reporting enabled.
 *
 * @param object $u             Normalizer exposing toNFC/toNFD/toNFKC/toNFKD/cleanUp.
 * @param string $char          The character under test, as a UTF-8 string.
 * @param string $desc          Description printed before the verbose re-run.
 * @param bool   $reportFailure Whether each comparison prints its outcome.
 * @return bool True when the character survived every operation unchanged.
 */
function testInvariant(&$u, $char, $desc, $reportFailure = false)
{
    // verbosify() is on the left of each && so it runs even after a failure.
    $result = verbosify($char, $u->toNFC($char), 1, 'NFC', $reportFailure);
    $result = verbosify($char, $u->toNFD($char), 1, 'NFD', $reportFailure) && $result;
    $result = verbosify($char, $u->toNFKC($char), 1, 'NFKC', $reportFailure) && $result;
    $result = verbosify($char, $u->toNFKD($char), 1, 'NFKD', $reportFailure) && $result;
    $result = verbosify($char, $u->cleanUp($char), 1, 'cleanUp', $reportFailure) && $result;

    global $verbose;
    if ($verbose && !$result && !$reportFailure) {
        print $desc;
        // Re-run with reporting on; $reportFailure = true stops further recursion.
        testInvariant($u, $char, $desc, true);
    }
    return $result;
}
sprintf('%.4f', $callTime)
$result
if(! $in) $columns
Definition: Utf8Test.php:45
verbosify($a, $b, $col, $form, $verbose)
if(php_sapi_name() !='cli') $in
$failure
$total
$verbose
testNFC(&$u, $c, $comment, $verbose)
testNormals(&$u, $c, $comment, $reportFailure=false)
testCleanUp(&$u, $c, $comment, $verbose)
testNFKC(&$u, $c, $comment, $verbose)
testNFD(&$u, $c, $comment, $verbose)
$success
testNFKD(&$u, $c, $comment, $verbose)
if(! $in) print
testInvariant(&$u, $char, $desc, $reportFailure=false)
$testedChars
reportResults(&$total, &$success, &$failure)
if(! $in) $normalizer
codepointToUtf8($codepoint)
Return UTF-8 sequence for a given Unicode code point.
utf8ToCodepoint($char)
Determine the Unicode codepoint of a single-character UTF-8 sequence.
const UTF8_SURROGATE_FIRST
Definition: UtfNormal.php:65
const UTF8_SURROGATE_LAST
Definition: UtfNormal.php:66
$comment
Definition: buildRTE.php:83
An exception for terminatinating execution or to throw for unit testing.
$x
Definition: example_009.php:98
defined( 'APPLICATION_ENV')||define( 'APPLICATION_ENV'
Definition: bootstrap.php:27
if(isset($_POST['submit'])) $form
if((!isset($_SERVER['DOCUMENT_ROOT'])) OR(empty($_SERVER['DOCUMENT_ROOT']))) $_SERVER['DOCUMENT_ROOT']
$cols
Definition: xhr_table.php:11