ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
CleanUpTest Class Reference
+ Inheritance diagram for CleanUpTest:
+ Collaboration diagram for CleanUpTest:

Public Member Functions

 setUp ()
 
 tearDown ()
 
 testAscii ()
 
 testNull ()
 
 testLatin ()
 
 testLatinNormal ()
 
 XtestAllChars ()
 This test is very expensive! More...
 
 testAllBytes ()
 
 doTestBytes ($head, $tail)
 
 testDoubleBytes ()
 
 doTestDoubleBytes ($head, $tail)
 
 testTripleBytes ()
 
 doTestTripleBytes ($head, $tail)
 
 testChunkRegression ()
 
 testInterposeRegression ()
 
 testOverlongRegression ()
 
 testSurrogateRegression ()
 
 testBomRegression ()
 
 testForbiddenRegression ()
 
 testHangulRegression ()
 

Detailed Description

Definition at line 45 of file CleanUpTest.php.

Member Function Documentation

◆ doTestBytes()

CleanUpTest::doTestBytes (   $head,
  $tail 
)
Todo:
document

Definition at line 141 of file CleanUpTest.php.

References $i, $tail, $x, UtfNormal\cleanUp(), and UTF8_REPLACEMENT.

Referenced by testAllBytes().

142  {
143  for ($i = 0x0; $i < 256; $i++) {
144  $char = $head . chr($i) . $tail;
145  $clean = UtfNormal::cleanUp($char);
146  $x = sprintf("%02X", $i);
147  if ($i == 0x0009 ||
148  $i == 0x000a ||
149  $i == 0x000d ||
150  ($i > 0x001f && $i < 0x80)) {
151  $this->assertEquals(
152  bin2hex($char),
153  bin2hex($clean),
154  "ASCII byte $x should be intact"
155  );
156  if ($char != $clean) {
157  return;
158  }
159  } else {
160  $norm = $head . UTF8_REPLACEMENT . $tail;
161  $this->assertEquals(
162  bin2hex($norm),
163  bin2hex($clean),
164  "Forbidden byte $x should be rejected"
165  );
166  if ($norm != $clean) {
167  return;
168  }
169  }
170  }
171  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$tail
Definition: tail.php:20
const UTF8_REPLACEMENT
Definition: UtfNormal.php:68
$i
Definition: disco.tpl.php:19
$x
Definition: complexTest.php:9
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ doTestDoubleBytes()

CleanUpTest::doTestDoubleBytes (   $head,
  $tail 
)
Todo:
document

Definition at line 185 of file CleanUpTest.php.

References $tail, $x, UtfNormal\cleanUp(), UtfNormal\NFC(), and UTF8_REPLACEMENT.

Referenced by testDoubleBytes().

186  {
187  for ($first = 0xc0; $first < 0x100; $first++) {
188  for ($second = 0x80; $second < 0x100; $second++) {
189  $char = $head . chr($first) . chr($second) . $tail;
190  $clean = UtfNormal::cleanUp($char);
191  $x = sprintf("%02X,%02X", $first, $second);
192  if ($first > 0xc1 &&
193  $first < 0xe0 &&
194  $second < 0xc0) {
195  $norm = UtfNormal::NFC($char);
196  $this->assertEquals(
197  bin2hex($norm),
198  bin2hex($clean),
199  "Pair $x should be intact"
200  );
201  if ($norm != $clean) {
202  return;
203  }
204  } elseif ($first > 0xfd || $second > 0xbf) {
205  # fe and ff are not legal head bytes -- expect two replacement chars
206  $norm = $head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail;
207  $this->assertEquals(
208  bin2hex($norm),
209  bin2hex($clean),
210  "Forbidden pair $x should be rejected"
211  );
212  if ($norm != $clean) {
213  return;
214  }
215  } else {
216  $norm = $head . UTF8_REPLACEMENT . $tail;
217  $this->assertEquals(
218  bin2hex($norm),
219  bin2hex($clean),
220  "Forbidden pair $x should be rejected"
221  );
222  if ($norm != $clean) {
223  return;
224  }
225  }
226  }
227  }
228  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
static NFC($string)
Definition: UtfNormal.php:517
$tail
Definition: tail.php:20
const UTF8_REPLACEMENT
Definition: UtfNormal.php:68
$x
Definition: complexTest.php:9
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ doTestTripleBytes()

CleanUpTest::doTestTripleBytes (   $head,
  $tail 
)
Todo:
document

Definition at line 240 of file CleanUpTest.php.

References $tail, $x, UtfNormal\cleanUp(), UtfNormal\NFC(), UTF8_REPLACEMENT, and UTF8_SURROGATE_FIRST.

Referenced by testTripleBytes().

241  {
242  for ($first = 0xc0; $first < 0x100; $first++) {
243  for ($second = 0x80; $second < 0x100; $second++) {
244  #for( $third = 0x80; $third < 0x100; $third++ ) {
245  for ($third = 0x80; $third < 0x81; $third++) {
246  $char = $head . chr($first) . chr($second) . chr($third) . $tail;
247  $clean = UtfNormal::cleanUp($char);
248  $x = sprintf("%02X,%02X,%02X", $first, $second, $third);
249  if ($first >= 0xe0 &&
250  $first < 0xf0 &&
251  $second < 0xc0 &&
252  $third < 0xc0) {
253  if ($first == 0xe0 && $second < 0xa0) {
254  $this->assertEquals(
255  bin2hex($head . UTF8_REPLACEMENT . $tail),
256  bin2hex($clean),
257  "Overlong triplet $x should be rejected"
258  );
259  } elseif ($first == 0xed &&
260  (chr($first) . chr($second) . chr($third)) >= UTF8_SURROGATE_FIRST) {
261  $this->assertEquals(
262  bin2hex($head . UTF8_REPLACEMENT . $tail),
263  bin2hex($clean),
264  "Surrogate triplet $x should be rejected"
265  );
266  } else {
267  $this->assertEquals(
268  bin2hex(UtfNormal::NFC($char)),
269  bin2hex($clean),
270  "Triplet $x should be intact"
271  );
272  }
273  } elseif ($first > 0xc1 && $first < 0xe0 && $second < 0xc0) {
274  $this->assertEquals(
275  bin2hex(UtfNormal::NFC($head . chr($first) . chr($second)) . UTF8_REPLACEMENT . $tail),
276  bin2hex($clean),
277  "Valid 2-byte $x + broken tail"
278  );
279  } elseif ($second > 0xc1 && $second < 0xe0 && $third < 0xc0) {
280  $this->assertEquals(
281  bin2hex($head . UTF8_REPLACEMENT . UtfNormal::NFC(chr($second) . chr($third) . $tail)),
282  bin2hex($clean),
283  "Broken head + valid 2-byte $x"
284  );
285  } elseif (($first > 0xfd || $second > 0xfd) &&
286  (($second > 0xbf && $third > 0xbf) ||
287  ($second < 0xc0 && $third < 0xc0) ||
288  ($second > 0xfd) ||
289  ($third > 0xfd))) {
290  # fe and ff are not legal head bytes -- expect three replacement chars
291  $this->assertEquals(
292  bin2hex($head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail),
293  bin2hex($clean),
294  "Forbidden triplet $x should be rejected"
295  );
296  } elseif ($first > 0xc2 && $second < 0xc0 && $third < 0xc0) {
297  $this->assertEquals(
298  bin2hex($head . UTF8_REPLACEMENT . $tail),
299  bin2hex($clean),
300  "Forbidden triplet $x should be rejected"
301  );
302  } else {
303  $this->assertEquals(
304  bin2hex($head . UTF8_REPLACEMENT . UTF8_REPLACEMENT . $tail),
305  bin2hex($clean),
306  "Forbidden triplet $x should be rejected"
307  );
308  }
309  }
310  }
311  }
312  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
const UTF8_SURROGATE_FIRST
Definition: UtfNormal.php:65
static NFC($string)
Definition: UtfNormal.php:517
$tail
Definition: tail.php:20
const UTF8_REPLACEMENT
Definition: UtfNormal.php:68
$x
Definition: complexTest.php:9
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ setUp()

CleanUpTest::setUp ( )
Todo:
document

Definition at line 48 of file CleanUpTest.php.

49  {
50  }

◆ tearDown()

CleanUpTest::tearDown ( )
Todo:
document

Definition at line 53 of file CleanUpTest.php.

54  {
55  }

◆ testAllBytes()

CleanUpTest::testAllBytes ( )
Todo:
document

Definition at line 132 of file CleanUpTest.php.

References doTestBytes().

133  {
134  $this->doTestBytes('', '');
135  $this->doTestBytes('x', '');
136  $this->doTestBytes('', 'x');
137  $this->doTestBytes('x', 'x');
138  }
doTestBytes($head, $tail)
+ Here is the call graph for this function:

◆ testAscii()

CleanUpTest::testAscii ( )
Todo:
document

Definition at line 58 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

59  {
60  $text = 'This is plain ASCII text.';
61  $this->assertEquals($text, UtfNormal::cleanUp($text));
62  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testBomRegression()

CleanUpTest::testBomRegression ( )
Todo:
document

Definition at line 421 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

422  {
423  $text = "\xef\xbf\xbe" . # U+FFFE, illegal char
424  "\xb2" . # bad tail
425  "\xef" . # bad head
426  "\x59";
427  $expect = "\xef\xbf\xbd" .
428  "\xef\xbf\xbd" .
429  "\xef\xbf\xbd" .
430  "\x59";
431  $this->assertEquals(
432  bin2hex($expect),
433  bin2hex(UtfNormal::cleanUp($text))
434  );
435  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testChunkRegression()

CleanUpTest::testChunkRegression ( )
Todo:
document

Definition at line 315 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

316  {
317  # Check for regression against a chunking bug
318  $text = "\x46\x55\xb8" .
319  "\xdc\x96" .
320  "\xee" .
321  "\xe7" .
322  "\x44" .
323  "\xaa" .
324  "\x2f\x25";
325  $expect = "\x46\x55\xef\xbf\xbd" .
326  "\xdc\x96" .
327  "\xef\xbf\xbd" .
328  "\xef\xbf\xbd" .
329  "\x44" .
330  "\xef\xbf\xbd" .
331  "\x2f\x25";
332 
333  $this->assertEquals(
334  bin2hex($expect),
335  bin2hex(UtfNormal::cleanUp($text))
336  );
337  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testDoubleBytes()

CleanUpTest::testDoubleBytes ( )
Todo:
document

Definition at line 174 of file CleanUpTest.php.

References doTestDoubleBytes().

175  {
176  $this->doTestDoubleBytes('', '');
177  $this->doTestDoubleBytes('x', '');
178  $this->doTestDoubleBytes('', 'x');
179  $this->doTestDoubleBytes('x', 'x');
180  }
doTestDoubleBytes($head, $tail)
+ Here is the call graph for this function:

◆ testForbiddenRegression()

CleanUpTest::testForbiddenRegression ( )
Todo:
document

Definition at line 438 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

439  {
440  $text = "\xef\xbf\xbf"; # U+FFFF, illegal char
441  $expect = "\xef\xbf\xbd";
442  $this->assertEquals(
443  bin2hex($expect),
444  bin2hex(UtfNormal::cleanUp($text))
445  );
446  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testHangulRegression()

CleanUpTest::testHangulRegression ( )
Todo:
document

Definition at line 449 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

450  {
451  $text = "\xed\x9c\xaf" . # Hangul char
452  "\xe1\x87\x81"; # followed by another final jamo
453  $expect = $text; # Should *not* change.
454  $this->assertEquals(
455  bin2hex($expect),
456  bin2hex(UtfNormal::cleanUp($text))
457  );
458  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testInterposeRegression()

CleanUpTest::testInterposeRegression ( )
Todo:
document

Definition at line 340 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

341  {
342  $text = "\x4e\x30" .
343  "\xb1" . # bad tail
344  "\x3a" .
345  "\x92" . # bad tail
346  "\x62\x3a" .
347  "\x84" . # bad tail
348  "\x43" .
349  "\xc6" . # bad head
350  "\x3f" .
351  "\x92" . # bad tail
352  "\xad" . # bad tail
353  "\x7d" .
354  "\xd9\x95";
355 
356  $expect = "\x4e\x30" .
357  "\xef\xbf\xbd" .
358  "\x3a" .
359  "\xef\xbf\xbd" .
360  "\x62\x3a" .
361  "\xef\xbf\xbd" .
362  "\x43" .
363  "\xef\xbf\xbd" .
364  "\x3f" .
365  "\xef\xbf\xbd" .
366  "\xef\xbf\xbd" .
367  "\x7d" .
368  "\xd9\x95";
369 
370  $this->assertEquals(
371  bin2hex($expect),
372  bin2hex(UtfNormal::cleanUp($text))
373  );
374  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testLatin()

CleanUpTest::testLatin ( )
Todo:
document

Definition at line 76 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

77  {
78  $text = "L'\xc3\xa9cole";
79  $this->assertEquals($text, UtfNormal::cleanUp($text));
80  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testLatinNormal()

CleanUpTest::testLatinNormal ( )
Todo:
document

Definition at line 83 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

84  {
85  $text = "L'e\xcc\x81cole";
86  $expect = "L'\xc3\xa9cole";
87  $this->assertEquals($expect, UtfNormal::cleanUp($text));
88  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testNull()

CleanUpTest::testNull ( )
Todo:
document

Definition at line 65 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

66  {
67  $text = "a \x00 null";
68  $expect = "a \xef\xbf\xbd null";
69  $this->assertEquals(
70  bin2hex($expect),
71  bin2hex(UtfNormal::cleanUp($text))
72  );
73  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testOverlongRegression()

CleanUpTest::testOverlongRegression ( )
Todo:
document

Definition at line 377 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

378  {
379  $text = "\x67" .
380  "\x1a" . # forbidden ascii
381  "\xea" . # bad head
382  "\xc1\xa6" . # overlong sequence
383  "\xad" . # bad tail
384  "\x1c" . # forbidden ascii
385  "\xb0" . # bad tail
386  "\x3c" .
387  "\x9e"; # bad tail
388  $expect = "\x67" .
389  "\xef\xbf\xbd" .
390  "\xef\xbf\xbd" .
391  "\xef\xbf\xbd" .
392  "\xef\xbf\xbd" .
393  "\xef\xbf\xbd" .
394  "\xef\xbf\xbd" .
395  "\x3c" .
396  "\xef\xbf\xbd";
397  $this->assertEquals(
398  bin2hex($expect),
399  bin2hex(UtfNormal::cleanUp($text))
400  );
401  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testSurrogateRegression()

CleanUpTest::testSurrogateRegression ( )
Todo:
document

Definition at line 404 of file CleanUpTest.php.

References $text, and UtfNormal\cleanUp().

405  {
406  $text = "\xed\xb4\x96" . # surrogate 0xDD16
407  "\x83" . # bad tail
408  "\xb4" . # bad tail
409  "\xac"; # bad head
410  $expect = "\xef\xbf\xbd" .
411  "\xef\xbf\xbd" .
412  "\xef\xbf\xbd" .
413  "\xef\xbf\xbd";
414  $this->assertEquals(
415  bin2hex($expect),
416  bin2hex(UtfNormal::cleanUp($text))
417  );
418  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
$text
Definition: errorreport.php:18
+ Here is the call graph for this function:

◆ testTripleBytes()

CleanUpTest::testTripleBytes ( )
Todo:
document

Definition at line 231 of file CleanUpTest.php.

References doTestTripleBytes().

232  {
233  $this->doTestTripleBytes('', '');
234  $this->doTestTripleBytes('x', '');
235  $this->doTestTripleBytes('', 'x');
236  $this->doTestTripleBytes('x', 'x');
237  }
doTestTripleBytes($head, $tail)
+ Here is the call graph for this function:

◆ XtestAllChars()

CleanUpTest::XtestAllChars ( )

This test is very expensive!

Todo:
document

Definition at line 94 of file CleanUpTest.php.

References $i, $utfCanonicalComp, $utfCanonicalDecomp, $x, UtfNormal\cleanUp(), codepointToUtf8(), UtfNormal\NFC(), UNICODE_MAX, UNICODE_SURROGATE_FIRST, UNICODE_SURROGATE_LAST, and UTF8_REPLACEMENT.

95  {
96  $rep = UTF8_REPLACEMENT;
97  global $utfCanonicalComp, $utfCanonicalDecomp;
98  for ($i = 0x0; $i < UNICODE_MAX; $i++) {
99  $char = codepointToUtf8($i);
100  $clean = UtfNormal::cleanUp($char);
101  $x = sprintf("%04X", $i);
102  if ($i % 0x1000 == 0) {
103  echo "U+$x\n";
104  }
105  if ($i == 0x0009 ||
106  $i == 0x000a ||
107  $i == 0x000d ||
108  ($i > 0x001f && $i < UNICODE_SURROGATE_FIRST) ||
109  ($i > UNICODE_SURROGATE_LAST && $i < 0xfffe) ||
110  ($i > 0xffff && $i <= UNICODE_MAX)) {
111  if (isset($utfCanonicalComp[$char]) || isset($utfCanonicalDecomp[$char])) {
112  $comp = UtfNormal::NFC($char);
113  $this->assertEquals(
114  bin2hex($comp),
115  bin2hex($clean),
116  "U+$x should be decomposed"
117  );
118  } else {
119  $this->assertEquals(
120  bin2hex($char),
121  bin2hex($clean),
122  "U+$x should be intact"
123  );
124  }
125  } else {
126  $this->assertEquals(bin2hex($rep), bin2hex($clean), $x);
127  }
128  }
129  }
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C...
Definition: UtfNormal.php:125
const UNICODE_SURROGATE_LAST
Definition: UtfNormal.php:49
global $utfCanonicalDecomp
Definition: UtfNormal.php:21
const UNICODE_MAX
Definition: UtfNormal.php:50
static NFC($string)
Definition: UtfNormal.php:517
const UNICODE_SURROGATE_FIRST
Definition: UtfNormal.php:48
const UTF8_REPLACEMENT
Definition: UtfNormal.php:68
global $utfCanonicalComp
Definition: UtfNormal.php:21
$i
Definition: disco.tpl.php:19
codepointToUtf8($codepoint)
Return UTF-8 sequence for a given Unicode code point.
$x
Definition: complexTest.php:9
+ Here is the call graph for this function:

The documentation for this class was generated from the following file: