2# Copyright (C) 2004 Brion Vibber <brion@pobox.com> 
    5# This program is free software; you can redistribute it and/or modify 
    6# it under the terms of the GNU General Public License as published by 
    7# the Free Software Foundation; either version 2 of the License, or 
    8# (at your option) any later version. 
   10# This program is distributed in the hope that it will be useful, 
   11# but WITHOUT ANY WARRANTY; without even the implied warranty of 
   12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
   13# GNU General Public License for more details. 
   15# You should have received a copy of the GNU General Public License along 
   16# with this program; if not, write to the Free Software Foundation, Inc., 
   17# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
   29if (php_sapi_name() != 
'cli') {
 
   30    die(
"Run me from the command line please.\n");
 
   34require_once(
'include/Unicode/UtfNormal.php');
 
   35require_once(
'../DifferenceEngine.php');
 
   37dl(
'php_utfnormal.so');
 
   44    for (
$i = 0; 
$i < $length; 
$i++) {
 
   45        $out .= chr(mt_rand($nullOk ? 0 : 1, $ascii ? 127 : 255));
 
   53    # We exclude a few chars that ICU would not. 
   54    $str = preg_replace(
'/[\x00-\x08\x0b\x0c\x0e-\x1f]/', 
UTF8_REPLACEMENT, $str);
 
   58    # UnicodeString constructor fails if the string ends with a head byte. 
   59    # Add a junk char at the end, we'll strip it off 
   60    return rtrim(utf8_normalize($str . 
"\x01", 
UNORM_NFC), 
"\x01");
 
   70    $ota = explode(
"\n", str_replace(
"\r\n", 
"\n", $a));
 
   71    $nta = explode(
"\n", str_replace(
"\r\n", 
"\n", $b));
 
   73    $diffs = 
new Diff($ota, $nta);
 
   75    $funky = $formatter->format($diffs);
 
   77    preg_match_all(
'/<(?:ins|del) class="diffchange">(.*?)<\/(?:ins|del)>/', $funky, $matches);
 
   78    foreach ($matches[1] as $bit) {
 
   94    echo strlen($clean) . 
", " . strlen($norm);
 
   95    if ($clean == $norm) {
 
   99        echo 
"\traw: " . bin2hex($str) . 
"\n" .
 
  100             "\tphp: " . bin2hex($clean) . 
"\n" .
 
  101             "\ticu: " . bin2hex($norm) . 
"\n";
 
randomString($length, $nullOk, $ascii=false)
An exception for terminating execution or to throw for unit testing.
static cleanUp($string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C,...