2# Copyright (C) 2004 Brion Vibber <brion@pobox.com> 
    5# This program is free software; you can redistribute it and/or modify 
    6# it under the terms of the GNU General Public License as published by 
    7# the Free Software Foundation; either version 2 of the License, or 
    8# (at your option) any later version. 
   10# This program is distributed in the hope that it will be useful, 
   11# but WITHOUT ANY WARRANTY; without even the implied warranty of 
   12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
   13# GNU General Public License for more details. 
   15# You should have received a copy of the GNU General Public License along 
   16# with this program; if not, write to the Free Software Foundation, Inc., 
   17# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
   29if( php_sapi_name() != 
'cli' ) {
 
   30        die( 
"Run me from the command line please.\n" );
 
   34require_once( 
'include/Unicode/UtfNormal.php' );
 
   35require_once( 
'../DifferenceEngine.php' );
 
   37dl(
'php_utfnormal.so' );
 
   43        for( $i = 0; $i < $length; $i++ )
 
   44                $out .= chr( mt_rand( $nullOk ? 0 : 1, $ascii ? 127 : 255 ) );
 
   50        # We exclude a few chars that ICU would not. 
   51        $str = preg_replace( 
'/[\x00-\x08\x0b\x0c\x0e-\x1f]/', 
UTF8_REPLACEMENT, $str );
 
   55        # UnicodeString constructor fails if the string ends with a head byte. 
   56        # Add a junk char at the end, we'll strip it off 
   57        return rtrim( utf8_normalize( $str . 
"\x01", 
UNORM_NFC ), 
"\x01" );
 
   65        $ota = explode( 
"\n", str_replace( 
"\r\n", 
"\n", $a ) );
 
   66        $nta = explode( 
"\n", str_replace( 
"\r\n", 
"\n", $b ) );
 
   68        $diffs = 
new Diff( $ota, $nta );
 
   70        $funky = $formatter->format( $diffs );
 
   72        preg_match_all( 
'/<(?:ins|del) class="diffchange">(.*?)<\/(?:ins|del)>/', $funky, $matches );
 
   73        foreach( $matches[1] as $bit ) {
 
   74                $hex = bin2hex( $bit );
 
   89        echo strlen( $clean ) . 
", " . strlen( $norm );
 
   90        if( $clean == $norm ) {
 
   94                echo 
"\traw: " . bin2hex( $str ) . 
"\n" .
 
   95                         "\tphp: " . bin2hex( $clean ) . 
"\n" .
 
   96                         "\ticu: " . bin2hex( $norm ) . 
"\n";
 
randomString( $length, $nullOk, $ascii=false)
An exception for terminating execution or to throw for unit testing.
static cleanUp( $string)
The ultimate convenience function! Clean up invalid UTF-8 sequences, and convert to normal form C,...