# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
   30if( php_sapi_name() != 
'cli' ) {
 
   31        die( 
"Run me from the command line please.\n" );
 
   34require_once 
'include/Unicode/UtfNormalUtil.php';
 
   36$in = fopen(
"DerivedNormalizationProps.txt", 
"rt" );
 
   38        print "Can't open DerivedNormalizationProps.txt for reading.\n";
 
   39        print "If necessary, fetch this file from the internet:\n";
 
   40        print "http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt\n";
 
   43print "Initializing normalization quick check tables...\n";
 
   45while( 
false !== ($line = fgets( 
$in ) ) ) {
 
   47        if( preg_match( 
'/^([0-9A-F]+)(?:..([0-9A-F]+))?\s*;\s*(NFC_QC)\s*;\s*([MN])/', $line, $matches ) ) {
 
   48                list( $junk, $first, $last, $prop, $value ) = $matches;
 
   49                #print "$first $last $prop $value\n";
 
   50                if( !$last ) $last = $first;
 
   51                for( $i = hexdec( $first ); $i <= hexdec( $last ); $i++) {
 
   59$in = fopen(
"CompositionExclusions.txt", 
"rt" );
 
   61        print "Can't open CompositionExclusions.txt for reading.\n";
 
   62        print "If necessary, fetch this file from the internet:\n";
 
   63        print "http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt\n";
 
   67while( 
false !== ($line = fgets( 
$in ) ) ) {
 
   68        if( preg_match( 
'/^([0-9A-F]+)/i', $line, $matches ) ) {
 
   69                $codepoint = $matches[1];
 
   76$in = fopen(
"UnicodeData.txt", 
"rt" );
 
   78        print "Can't open UnicodeData.txt for reading.\n";
 
   79        print "If necessary, fetch this file from the internet:\n";
 
   80        print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
 
   92print "Reading character definitions...\n";
 
   93while( 
false !== ($line = fgets( 
$in ) ) ) {
 
   97        $canonicalCombiningClass = 
$columns[3];
 
  102        if( $canonicalCombiningClass != 0 ) {
 
  106        if( $decompositionMapping === 
'' ) 
continue;
 
  107        if( preg_match( 
'/^<(.+)> (.*)$/', $decompositionMapping, $matches ) ) {
 
  108                # Compatibility decomposition 
  110                $decompositionMapping = $matches[2];
 
  126        #print "$codepoint | $canonicalCombiningClass | $decompositionMapping\n";
 
  130print "Recursively expanding canonical mappings...\n";
 
  134        print "pass $pass\n";
 
  137                $newDest = preg_replace_callback(
 
  138                        '/([\xc0-\xff][\x80-\xbf]+)/',
 
  141                if( $newDest === $dest ) 
continue;
 
  148print "Recursively expanding compatibility mappings...\n";
 
  152        print "pass $pass\n";
 
  155                $newDest = preg_replace_callback(
 
  156                        '/([\xc0-\xff][\x80-\xbf]+)/',
 
  159                if( $newDest === $dest ) 
continue;
 
  166print "$total decomposition mappings ($canon canonical, $compat compatibility)\n";
 
  168$out = fopen(
"UtfNormalData.inc", 
"wt");
 
  174        $outdata = 
"<" . 
"?php 
  180global \$utfCombiningClass, \$utfCanonicalComp, \$utfCanonicalDecomp, \$utfCheckNFC; 
  181\$utfCombiningClass = unserialize( '$serCombining' ); 
  182\$utfCanonicalComp = unserialize( '$serComp' ); 
  183\$utfCanonicalDecomp = unserialize( '$serCanon' ); 
  184\$utfCheckNFC = unserialize( '$serCheckNFC' ); 
  186        fputs( 
$out, $outdata );
 
  188        print "Wrote out UtfNormalData.inc\n";
 
  190        print "Can't create file UtfNormalData.inc\n";
 
  195$out = fopen(
"UtfNormalDataK.inc", 
"wt");
 
  198        $outdata = 
"<" . 
"?php 
  204global \$utfCompatibilityDecomp; 
  205\$utfCompatibilityDecomp = unserialize( '$serCompat' ); 
  207        fputs( 
$out, $outdata );
 
  209        print "Wrote out UtfNormalDataK.inc\n";
 
  212        print "Can't create file UtfNormalDataK.inc\n";
 
callbackCanonical( $matches)
callbackCompat( $matches)
if(! $in) $compatibilityDecomp
escapeSingleString( $string)
Escape a string for inclusion in a PHP single-quoted string literal.
hexSequenceToUtf8( $sequence)
Take a series of space-separated hexadecimal numbers representing Unicode code points and return a UTF-8 string composed of those characters.
codepointToUtf8( $codepoint)
Return UTF-8 sequence for a given Unicode code point.
An exception for terminating execution or to throw for unit testing.