2# Copyright (C) 2004 Brion Vibber <brion@pobox.com> 
    5# This program is free software; you can redistribute it and/or modify 
    6# it under the terms of the GNU General Public License as published by 
    7# the Free Software Foundation; either version 2 of the License, or 
    8# (at your option) any later version. 
   10# This program is distributed in the hope that it will be useful, 
   11# but WITHOUT ANY WARRANTY; without even the implied warranty of 
   12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
   13# GNU General Public License for more details. 
   15# You should have received a copy of the GNU General Public License along 
   16# with this program; if not, write to the Free Software Foundation, Inc., 
   17# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
   30if (php_sapi_name() != 
'cli') {
 
   31    die(
"Run me from the command line please.\n");
 
   34require_once 
'include/Unicode/UtfNormalUtil.php';
 
   36$in = fopen(
"DerivedNormalizationProps.txt", 
"rt");
 
   38    print "Can't open DerivedNormalizationProps.txt for reading.\n";
 
   39    print "If necessary, fetch this file from the internet:\n";
 
   40    print "http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt\n";
 
   43print "Initializing normalization quick check tables...\n";
 
   45while (
false !== ($line = fgets(
$in))) {
 
   47    if (preg_match(
'/^([0-9A-F]+)(?:..([0-9A-F]+))?\s*;\s*(NFC_QC)\s*;\s*([MN])/', $line, $matches)) {
 
   48        list($junk, $first, $last, $prop, $value) = $matches;
 
   49        #print "$first $last $prop $value\n";
 
   53        for (
$i = hexdec($first); 
$i <= hexdec($last); 
$i++) {
 
   61$in = fopen(
"CompositionExclusions.txt", 
"rt");
 
   63    print "Can't open CompositionExclusions.txt for reading.\n";
 
   64    print "If necessary, fetch this file from the internet:\n";
 
   65    print "http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt\n";
 
   69while (
false !== ($line = fgets(
$in))) {
 
   70    if (preg_match(
'/^([0-9A-F]+)/i', $line, $matches)) {
 
   71        $codepoint = $matches[1];
 
   78$in = fopen(
"UnicodeData.txt", 
"rt");
 
   80    print "Can't open UnicodeData.txt for reading.\n";
 
   81    print "If necessary, fetch this file from the internet:\n";
 
   82    print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
 
   94print "Reading character definitions...\n";
 
   95while (
false !== ($line = fgets(
$in))) {
 
   99    $canonicalCombiningClass = 
$columns[3];
 
  100    $decompositionMapping = 
$columns[5];
 
  104    if ($canonicalCombiningClass != 0) {
 
  108    if ($decompositionMapping === 
'') {
 
  111    if (preg_match(
'/^<(.+)> (.*)$/', $decompositionMapping, $matches)) {
 
  112        # Compatibility decomposition 
  114        $decompositionMapping = $matches[2];
 
   130    #print "$codepoint | $canonicalCombiningClass | $decompositionMapping\n";
 
  134print "Recursively expanding canonical mappings...\n";
 
  138    print "pass $pass\n";
 
  141        $newDest = preg_replace_callback(
 
  142            '/([\xc0-\xff][\x80-\xbf]+)/',
 
  146        if ($newDest === $dest) {
 
  155print "Recursively expanding compatibility mappings...\n";
 
  159    print "pass $pass\n";
 
  162        $newDest = preg_replace_callback(
 
  163            '/([\xc0-\xff][\x80-\xbf]+)/',
 
  167        if ($newDest === $dest) {
 
  176print "$total decomposition mappings ($canon canonical, $compat compatibility)\n";
 
  178$out = fopen(
"UtfNormalData.inc", 
"wt");
 
  184    $outdata = 
"<" . 
"?php 
  190global \$utfCombiningClass, \$utfCanonicalComp, \$utfCanonicalDecomp, \$utfCheckNFC; 
  191\$utfCombiningClass = unserialize( '$serCombining' ); 
  192\$utfCanonicalComp = unserialize( '$serComp' ); 
  193\$utfCanonicalDecomp = unserialize( '$serCanon' ); 
  194\$utfCheckNFC = unserialize( '$serCheckNFC' ); 
  196    fputs(
$out, $outdata);
 
  198    print "Wrote out UtfNormalData.inc\n";
 
  200    print "Can't create file UtfNormalData.inc\n";
 
  205$out = fopen(
"UtfNormalDataK.inc", 
"wt");
 
  208    $outdata = 
"<" . 
"?php 
  214global \$utfCompatibilityDecomp; 
  215\$utfCompatibilityDecomp = unserialize( '$serCompat' ); 
  217    fputs(
$out, $outdata);
 
  219    print "Wrote out UtfNormalDataK.inc\n";
 
  222    print "Can't create file UtfNormalDataK.inc\n";
 
callbackCanonical($matches)
if(! $in) $compatibilityDecomp
escapeSingleString($string)
Escape a string for inclusion in a PHP single-quoted string literal.
codepointToUtf8($codepoint)
Return UTF-8 sequence for a given Unicode code point.
hexSequenceToUtf8($sequence)
Take a series of space-separated hexadecimal numbers representing Unicode code points and return a UTF-8 string composed of those characters.
An exception for terminating execution or to throw for unit testing.