ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
UtfNormalUtil.php
Go to the documentation of this file.
1 <?php
2 # Copyright (C) 2004 Brion Vibber <brion@pobox.com>
3 # http://www.mediawiki.org/
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 # http://www.gnu.org/copyleft/gpl.html
19 
/**
 * Return the UTF-8 byte sequence for a given Unicode code point.
 *
 * Values in the surrogate range (0xD800-0xDFFF) are not rejected; they are
 * encoded as three bytes like any other value below 0x10000.
 *
 * @param int $codepoint Code point in the range 0..0x10FFFF.
 * @return string The one- to four-byte encoding of $codepoint.
 * @throws InvalidArgumentException If $codepoint is negative or >= 0x110000.
 */
function codepointToUtf8($codepoint)
{
    # Reject out-of-range input up front. Negative values used to slip through
    # the "< 0x80" test and produce a garbage byte from chr().
    if ($codepoint < 0 || $codepoint >= 0x110000) {
        throw new \InvalidArgumentException("Asked for code outside of range ($codepoint)");
    }

    if ($codepoint < 0x80) {
        # Single byte: 0xxxxxxx
        return chr($codepoint);
    }
    if ($codepoint < 0x800) {
        # Two bytes: 110xxxxx 10xxxxxx
        return chr((($codepoint >> 6) & 0x3f) | 0xc0) .
            chr(($codepoint & 0x3f) | 0x80);
    }
    if ($codepoint < 0x10000) {
        # Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        return chr((($codepoint >> 12) & 0x0f) | 0xe0) .
            chr((($codepoint >> 6) & 0x3f) | 0x80) .
            chr(($codepoint & 0x3f) | 0x80);
    }

    # Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr((($codepoint >> 18) & 0x07) | 0xf0) .
        chr((($codepoint >> 12) & 0x3f) | 0x80) .
        chr((($codepoint >> 6) & 0x3f) | 0x80) .
        chr(($codepoint & 0x3f) | 0x80);
}
62 
/**
 * Take a series of whitespace-separated hexadecimal numbers representing
 * Unicode code points and return the UTF-8 string composed of those
 * characters.
 *
 * An empty or whitespace-only sequence yields an empty string.
 *
 * @param string $sequence Hex numbers such as "0041 00e9 20ac".
 * @return string The concatenated UTF-8 encodings, in input order.
 */
function hexSequenceToUtf8($sequence)
{
    $utf = '';

    # PREG_SPLIT_NO_EMPTY drops the empty tokens that a plain explode(' ', ...)
    # produces for empty input or for leading/trailing/doubled separators;
    # each of those used to be decoded by hexdec() as 0 and emitted as a NUL.
    foreach (preg_split('/\s+/', $sequence, -1, PREG_SPLIT_NO_EMPTY) as $hex) {
        $utf .= codepointToUtf8(hexdec($hex));
    }

    return $utf;
}
81 
/**
 * Take a UTF-8 string and return a space-separated series of hex numbers
 * representing its Unicode code points, each padded to at least four digits.
 *
 * @param string $str UTF-8 input.
 * @return string Hex code points such as "0041 00e9"; an empty string when
 *                $str is empty or when the PCRE call fails (preg_replace_callback
 *                returns null on error, e.g. for input that is not valid UTF-8).
 */
function utf8ToHexSequence($str)
{
    # The "s" modifier makes "." match newlines as well; without it line
    # breaks were left in the output unconverted.
    $converted = preg_replace_callback(
        '/(.)/usS',
        function ($hit) {
            return sprintf("%04x ", utf8ToCodepoint($hit[1]));
        },
        $str
    );

    if ($converted === null) {
        # Same result as before, but without handing null to rtrim().
        return '';
    }

    # Every character is emitted with a trailing space; drop the final one.
    return rtrim($converted);
}
100 
/**
 * Determine the Unicode code point of a single-character UTF-8 sequence.
 *
 * The sequence length is taken from the number of leading 1 bits in the
 * first byte and must match the byte length of $char exactly. Overlong
 * encodings, surrogates and lead bytes announcing more than four bytes are
 * not rejected by this function.
 *
 * @param string $char Exactly one UTF-8 encoded character.
 * @return int|false The code point, or false when $char is empty, its byte
 *                   length does not match the lead byte, the lead byte is a
 *                   continuation byte, or a trailing byte is not a
 *                   continuation byte.
 */
function utf8ToCodepoint($char)
{
    $char = (string) $char;
    $byteCount = strlen($char);
    if ($byteCount === 0) {
        # Nothing to decode; also avoids reading offset 0 of an empty string.
        return false;
    }

    $lead = ord($char[0]);
    if ($lead < 0x80) {
        # Plain ASCII: valid only as a one-byte sequence.
        return $byteCount === 1 ? $lead : false;
    }

    # Find the length: count the leading 1 bits of the first byte.
    $length = 0;
    $z = $lead;
    while ($z & 0x80) {
        $length++;
        $z <<= 1;
    }

    # A single leading 1 bit (10xxxxxx) is a continuation byte and cannot
    # start a character.
    if ($length < 2 || $length !== $byteCount) {
        return false;
    }

    # Mask off the length-determining bits and shift back to the original location
    $z &= 0xff;
    $z >>= $length;

    # Add in the free bits from subsequent bytes
    for ($i = 1; $i < $length; $i++) {
        $next = ord($char[$i]);
        if (($next & 0xc0) !== 0x80) {
            # Trailing bytes must have the form 10xxxxxx.
            return false;
        }
        $z = ($z << 6) | ($next & 0x3f);
    }

    return $z;
}
142 
/**
 * Escape a string for inclusion in a PHP single-quoted string literal.
 *
 * Backslashes are doubled and single quotes are prefixed with a backslash.
 *
 * @param string $string Raw text.
 * @return string Text that can be placed between single quotes in PHP source.
 */
function escapeSingleString($string)
{
    # strtr() with a map replaces in a single pass, so the backslashes it
    # inserts are never escaped a second time.
    $escapes = array(
        "\\" => "\\\\",
        "'" => "\\'",
    );

    return strtr($string, $escapes);
}
utf8ToCodepoint($char)
Determine the Unicode codepoint of a single-character UTF-8 sequence.
utf8ToHexSequence($str)
Take a UTF-8 string and return a space-separated series of hex numbers representing Unicode code points.
$n
Definition: RandomTest.php:85
escapeSingleString($string)
Escape a string for inclusion in a PHP single-quoted string literal.
$i
Definition: disco.tpl.php:19
codepointToUtf8($codepoint)
Return UTF-8 sequence for a given Unicode code point.
hexSequenceToUtf8($sequence)
Take a series of space-separated hexadecimal numbers representing Unicode code points and return a UTF-8 string composed of those characters.