ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
CodePage.php
Go to the documentation of this file.
1 <?php
2 
4 
6 
/**
 * Lookup table between Microsoft code page identifiers and the encoding
 * names used for them, with helpers to validate a name and to resolve an
 * identifier.
 */
class CodePage
{
    /** Encoding name exposed as the default code page. */
    public const DEFAULT_CODE_PAGE = 'CP1252';

    /**
     * Microsoft code page identifier => encoding name.
     *
     * Identifiers 720 and 32769 are deliberately left out; numberToName()
     * reports them as "not supported" rather than "unknown".
     *
     * @var array<int, string>
     */
    private static $pageArray = [
        0 => 'CP1252', // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
        367 => 'ASCII', // ASCII
        437 => 'CP437', // OEM US
        //720 => 'notsupported', // OEM Arabic
        737 => 'CP737', // OEM Greek
        775 => 'CP775', // OEM Baltic
        850 => 'CP850', // OEM Latin I
        852 => 'CP852', // OEM Latin II (Central European)
        855 => 'CP855', // OEM Cyrillic
        857 => 'CP857', // OEM Turkish
        858 => 'CP858', // OEM Multilingual Latin I with Euro
        860 => 'CP860', // OEM Portuguese
        861 => 'CP861', // OEM Icelandic
        862 => 'CP862', // OEM Hebrew
        863 => 'CP863', // OEM Canadian (French)
        864 => 'CP864', // OEM Arabic
        865 => 'CP865', // OEM Nordic
        866 => 'CP866', // OEM Cyrillic (Russian)
        869 => 'CP869', // OEM Greek (Modern)
        874 => 'CP874', // ANSI Thai
        932 => 'CP932', // ANSI Japanese Shift-JIS
        936 => 'CP936', // ANSI Chinese Simplified GBK
        949 => 'CP949', // ANSI Korean (Wansung)
        950 => 'CP950', // ANSI Chinese Traditional BIG5
        1200 => 'UTF-16LE', // UTF-16 (BIFF8)
        1250 => 'CP1250', // ANSI Latin II (Central European)
        1251 => 'CP1251', // ANSI Cyrillic
        1252 => 'CP1252', // ANSI Latin I (BIFF4-BIFF7)
        1253 => 'CP1253', // ANSI Greek
        1254 => 'CP1254', // ANSI Turkish
        1255 => 'CP1255', // ANSI Hebrew
        1256 => 'CP1256', // ANSI Arabic
        1257 => 'CP1257', // ANSI Baltic
        1258 => 'CP1258', // ANSI Vietnamese
        1361 => 'CP1361', // ANSI Korean (Johab)
        10000 => 'MAC', // Apple Roman
        10001 => 'CP932', // Macintosh Japanese
        10002 => 'CP950', // Macintosh Chinese Traditional
        10003 => 'CP1361', // Macintosh Korean
        10004 => 'MACARABIC', // Apple Arabic
        10005 => 'MACHEBREW', // Apple Hebrew
        10006 => 'MACGREEK', // Macintosh Greek
        10007 => 'MACCYRILLIC', // Macintosh Cyrillic
        10008 => 'CP936', // Macintosh - Simplified Chinese (GB 2312)
        10010 => 'MACROMANIA', // Macintosh Romania
        10017 => 'MACUKRAINE', // Macintosh Ukraine
        10021 => 'MACTHAI', // Macintosh Thai
        10029 => 'MACCENTRALEUROPE', // Macintosh Central Europe
        10079 => 'MACICELAND', // Macintosh Icelandic
        10081 => 'MACTURKISH', // Macintosh Turkish
        10082 => 'MACCROATIAN', // Macintosh Croatian
        21010 => 'UTF-16LE', // UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously use Codepage 21010 for UTF-16LE
        32768 => 'MAC', // Apple Roman
        //32769 => 'unsupported', // ANSI Latin I (BIFF2-BIFF3)
        65000 => 'UTF-7', // Unicode (UTF-7)
        65001 => 'UTF-8', // Unicode (UTF-8)
    ];

    /**
     * Tell whether an encoding name is one of the names in the mapping table.
     *
     * The comparison is strict, so the name must match exactly (including case).
     *
     * @param string $codePage Encoding name to look for, e.g. 'CP1252'.
     *
     * @return bool True when the name occurs as a value of the table.
     */
    public static function validate(string $codePage): bool
    {
        // array_search() may legitimately return key 0, hence the explicit !== false.
        return array_search($codePage, self::$pageArray, true) !== false;
    }

    /**
     * Convert a Microsoft code page identifier to the code page name which
     * iconv and mbstring understand.
     *
     * @param int $codePage Microsoft code page identifier.
     *
     * @return string Encoding name mapped to the identifier.
     *
     * @throws PhpSpreadsheetException When the identifier is known but unsupported
     *                                 (720, 32769) or is not in the table at all.
     */
    public static function numberToName(int $codePage): string
    {
        // No table value is null, so a null result means "key absent".
        $encodingName = self::$pageArray[$codePage] ?? null;
        if ($encodingName !== null) {
            return $encodingName;
        }

        // 720 (OEM Arabic) and 32769 (ANSI Latin I, BIFF2-BIFF3) are recognised
        // identifiers without a mapping; everything else is simply unknown.
        $recognisedButUnsupported = [720, 32769];
        $message = in_array($codePage, $recognisedButUnsupported, true)
            ? "Code page $codePage not supported."
            : 'Unknown codepage: ' . $codePage;

        throw new PhpSpreadsheetException($message);
    }

    /**
     * Expose the complete identifier-to-name mapping table.
     *
     * @return array<int, string> Encoding names keyed by code page identifier.
     */
    public static function getEncodings(): array
    {
        return self::$pageArray;
    }
}
static validate(string $codePage)
Definition: CodePage.php:70
static numberToName(int $codePage)
Convert Microsoft Code Page Identifier to Code Page Name which iconv and mbstring understands...
Definition: CodePage.php:83