ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
XmlScanner.php
Go to the documentation of this file.
1 <?php
2 
4 
7 
/**
 * Screens XML payloads for a forbidden marker (by default `<!DOCTYPE`) before
 * they are handed to an XML parser, as a defence against XXE/XEE attacks.
 *
 * On PHP versions below 8.0 it can additionally switch off the libxml external
 * entity loader (when enabled through Settings) and restores the previous
 * loader state on destruction or at script shutdown.
 */
class XmlScanner
{
    /**
     * Literal marker whose presence causes scan() to reject the XML.
     *
     * @var string
     */
    private $pattern;

    /**
     * Optional extra processing step applied to XML that passed the scan.
     *
     * @var null|callable
     */
    private $callback;

    /**
     * Value returned by the first libxml_disable_entity_loader(true) call,
     * i.e. the loader state to restore in shutdown(); null when nothing has
     * to be restored.
     *
     * @var null|bool
     */
    private static $libxmlDisableEntityLoaderValue;

    /**
     * Whether shutdown() has already been registered as a shutdown function.
     *
     * @var bool
     */
    private static $shutdownRegistered = false;

    /**
     * @param string $pattern Literal marker that scan() must not find in the XML
     */
    public function __construct($pattern = '<!DOCTYPE')
    {
        $this->pattern = $pattern;

        $this->disableEntityLoaderCheck();

        // A fatal error will bypass the destructor, so we register a shutdown here
        if (!self::$shutdownRegistered) {
            self::$shutdownRegistered = true;
            register_shutdown_function([__CLASS__, 'shutdown']);
        }
    }

    /**
     * Create the scanner matching the given reader.
     *
     * Only the HTML reader is screened for `<!ENTITY`; every other reader
     * (Xlsx, Xml, Ods, Gnumeric and anything else) is screened for `<!DOCTYPE`.
     *
     * @param Reader\IReader $reader Reader the scanner will be used by
     *
     * @return self
     */
    public static function getInstance(Reader\IReader $reader)
    {
        return new self($reader instanceof Reader\Html ? '<!ENTITY' : '<!DOCTYPE');
    }

    /**
     * Report whether the running PHP is a 7.x release at or above the patch
     * level checked here (7.0.27, 7.1.13, 7.2.1, or any later 7.x minor).
     *
     * NOTE(review): the signature of this method was absent from the listing;
     * name and visibility follow upstream PhpSpreadsheet - confirm.
     *
     * @return bool
     */
    public static function threadSafeLibxmlDisableEntityLoaderAvailability()
    {
        if (PHP_MAJOR_VERSION === 7) {
            switch (PHP_MINOR_VERSION) {
                case 2:
                    return PHP_RELEASE_VERSION >= 1;
                case 1:
                    return PHP_RELEASE_VERSION >= 13;
                case 0:
                    return PHP_RELEASE_VERSION >= 27;
            }

            return true;
        }

        return false;
    }

    /**
     * Disable the libxml entity loader when Settings asks for it (PHP < 8.0
     * only) and remember the state it had before the first such call.
     */
    private function disableEntityLoaderCheck(): void
    {
        // Cheap version check first: on PHP 8+ the settings lookup is never needed.
        if (\PHP_VERSION_ID < 80000 && Settings::getLibXmlDisableEntityLoader()) {
            $previousValue = libxml_disable_entity_loader(true);

            // Keep only the first recorded value; later calls would just see "true".
            if (self::$libxmlDisableEntityLoaderValue === null) {
                self::$libxmlDisableEntityLoaderValue = $previousValue;
            }
        }
    }

    /**
     * Restore the libxml entity loader state recorded by
     * disableEntityLoaderCheck(), if any, and forget it.
     */
    public static function shutdown(): void
    {
        if (self::$libxmlDisableEntityLoaderValue !== null && \PHP_VERSION_ID < 80000) {
            libxml_disable_entity_loader(self::$libxmlDisableEntityLoaderValue);
            self::$libxmlDisableEntityLoaderValue = null;
        }
    }

    public function __destruct()
    {
        self::shutdown();
    }

    /**
     * Register a callable that scan() applies to XML which passed the check;
     * its return value becomes the result of scan().
     */
    public function setAdditionalCallback(callable $callback): void
    {
        $this->callback = $callback;
    }

    /**
     * Extract the upper-cased charset named by the first encoding declaration
     * in the XML, or 'UTF-8' when none is found.
     *
     * Both quote styles and optional whitespace around `=` are accepted, so
     * that `encoding='UTF-7'` or `encoding = "UTF-7"` cannot slip past the
     * charset check in toUtf8().
     *
     * @param string $xml
     *
     * @return string
     */
    private function findCharSet($xml)
    {
        if (preg_match('/encoding\s*=\s*(["\'])(.*?)\1/', $xml, $matches)) {
            return strtoupper($matches[2]);
        }

        return 'UTF-8';
    }

    /**
     * Convert the XML to UTF-8 when it declares another encoding.
     *
     * After conversion the declaration is checked again; if it still names a
     * charset other than UTF-8 the payload is rejected.
     *
     * @param string $xml
     *
     * @return string
     *
     * @throws Reader\Exception when the converted XML still declares a non-UTF-8 charset
     */
    private function toUtf8($xml)
    {
        $charset = $this->findCharSet($xml);

        if ($charset !== 'UTF-8') {
            $xml = mb_convert_encoding($xml, 'UTF-8', $charset);

            if ($this->findCharSet($xml) !== 'UTF-8') {
                throw new Reader\Exception('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
            }
        }

        return $xml;
    }

    /**
     * Scan the XML for the forbidden marker to prevent XXE/XEE attacks.
     *
     * @param string $xml
     *
     * @return string the (UTF-8) XML, after the additional callback if one is set
     *
     * @throws Reader\Exception when the marker is found or the encoding is suspicious
     */
    public function scan($xml)
    {
        $this->disableEntityLoaderCheck();

        $xml = $this->toUtf8($xml);

        // Don't rely purely on libxml_disable_entity_loader()
        // Every marker character is quoted so it is always matched literally,
        // and an optional NUL byte is allowed around each character
        // (presumably to catch 16-bit encodings of the marker).
        $quotedChars = array_map(
            static function ($char) {
                return preg_quote($char, '/');
            },
            str_split($this->pattern)
        );
        $pattern = '/\\0?' . implode('\\0?', $quotedChars) . '\\0?/';

        if (preg_match($pattern, $xml)) {
            throw new Reader\Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }

        if (is_callable($this->callback)) {
            $xml = ($this->callback)($xml);
        }

        return $xml;
    }

    /**
     * Read the given file or stream wrapper path and scan its contents.
     *
     * @param string $filestream Path or stream URL accepted by file_get_contents()
     *
     * @return string
     *
     * @throws Reader\Exception when the source cannot be read or fails the scan
     */
    public function scanFile($filestream)
    {
        $xml = file_get_contents($filestream);

        // Never feed "false" into the scanner: an unreadable source is an error.
        if ($xml === false) {
            throw new Reader\Exception(sprintf('Unable to read XML from "%s" for security scanning', $filestream));
        }

        return $this->scan($xml);
    }
}
$result
Reader for SpreadsheetML, the XML schema for Microsoft Office Excel 2003.
Definition: Xml.php:26
static getLibXmlDisableEntityLoader()
Return the state of the entity loader (disabled/enabled) for libxml loader.
Definition: Settings.php:147
static getInstance(Reader\IReader $reader)
Definition: XmlScanner.php:39
scan($xml)
Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
Definition: XmlScanner.php:127
scanFile($filestream)
Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
Definition: XmlScanner.php:154
PhpSpreadsheet root directory.
Definition: Html.php:22