ILIAS  Release_4_0_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
OLERead.php
Go to the documentation of this file.
1 <?php
// Signature that opens every OLE2 compound document: the first 8 bytes of the file.
define('IDENTIFIER_OLE', "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1");
29 
// Raw bytes of the OLE file being parsed; assigned by read().
private $data = '';

// OLE identifier (class-level alias of the global IDENTIFIER_OLE, used as self::IDENTIFIER_OLE)
const IDENTIFIER_OLE = IDENTIFIER_OLE;

// Size of a sector = 512 bytes
const BIG_BLOCK_SIZE = 0x200;

// Size of a short sector = 64 bytes
const SMALL_BLOCK_SIZE = 0x40;

// Size of a directory entry always = 128 bytes
const PROPERTY_STORAGE_BLOCK_SIZE = 0x80;

// Minimum size of a standard stream = 4096 bytes, streams smaller than this are stored as short streams
const SMALL_BLOCK_THRESHOLD = 0x1000;

// header offsets
// Total number of sectors used for the SAT
const NUM_BIG_BLOCK_DEPOT_BLOCKS_POS = 0x2c;
// SecID of the first sector of the directory stream
const ROOT_START_BLOCK_POS = 0x30;
// SecID of the first sector of the SSAT
const SMALL_BLOCK_DEPOT_BLOCK_POS = 0x3c;
// SecID of the first sector of the MSAT
const EXTENSION_BLOCK_POS = 0x44;
// Total number of sectors used by the MSAT
const NUM_EXTENSION_BLOCK_POS = 0x48;
// Start of the part of the MSAT that is stored inside the header itself
const BIG_BLOCK_DEPOT_BLOCKS_POS = 0x4c;

// property storage offsets (directory offsets)
const SIZE_OF_NAME_POS = 0x40;
const TYPE_POS = 0x42;
const START_BLOCK_POS = 0x74;
const SIZE_POS = 0x78;
61 
/**
 * Read an OLE2 compound document into memory and index its structure.
 *
 * Loads the whole file, verifies the 8-byte OLE identifier, decodes the header
 * fields, collects the SecIDs of the SAT sectors (from the header and from any
 * MSAT extension sectors), builds the big-block and small-block chains, and
 * finally reads the directory stream and parses its entries.
 *
 * @param string $sFileName Path of the file to read
 * @throws Exception If the file cannot be read, is not an OLE file, or its
 *                   short-sector allocation table chain is broken
 */
public function read($sFileName)
{
    // Check if file exists and is readable
    if (!is_readable($sFileName)) {
        throw new Exception("Could not open " . $sFileName . " for reading! File does not exist, or it is not readable.");
    }

    // Get the file data; the read can still fail after the is_readable() check
    $contents = file_get_contents($sFileName);
    if ($contents === false) {
        throw new Exception("Could not open " . $sFileName . " for reading! File does not exist, or it is not readable.");
    }
    $this->data = $contents;

    // Check OLE identifier
    if (substr($this->data, 0, 8) !== self::IDENTIFIER_OLE) {
        throw new Exception('The filename ' . $sFileName . ' is not recognised as an OLE file');
    }

    // Total number of sectors used for the SAT
    $this->numBigBlockDepotBlocks = $this->_GetInt4d($this->data, self::NUM_BIG_BLOCK_DEPOT_BLOCKS_POS);

    // SecID of the first sector of the directory stream
    $this->rootStartBlock = $this->_GetInt4d($this->data, self::ROOT_START_BLOCK_POS);

    // SecID of the first sector of the SSAT (or -2 if not extant)
    $this->sbdStartBlock = $this->_GetInt4d($this->data, self::SMALL_BLOCK_DEPOT_BLOCK_POS);

    // SecID of the first sector of the MSAT (or -2 if no additional sectors are used)
    $this->extensionBlock = $this->_GetInt4d($this->data, self::EXTENSION_BLOCK_POS);

    // Total number of sectors used by MSAT
    $this->numExtensionBlocks = $this->_GetInt4d($this->data, self::NUM_EXTENSION_BLOCK_POS);

    $bigBlockDepotBlocks = array();

    // The first SecIDs of the SAT are stored in the header, starting at this offset
    $pos = self::BIG_BLOCK_DEPOT_BLOCKS_POS;

    $bbdBlocks = $this->numBigBlockDepotBlocks;

    if ($this->numExtensionBlocks != 0) {
        // With extension sectors in use, the header part of the MSAT is completely filled
        $bbdBlocks = (self::BIG_BLOCK_SIZE - self::BIG_BLOCK_DEPOT_BLOCKS_POS) / 4;
    }

    for ($i = 0; $i < $bbdBlocks; ++$i) {
        $bigBlockDepotBlocks[$i] = $this->_GetInt4d($this->data, $pos);
        $pos += 4;
    }

    for ($j = 0; $j < $this->numExtensionBlocks; ++$j) {
        $pos = ($this->extensionBlock + 1) * self::BIG_BLOCK_SIZE;
        // The last 4 bytes of an extension sector hold the SecID of the next one
        $blocksToRead = min($this->numBigBlockDepotBlocks - $bbdBlocks, self::BIG_BLOCK_SIZE / 4 - 1);

        for ($i = $bbdBlocks; $i < $bbdBlocks + $blocksToRead; ++$i) {
            $bigBlockDepotBlocks[$i] = $this->_GetInt4d($this->data, $pos);
            $pos += 4;
        }

        $bbdBlocks += $blocksToRead;
        if ($bbdBlocks < $this->numBigBlockDepotBlocks) {
            $this->extensionBlock = $this->_GetInt4d($this->data, $pos);
        }
    }

    // Build the big-block chain by concatenating all SAT sectors
    $index = 0;
    $this->bigBlockChain = array();

    for ($i = 0; $i < $this->numBigBlockDepotBlocks; ++$i) {
        $pos = ($bigBlockDepotBlocks[$i] + 1) * self::BIG_BLOCK_SIZE;

        for ($j = 0; $j < self::BIG_BLOCK_SIZE / 4; ++$j) {
            $this->bigBlockChain[$index] = $this->_GetInt4d($this->data, $pos);
            $pos += 4;
            ++$index;
        }
    }

    // Build the small-block chain by following the SSAT sectors through the big-block chain
    $index = 0;
    $sbdBlock = $this->sbdStartBlock;
    $this->smallBlockChain = array();

    // A well-formed chain visits each sector at most once, which bounds the walk
    $sectorsLeft = count($this->bigBlockChain);

    while ($sbdBlock != -2) {
        // Without this guard a dangling or cyclic SecID would loop forever
        if (!isset($this->bigBlockChain[$sbdBlock]) || $sectorsLeft-- <= 0) {
            throw new Exception('The filename ' . $sFileName . ' contains a broken short-sector allocation table chain');
        }

        $pos = ($sbdBlock + 1) * self::BIG_BLOCK_SIZE;

        for ($j = 0; $j < self::BIG_BLOCK_SIZE / 4; ++$j) {
            $this->smallBlockChain[$index] = $this->_GetInt4d($this->data, $pos);
            $pos += 4;
            ++$index;
        }

        $sbdBlock = $this->bigBlockChain[$sbdBlock];
    }

    // read the directory stream
    $this->entry = $this->_readData($this->rootStartBlock);

    $this->_readPropertySets();
}
167 
/**
 * Return the raw bytes of the workbook stream found by read().
 *
 * Streams smaller than SMALL_BLOCK_THRESHOLD are stored as short streams: their
 * 64-byte short sectors live inside the stream that the root entry points to and
 * are linked through the small-block chain. Larger streams are read sector by
 * sector through the big-block chain.
 *
 * @return string Concatenated sector data of the workbook stream
 * @throws Exception If no workbook or root entry is known, or the short-sector chain is broken
 */
public function getWorkBook()
{
    // Without a located workbook entry the lookups below yield null and the chain walk never ends
    if (!isset($this->wrkbook) || !isset($this->props[$this->wrkbook])) {
        throw new Exception('No Workbook stream was found in the OLE file');
    }
    $workbook = $this->props[$this->wrkbook];

    if ($workbook['size'] >= self::SMALL_BLOCK_THRESHOLD) {
        // Standard stream: follow the big-block chain
        return $this->_readData($workbook['startBlock']);
    }

    // Short stream: the short sectors are carved out of the root entry's stream
    if (!isset($this->rootentry) || !isset($this->props[$this->rootentry])) {
        throw new Exception('No Root Entry was found in the OLE file');
    }
    $rootdata = $this->_readData($this->props[$this->rootentry]['startBlock']);

    $streamData = '';
    $block = $workbook['startBlock'];

    // A well-formed chain visits each short sector at most once, which bounds the walk
    $sectorsLeft = count($this->smallBlockChain);

    while ($block != -2) {
        if (!isset($this->smallBlockChain[$block]) || $sectorsLeft-- <= 0) {
            throw new Exception('The short-sector chain of the Workbook stream is broken');
        }

        $streamData .= substr($rootdata, $block * self::SMALL_BLOCK_SIZE, self::SMALL_BLOCK_SIZE);
        $block = $this->smallBlockChain[$block];
    }

    return $streamData;
}
215 
/**
 * Read a standard stream by following the big-block chain.
 *
 * Starting at the given SecID, appends each 512-byte sector to the result and
 * moves on to the SecID stored for it in the big-block chain, until the
 * end-of-chain marker (-2) is reached.
 *
 * @param int $bl SecID of the first sector of the stream
 * @return string Concatenated sector data
 * @throws Exception If the chain refers to an unknown sector or does not terminate
 */
private function _readData($bl)
{
    $data = '';
    $block = $bl;

    // A well-formed chain visits each sector at most once, which bounds the walk
    $sectorsLeft = count($this->bigBlockChain);

    while ($block != -2) {
        // Without this guard a dangling or cyclic SecID would loop forever while growing $data
        if (!isset($this->bigBlockChain[$block]) || $sectorsLeft-- <= 0) {
            throw new Exception('Corrupt OLE file: sector chain is broken or does not terminate');
        }

        // Sector 0 starts right after the 512-byte header, hence the +1
        $data .= substr($this->data, ($block + 1) * self::BIG_BLOCK_SIZE, self::BIG_BLOCK_SIZE);
        $block = $this->bigBlockChain[$block];
    }

    return $data;
}
235 
/**
 * Parse the directory stream held in $this->entry into $this->props.
 *
 * Each directory entry is PROPERTY_STORAGE_BLOCK_SIZE bytes long. For every entry
 * the name, type, start block and size are appended to $this->props; the indexes
 * of the workbook entry and of the root entry are remembered in $this->wrkbook
 * and $this->rootentry.
 */
private function _readPropertySets()
{
    $entrySize = self::PROPERTY_STORAGE_BLOCK_SIZE;
    $length = strlen($this->entry);

    // loop through entries, advancing by one entry per iteration
    for ($offset = 0; $offset < $length; $offset += $entrySize) {
        // entry data
        $d = substr($this->entry, $offset, $entrySize);

        // A truncated trailing entry cannot be decoded; the fields below would be out of range
        if (strlen($d) < $entrySize) {
            break;
        }

        // size in bytes of name
        $nameSize = ord($d[self::SIZE_OF_NAME_POS]) | (ord($d[self::SIZE_OF_NAME_POS + 1]) << 8);

        // type of entry
        $type = ord($d[self::TYPE_POS]);

        // sectorID of first sector or short sector, if this entry refers to a stream (the case with workbook)
        // sectorID of first sector of the short-stream container stream, if this entry is root entry
        $startBlock = $this->_GetInt4d($d, self::START_BLOCK_POS);

        $size = $this->_GetInt4d($d, self::SIZE_POS);

        // The name occupies the first $nameSize bytes; substr() clamps an oversized
        // $nameSize to the entry instead of reading past its end. NUL bytes are dropped.
        $name = str_replace("\x00", "", substr($d, 0, $nameSize));

        $this->props[] = array(
            'name' => $name,
            'type' => $type,
            'startBlock' => $startBlock,
            'size' => $size);

        // Workbook directory entry (BIFF5 uses Book, BIFF8 uses Workbook)
        if (($name == 'Workbook') || ($name == 'Book') || ($name == 'WORKBOOK')) {
            $this->wrkbook = count($this->props) - 1;
        }

        // Root entry
        if ($name == 'Root Entry' || $name == 'ROOT ENTRY' || $name == 'R') {
            $this->rootentry = count($this->props) - 1;
        }
    }
}
287 
/**
 * Read a signed 32-bit little-endian integer from a binary string.
 *
 * Bytes that lie beyond the end of $data are treated as zero, so a truncated
 * buffer yields the same value as before but without out-of-range string reads.
 *
 * @param string $data Binary data
 * @param int $pos Byte offset of the first (least significant) byte
 * @return int Value in the range -2^31 .. 2^31-1
 * @throws Exception If $pos is negative
 */
private function _GetInt4d($data, $pos)
{
    // A negative offset would silently address bytes counted from the end of the string
    if ($pos < 0) {
        throw new Exception('Parameter pos=' . $pos . ' is invalid.');
    }

    // Take the 4 bytes and zero-fill whatever is missing; only this slice is padded,
    // never the (possibly very large) source buffer
    $bytes = str_pad((string) substr($data, $pos, 4), 4, "\x00");

    // Hacked by Andreas Rehm 2006 to ensure correct result of the <<24 block on 32 and 64bit systems
    $_or_24 = ord($bytes[3]);
    if ($_or_24 >= 128) {
        // high bit set: negative number
        $_ord_24 = -abs((256 - $_or_24) << 24);
    } else {
        $_ord_24 = ($_or_24 & 127) << 24;
    }

    return ord($bytes[0]) | (ord($bytes[1]) << 8) | (ord($bytes[2]) << 16) | $_ord_24;
}
304 
305 }