ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
tcpdf_parser.php
Go to the documentation of this file.
1 <?php
2 //============================================================+
3 // File name : tcpdf_parser.php
4 // Version : 1.0.16
5 // Begin : 2011-05-23
6 // Last Update : 2015-04-28
7 // Author : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
8 // License : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3
9 // -------------------------------------------------------------------
10 // Copyright (C) 2011-2015 Nicola Asuni - Tecnick.com LTD
11 //
12 // This file is part of TCPDF software library.
13 //
14 // TCPDF is free software: you can redistribute it and/or modify it
15 // under the terms of the GNU Lesser General Public License as
16 // published by the Free Software Foundation, either version 3 of the
17 // License, or (at your option) any later version.
18 //
19 // TCPDF is distributed in the hope that it will be useful, but
20 // WITHOUT ANY WARRANTY; without even the implied warranty of
21 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22 // See the GNU Lesser General Public License for more details.
23 //
24 // You should have received a copy of the License
25 // along with TCPDF. If not, see
26 // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
27 //
28 // See LICENSE.TXT file for more information.
29 // -------------------------------------------------------------------
30 //
31 // Description : This is a PHP class for parsing PDF documents.
32 //
33 //============================================================+
34 
43 // include class for decoding filters
44 require_once(dirname(__FILE__).'/include/tcpdf_filters.php');
45 
/**
 * Parser for PDF documents.
 *
 * The constructor receives the raw bytes of a PDF file, locates the
 * cross-reference data (classic "xref" tables or cross-reference streams),
 * follows the /Prev chain of incremental updates and parses every indirect
 * object that has a positive byte offset. The result is available through
 * getParsedData().
 *
 * Every parsed element is represented as array(type, value, offset) where
 * "offset" is the position of the first byte following the element.
 */
class TCPDF_PARSER {

	/**
	 * Raw content of the PDF document (emptied once parsing is complete).
	 * @private
	 */
	private $pdfdata = '';

	/**
	 * XREF data: array('xref' => array(objref => offset), 'trailer' => array(...)).
	 * @protected
	 */
	protected $xref = array();

	/**
	 * Array of parsed PDF objects, indexed by "[object number]_[generation number]".
	 * @protected
	 */
	protected $objects = array();

	/**
	 * Class object for decoding filters (not used by the code of this class).
	 * @private
	 */
	private $FilterDecoders;

	/**
	 * Array of configuration parameters.
	 * @private
	 */
	private $cfg = array(
		'die_for_errors' => false,
		'ignore_filter_decoding_errors' => true,
		'ignore_missing_filter_decoders' => true,
	);

	/**
	 * Offsets of the cross-reference sections already decoded.
	 * Used to stop following a /Prev chain that loops back on itself.
	 * @private
	 */
	private $visited_xref = array();

// -----------------------------------------------------------------------------

	/**
	 * Parse a PDF document and store the resulting xref data and objects.
	 * @param $data (string) PDF data to parse.
	 * @param $cfg (array) Array of configuration parameters:
	 *   'die_for_errors' : if true, Error() terminates the script, otherwise it throws an Exception;
	 *   'ignore_filter_decoding_errors' : if true, errors raised while decoding a stream are ignored;
	 *   'ignore_missing_filter_decoders' : if true, missing filter decoders are ignored.
	 * @throws Exception on invalid data (unless 'die_for_errors' is set).
	 * @public
	 */
	public function __construct($data, $cfg=array()) {
		// the configuration must be in place before the first Error() call,
		// otherwise 'die_for_errors' is ignored for the checks below
		$this->setConfig($cfg);
		if (empty($data)) {
			$this->Error('Empty PDF data.');
		}
		// find the pdf header starting position
		if (($trimpos = strpos($data, '%PDF-')) === FALSE) {
			$this->Error('Invalid PDF data: missing %PDF header.');
		}
		// get PDF content string (everything before the header is discarded)
		$this->pdfdata = substr($data, $trimpos);
		// get xref and trailer data
		$this->visited_xref = array();
		$this->xref = $this->getXrefData();
		// parse all document objects
		$this->objects = array();
		if (isset($this->xref['xref']) && is_array($this->xref['xref'])) {
			foreach ($this->xref['xref'] as $obj => $offset) {
				if (!isset($this->objects[$obj]) && ($offset > 0)) {
					// decode objects with positive offset
					// (compressed objects are stored with offset -1 and are skipped)
					$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
				}
			}
		}
		// release some memory
		$this->pdfdata = '';
	}

	/**
	 * Set the configuration parameters.
	 * Only the known keys are read; each value is converted to boolean.
	 * @param $cfg (array) Array of configuration parameters (see the constructor).
	 * @protected
	 */
	protected function setConfig($cfg) {
		if (!is_array($cfg)) {
			return;
		}
		foreach (array('die_for_errors', 'ignore_filter_decoding_errors', 'ignore_missing_filter_decoders') as $key) {
			if (isset($cfg[$key])) {
				$this->cfg[$key] = (bool) $cfg[$key];
			}
		}
	}

	/**
	 * Return the parsed PDF document data.
	 * @return (array) array(xref data, parsed objects).
	 * @public
	 */
	public function getParsedData() {
		return array($this->xref, $this->objects);
	}

	/**
	 * Get Cross-Reference (xref) table and trailer data from PDF document data.
	 * @param $offset (int) Xref offset (if 0, the last "startxref" keyword of the document is used).
	 * @param $xref (array) Previous xref array (if any).
	 * @return (array) Array containing xref and trailer data.
	 * @throws Exception when no cross-reference data can be located.
	 * @protected
	 */
	protected function getXrefData($offset=0, $xref=array()) {
		$offset = (int) $offset;
		$datalen = strlen($this->pdfdata);
		$startxref_pattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';
		if (($offset < 0) || ($offset >= $datalen)) {
			// an offset outside the document cannot be searched
			$this->Error('Unable to find startxref');
			return $xref;
		}
		if ($offset == 0) {
			// find last startxref
			if (!(preg_match_all($startxref_pattern, $this->pdfdata, $matches, PREG_SET_ORDER, $offset) > 0)) {
				$this->Error('Unable to find startxref');
				return $xref;
			}
			$matches = array_pop($matches);
			$startxref = $matches[1];
		} elseif (strpos($this->pdfdata, 'xref', $offset) === $offset) {
			// Already pointing at the xref table
			$startxref = $offset;
		} elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
			// Cross-Reference Stream object
			$startxref = $offset;
		} elseif (preg_match($startxref_pattern, $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
			// startxref found
			$startxref = $matches[1][0];
		} else {
			$this->Error('Unable to find startxref');
			return $xref;
		}
		// the value captured by the regular expressions is a string of digits
		$startxref = (int) $startxref;
		if (($startxref < 0) || ($startxref >= $datalen)) {
			$this->Error('Unable to find xref');
			return $xref;
		}
		if (isset($this->visited_xref[$startxref])) {
			// this section was already decoded: the /Prev chain loops, stop here
			return $xref;
		}
		$this->visited_xref[$startxref] = true;
		// check xref position
		if (strpos($this->pdfdata, 'xref', $startxref) === $startxref) {
			// Cross-Reference
			$xref = $this->decodeXref($startxref, $xref);
		} else {
			// Cross-Reference Stream
			$xref = $this->decodeXrefStream($startxref, $xref);
		}
		if (empty($xref)) {
			$this->Error('Unable to find xref');
		}
		return $xref;
	}

	/**
	 * Decode the Cross-Reference section (classic "xref" table followed by a trailer).
	 * Entries already present in $xref are never overwritten, so the most
	 * recent section (decoded first) wins over the ones reached through /Prev.
	 * @param $startxref (int) Offset at which the xref section starts (position of the 'xref' keyword).
	 * @param $xref (array) Previous xref array (if any).
	 * @return (array) Array containing xref and trailer data.
	 * @throws Exception when the trailer cannot be found.
	 * @protected
	 */
	protected function decodeXref($startxref, $xref=array()) {
		$startxref += 4; // 4 is the length of the word 'xref'
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset = $startxref + strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $startxref);
		// initialize object number
		$obj_num = 0;
		// search for cross-reference entries or subsection
		while (preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			if ($matches[0][1] != $offset) {
				// the match is not contiguous: we are on another section
				break;
			}
			$offset += strlen($matches[0][0]);
			if ($matches[3][0] == 'n') {
				// in-use entry
				// create unique object index: [object number]_[generation number]
				$index = $obj_num.'_'.intval($matches[2][0]);
				// keep the entry of the most recent section
				if (!isset($xref['xref'][$index])) {
					// store object offset position
					$xref['xref'][$index] = intval($matches[1][0]);
				}
				++$obj_num;
			} elseif ($matches[3][0] == 'f') {
				// free entry
				++$obj_num;
			} else {
				// subsection header: first number is the first object number
				$obj_num = intval($matches[1][0]);
			}
		}
		// get trailer data
		if (preg_match('/trailer[\s]*<<(.*)>>/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			$trailer_data = $matches[1][0];
			if (!isset($xref['trailer']) || empty($xref['trailer'])) {
				// get only the last updated version
				$xref['trailer'] = array();
				// parse trailer_data
				if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['size'] = intval($matches[1]);
				}
				if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $matches[1];
					$xref['trailer']['id'][1] = $matches[2];
				}
			}
			if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				// get previous xref
				$xref = $this->getXrefData(intval($matches[1]), $xref);
			}
		} else {
			$this->Error('Unable to find trailer');
		}
		return $xref;
	}

	/**
	 * Decode the Cross-Reference Stream section.
	 *
	 * The stream dictionary provides the trailer keys, the field widths (/W),
	 * the object-number subsections (/Index) and the predictor parameters
	 * (/DecodeParms). When a PNG predictor is in use each row carries a leading
	 * tag byte and is reconstructed here; otherwise each row is simply the
	 * concatenation of the three /W fields.
	 *
	 * @param $startxref (int) Offset at which the xref stream object starts.
	 * @param $xref (array) Previous xref array (if any).
	 * @return (array) Array containing xref and trailer data.
	 * @throws Exception when an unknown PNG predictor tag is found.
	 * @protected
	 */
	protected function decodeXrefStream($startxref, $xref=array()) {
		// try to read Cross-Reference Stream
		$xrefobj = $this->getRawObject($startxref);
		$xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
		if (!isset($xref['trailer']) || empty($xref['trailer'])) {
			// get only the last updated version
			$xref['trailer'] = array();
			$filltrailer = true;
		} else {
			$filltrailer = false;
		}
		if (!isset($xref['xref'])) {
			$xref['xref'] = array();
		}
		$valid_crs = false;
		$columns = 0;
		$declared_predictor = null;
		$subsections = array(); // list of array(first object number, number of entries)
		$wb = null; // byte widths of the three fields of an entry
		$prevxref = null;
		$sarr = (is_array($xrefcrs) && isset($xrefcrs[0][1]) && is_array($xrefcrs[0][1])) ? $xrefcrs[0][1] : array();
		foreach ($sarr as $k => $v) {
			// only name objects followed by a value are dictionary keys of interest
			if (!isset($v[0]) || ($v[0] != '/') || !isset($sarr[($k + 1)])) {
				continue;
			}
			$name = $v[1];
			$next = $sarr[($k + 1)];
			if ($name === 'Type') {
				if (($next[0] == '/') && ($next[1] == 'XRef')) {
					$valid_crs = true;
				}
			} elseif ($name === 'Index') {
				// pairs of (first object number, number of entries), one per subsection
				if (is_array($next[1])) {
					$nums = array();
					foreach ($next[1] as $el) {
						if (isset($el[1]) && !is_array($el[1])) {
							$nums[] = intval($el[1]);
						}
					}
					$subsections = array();
					for ($p = 0; ($p + 1) < count($nums); $p += 2) {
						if ($nums[($p + 1)] > 0) {
							$subsections[] = array($nums[$p], $nums[($p + 1)]);
						}
					}
				}
			} elseif ($name === 'Prev') {
				if ($next[0] == 'numeric') {
					// get previous xref offset
					$prevxref = intval($next[1]);
				}
			} elseif ($name === 'W') {
				// number of bytes (in the decoded stream) of the corresponding field
				if (is_array($next[1]) && isset($next[1][0][1]) && isset($next[1][1][1]) && isset($next[1][2][1])) {
					$wb = array(
						intval($next[1][0][1]),
						intval($next[1][1][1]),
						intval($next[1][2][1]),
					);
				}
			} elseif ($name === 'DecodeParms') {
				if (isset($next[1]) && is_array($next[1])) {
					$decpar = $next[1];
					foreach ($decpar as $kdc => $vdc) {
						if (($vdc[0] != '/') || !isset($decpar[($kdc + 1)]) || ($decpar[($kdc + 1)][0] != 'numeric')) {
							continue;
						}
						if ($vdc[1] == 'Columns') {
							$columns = intval($decpar[($kdc + 1)][1]);
						} elseif ($vdc[1] == 'Predictor') {
							$declared_predictor = intval($decpar[($kdc + 1)][1]);
						}
					}
				}
			} elseif ($filltrailer) {
				if (($name === 'Size') && ($next[0] == 'numeric')) {
					// stored as integer, like decodeXref() does
					$xref['trailer']['size'] = intval($next[1]);
				} elseif (($name === 'Root') && ($next[0] == 'objref')) {
					$xref['trailer']['root'] = $next[1];
				} elseif (($name === 'Info') && ($next[0] == 'objref')) {
					$xref['trailer']['info'] = $next[1];
				} elseif (($name === 'Encrypt') && ($next[0] == 'objref')) {
					$xref['trailer']['encrypt'] = $next[1];
				} elseif (($name === 'ID') && isset($next[1][0][1]) && isset($next[1][1][1])) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $next[1][0][1];
					$xref['trailer']['id'][1] = $next[1][1][1];
				}
			}
		}
		// decode data (the /W array is mandatory to interpret the entries)
		if ($valid_crs && is_array($wb) && isset($xrefcrs[1][3][0]) && is_string($xrefcrs[1][3][0]) && ($xrefcrs[1][3][0] !== '')) {
			// convert the stream into a zero-based array of integers
			$bytes = array_values(unpack('C*', $xrefcrs[1][3][0]));
			// a PNG predictor is assumed when /Columns is given, unless /Predictor says otherwise
			$use_png = ($columns > 0);
			if ($declared_predictor !== null) {
				if ($declared_predictor >= 10) {
					$use_png = true;
					if ($columns < 1) {
						$columns = 1; // default value of /Columns
					}
				} elseif ($declared_predictor == 1) {
					$use_png = false; // 1 = no prediction
				}
			}
			if ($use_png) {
				$rows = $this->decodePngPredictorRows($bytes, $columns);
			} else {
				// no predictor: each row is the plain concatenation of the three fields
				$rows = array();
				$rowlen = array_sum($wb);
				if ($rowlen > 0) {
					foreach (array_chunk($bytes, $rowlen) as $chunk) {
						if (count($chunk) == $rowlen) {
							// a shorter trailing chunk is not a complete entry
							$rows[] = $chunk;
						}
					}
				}
			}
			// build the three big-endian integer fields of every entry
			$entries = array();
			foreach ($rows as $k => $row) {
				$entries[$k] = array(0, 0, 0);
				if ($wb[0] == 0) {
					// default type field
					$entries[$k][0] = 1;
				}
				$i = 0; // count bytes in the row
				for ($c = 0; $c < 3; ++$c) {
					for ($b = 0; $b < $wb[$c]; ++$b) {
						if (isset($row[$i])) {
							$entries[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
						}
						++$i;
					}
				}
			}
			// fill xref, numbering the entries according to the /Index subsections
			$sub = 0;
			$remaining = 0; // entries left in the current subsection (0 = no limit)
			$obj_num = 0;
			if (!empty($subsections)) {
				$obj_num = $subsections[0][0];
				$remaining = $subsections[0][1];
			}
			foreach ($entries as $row) {
				switch ($row[0]) {
					case 0: { // (f) linked list of free objects
						break;
					}
					case 1: { // (n) objects that are in use but are not compressed
						// create unique object index: [object number]_[generation number]
						$index = $obj_num.'_'.$row[2];
						// keep the entry of the most recent section
						if (!isset($xref['xref'][$index])) {
							// store object offset position
							$xref['xref'][$index] = $row[1];
						}
						break;
					}
					case 2: { // compressed objects
						// $row[1] = object number of the object stream in which this object is stored
						// $row[2] = index of this object within the object stream
						$index = $row[1].'_0_'.$row[2];
						$xref['xref'][$index] = -1;
						break;
					}
					default: { // null objects
						break;
					}
				}
				++$obj_num;
				if (($remaining > 0) && (--$remaining == 0) && isset($subsections[($sub + 1)])) {
					// move to the next subsection
					++$sub;
					$obj_num = $subsections[$sub][0];
					$remaining = $subsections[$sub][1];
				}
			}
		} // end decoding data
		if ($prevxref !== null) {
			// get previous xref
			$xref = $this->getXrefData($prevxref, $xref);
		}
		return $xref;
	}

	/**
	 * Reverse the PNG predictor applied to the rows of a stream.
	 * Each encoded row is made of one tag byte (0..4) followed by $columns data
	 * bytes; the "left" neighbour is the previously reconstructed byte of the
	 * same row (one byte per sample is assumed).
	 * @param $bytes (array) Zero-based array of byte values.
	 * @param $columns (int) Number of data bytes per row.
	 * @return (array) Array of rows, each one an array of $columns reconstructed byte values.
	 * @throws Exception when a row has an unknown predictor tag.
	 * @private
	 */
	private function decodePngPredictorRows($bytes, $columns) {
		// number of bytes in an encoded row (tag byte + data)
		$rowlen = ($columns + 1);
		$decoded = array();
		// the row above the first one is made of zeros
		$prev_row = array_fill(0, $columns, 0);
		foreach (array_chunk($bytes, $rowlen) as $k => $row) {
			// missing bytes of a truncated last row count as zero
			$row = array_pad($row, $rowlen, 0);
			$decoded[$k] = array();
			// get PNG predictor value
			$predictor = (10 + $row[0]);
			for ($i = 1; $i <= $columns; ++$i) {
				$j = ($i - 1);
				$row_up = $prev_row[$j];
				if ($j == 0) {
					$row_left = 0;
					$row_upleft = 0;
				} else {
					// reconstructed (not encoded) byte on the left
					$row_left = $decoded[$k][($j - 1)];
					$row_upleft = $prev_row[($j - 1)];
				}
				switch ($predictor) {
					case 10: { // PNG None
						$decoded[$k][$j] = $row[$i];
						break;
					}
					case 11: { // PNG Sub
						$decoded[$k][$j] = (($row[$i] + $row_left) & 0xff);
						break;
					}
					case 12: { // PNG Up
						$decoded[$k][$j] = (($row[$i] + $row_up) & 0xff);
						break;
					}
					case 13: { // PNG Average (integer division, rounded down)
						$decoded[$k][$j] = (($row[$i] + (($row_left + $row_up) >> 1)) & 0xff);
						break;
					}
					case 14: { // PNG Paeth
						// initial estimate
						$p = ($row_left + $row_up - $row_upleft);
						// distances
						$pa = abs($p - $row_left);
						$pb = abs($p - $row_up);
						$pc = abs($p - $row_upleft);
						// nearest neighbour, ties resolved in the order left, up, upper-left
						if (($pa <= $pb) && ($pa <= $pc)) {
							$decoded[$k][$j] = (($row[$i] + $row_left) & 0xff);
						} elseif ($pb <= $pc) {
							$decoded[$k][$j] = (($row[$i] + $row_up) & 0xff);
						} else {
							$decoded[$k][$j] = (($row[$i] + $row_upleft) & 0xff);
						}
						break;
					}
					default: {
						$this->Error('Unknown PNG predictor');
						$decoded[$k][$j] = 0;
						break;
					}
				}
			}
			$prev_row = $decoded[$k];
		}
		return $decoded;
	}

	/**
	 * Get object type, raw value and offset to next object.
	 * @param $offset (int) Object offset.
	 * @return (array) array(type, value, offset of the first byte after the object).
	 *   The type is an empty string when nothing could be read at the given position,
	 *   in which case the returned offset does not advance past the skipped white space.
	 * @protected
	 */
	protected function getRawObject($offset=0) {
		$objtype = ''; // object type to be returned
		$objval = ''; // object value to be returned
		$offset = (int) $offset;
		if (($offset < 0) || ($offset >= strlen($this->pdfdata))) {
			// nothing to read outside the document
			return array($objtype, $objval, $offset);
		}
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		if (!isset($this->pdfdata[$offset])) {
			// only white space was left
			return array($objtype, $objval, $offset);
		}
		// get first char
		$char = $this->pdfdata[$offset];
		// get object type
		switch ($char) {
			case '%': { // \x25 PERCENT SIGN
				// skip comment and search for next token
				$next = strcspn($this->pdfdata, "\r\n", $offset);
				if ($next > 0) {
					$offset += $next;
					return $this->getRawObject($offset);
				}
				break;
			}
			case '/': { // \x2F SOLIDUS
				// name object
				$objtype = $char;
				++$offset;
				if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
					$objval = $matches[1]; // unescaped value
					$offset += strlen($objval);
				}
				break;
			}
			case '(': // \x28 LEFT PARENTHESIS
			case ')': { // \x29 RIGHT PARENTHESIS
				// literal string object
				$objtype = $char;
				++$offset;
				$strpos = $offset;
				if ($char == '(') {
					// scan up to the matching closing parenthesis
					$open_bracket = 1;
					while ($open_bracket > 0) {
						if (!isset($this->pdfdata[$strpos])) {
							break;
						}
						$ch = $this->pdfdata[$strpos];
						switch ($ch) {
							case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
								// skip next character
								++$strpos;
								break;
							}
							case '(': { // LEFT PARENTHESIS (28h)
								++$open_bracket;
								break;
							}
							case ')': { // RIGHT PARENTHESIS (29h)
								--$open_bracket;
								break;
							}
						}
						++$strpos;
					}
					$objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
					$offset = $strpos;
				}
				break;
			}
			case '[': // \x5B LEFT SQUARE BRACKET
			case ']': { // \x5D RIGHT SQUARE BRACKET
				// array object
				$objtype = $char;
				++$offset;
				if ($char == '[') {
					// get array content
					$objval = array();
					do {
						$oldoffset = $offset;
						// get element
						$element = $this->getRawObject($offset);
						$offset = $element[2];
						$objval[] = $element;
						// stop also when no progress is made (unterminated array)
					} while (($element[0] != ']') && ($offset != $oldoffset));
					// remove closing delimiter
					array_pop($objval);
				}
				break;
			}
			case '<': // \x3C LESS-THAN SIGN
			case '>': { // \x3E GREATER-THAN SIGN
				if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
					// dictionary object
					$objtype = $char.$char;
					$offset += 2;
					if ($char == '<') {
						// get dictionary content as a flat list of keys and values
						$objval = array();
						do {
							$oldoffset = $offset;
							// get element
							$element = $this->getRawObject($offset);
							$offset = $element[2];
							$objval[] = $element;
							// stop also when no progress is made (unterminated dictionary)
						} while (($element[0] != '>>') && ($offset != $oldoffset));
						// remove closing delimiter
						array_pop($objval);
					}
				} else {
					// hexadecimal string object
					$objtype = $char;
					++$offset;
					if (($char == '<') && (preg_match('/^([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU', substr($this->pdfdata, $offset), $matches) == 1)) {
						// remove white space characters
						$objval = str_replace(array("\x09", "\x0a", "\x0c", "\x0d", "\x20"), '', $matches[1]);
						$offset += strlen($matches[0]);
					} elseif (($endpos = strpos($this->pdfdata, '>', $offset)) !== FALSE) {
						$offset = $endpos + 1;
					}
				}
				break;
			}
			default: {
				if (substr($this->pdfdata, $offset, 6) == 'endobj') {
					// indirect object
					$objtype = 'endobj';
					$offset += 6;
				} elseif (substr($this->pdfdata, $offset, 4) == 'null') {
					// null object
					$objtype = 'null';
					$offset += 4;
					$objval = 'null';
				} elseif (substr($this->pdfdata, $offset, 4) == 'true') {
					// boolean true object
					$objtype = 'boolean';
					$offset += 4;
					$objval = 'true';
				} elseif (substr($this->pdfdata, $offset, 5) == 'false') {
					// boolean false object
					$objtype = 'boolean';
					$offset += 5;
					$objval = 'false';
				} elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
					// start stream object
					$objtype = 'stream';
					$offset += 6;
					if (preg_match('/^([\r]?[\n])/isU', substr($this->pdfdata, $offset), $matches) == 1) {
						$offset += strlen($matches[0]);
						if (preg_match('/(endstream)[\x09\x0a\x0c\x0d\x20]/isU', substr($this->pdfdata, $offset), $matches, PREG_OFFSET_CAPTURE) == 1) {
							// everything up to the 'endstream' keyword is the stream content
							$objval = substr($this->pdfdata, $offset, $matches[0][1]);
							$offset += $matches[1][1];
						}
					}
				} elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
					// end stream object
					$objtype = 'endstream';
					$offset += 9;
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// indirect object reference
					$objtype = 'objref';
					$offset += strlen($matches[0]);
					$objval = intval($matches[1]).'_'.intval($matches[2]);
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// object start
					$objtype = 'obj';
					$objval = intval($matches[1]).'_'.intval($matches[2]);
					$offset += strlen($matches[0]);
				} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
					// numeric object
					$objtype = 'numeric';
					$objval = substr($this->pdfdata, $offset, $numlen);
					$offset += $numlen;
				}
				break;
			}
		}
		return array($objtype, $objval, $offset);
	}

	/**
	 * Get content of indirect object.
	 * @param $obj_ref (string) Object reference in the form "[object number]_[generation number]".
	 * @param $offset (int) Object offset.
	 * @param $decoding (boolean) If true decode streams.
	 * @return (array) List of the elements found between "obj" and "endobj";
	 *   array('null', 'null', offset) when the object does not start at the given offset.
	 * @throws Exception when the object reference is malformed.
	 * @protected
	 */
	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
		$obj = is_string($obj_ref) ? explode('_', $obj_ref) : array();
		if (count($obj) != 2) {
			$this->Error('Invalid object reference: '.(is_scalar($obj_ref) ? $obj_ref : gettype($obj_ref)));
			return;
		}
		$objref = $obj[0].' '.$obj[1].' obj';
		$offset = (int) $offset;
		if (($offset < 0) || ($offset >= strlen($this->pdfdata))) {
			// an offset outside the document cannot point to an object
			return array('null', 'null', $offset);
		}
		// ignore leading zeros
		$offset += strspn($this->pdfdata, '0', $offset);
		if (strpos($this->pdfdata, $objref, $offset) !== $offset) {
			// an indirect reference to an undefined object shall be considered a reference to the null object
			return array('null', 'null', $offset);
		}
		// starting position of object content
		$offset += strlen($objref);
		// get array of object content
		$objdata = array();
		$i = 0; // object main index
		do {
			$oldoffset = $offset;
			// get element
			$element = $this->getRawObject($offset);
			$offset = $element[2];
			// decode stream using stream's dictionary information
			if ($decoding && ($element[0] == 'stream') && (isset($objdata[($i - 1)][0])) && ($objdata[($i - 1)][0] == '<<')) {
				// $element[3] = array(decoded stream, list of filters that could not be applied)
				$element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
			}
			$objdata[$i] = $element;
			++$i;
			// stop at the end of the object or when the parser makes no progress
		} while (($element[0] != 'endobj') && ($offset != $oldoffset));
		// remove closing delimiter
		array_pop($objdata);
		// return raw object content
		return $objdata;
	}

	/**
	 * Get the content of an object, resolving an indirect object reference if necessary.
	 * @param $obj (array) Object element, as returned by getRawObject().
	 * @return (array) The cached content of the referenced object when available, otherwise $obj itself.
	 * @protected
	 */
	protected function getObjectVal($obj) {
		if ($obj[0] == 'objref') {
			// reference to indirect object
			if (isset($this->objects[$obj[1]])) {
				// this object has been already parsed
				return $this->objects[$obj[1]];
			} elseif (isset($this->xref[$obj[1]])) {
				// NOTE(review): the decoders in this class store object offsets under
				// $this->xref['xref'][...], so this lookup by object reference looks
				// like it can never match - confirm the intended key before relying on it.
				// parse new object
				$this->objects[$obj[1]] = $this->getIndirectObject($obj[1], $this->xref[$obj[1]], false);
				return $this->objects[$obj[1]];
			}
		}
		return $obj;
	}

	/**
	 * Decode the specified stream.
	 * @param $sdic (array) Stream's dictionary array (flat list of keys and values).
	 * @param $stream (string) Stream to decode.
	 * @return (array) array(decoded stream data, list of filters for which no decoder is available).
	 * @throws Exception when a filter fails and the related 'ignore_*' option is disabled.
	 * @protected
	 */
	protected function decodeStream($sdic, $stream) {
		// get stream length and filters
		$slength = strlen($stream);
		if ($slength <= 0) {
			return array('', array());
		}
		$filters = array();
		foreach ($sdic as $k => $v) {
			if (($v[0] != '/') || !isset($sdic[($k + 1)])) {
				continue;
			}
			if (($v[1] == 'Length') && ($sdic[($k + 1)][0] == 'numeric')) {
				// get declared stream length and drop any byte in excess
				$declength = intval($sdic[($k + 1)][1]);
				if ($declength < $slength) {
					$stream = substr($stream, 0, $declength);
					$slength = $declength;
				}
			} elseif ($v[1] == 'Filter') {
				// resolve indirect object
				$objval = $this->getObjectVal($sdic[($k + 1)]);
				if ($objval[0] == '/') {
					// single filter
					$filters[] = $objval[1];
				} elseif ($objval[0] == '[') {
					// array of filters
					foreach ($objval[1] as $flt) {
						if ($flt[0] == '/') {
							$filters[] = $flt[1];
						}
					}
				}
			}
		}
		// decode the stream, applying the filters in the declared order
		$remaining_filters = array();
		foreach ($filters as $filter) {
			if (in_array($filter, TCPDF_FILTERS::getAvailableFilters())) {
				try {
					$stream = TCPDF_FILTERS::decodeFilter($filter, $stream);
				} catch (Exception $e) {
					$emsg = $e->getMessage();
					// messages starting with '~' are handled as "missing decoder" errors
					$missing_decoder = (substr($emsg, 0, 1) == '~');
					if (($missing_decoder && !$this->cfg['ignore_missing_filter_decoders'])
						|| (!$missing_decoder && !$this->cfg['ignore_filter_decoding_errors'])) {
						$this->Error($emsg);
					}
				}
			} else {
				// add missing filter to array
				$remaining_filters[] = $filter;
			}
		}
		return array($stream, $remaining_filters);
	}

	/**
	 * Report an error: print the message and terminate the script when the
	 * 'die_for_errors' configuration option is set, otherwise throw an exception.
	 * @param $msg (string) The error message.
	 * @throws Exception when 'die_for_errors' is not set.
	 * @public
	 */
	public function Error($msg) {
		if ($this->cfg['die_for_errors']) {
			die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
		} else {
			throw new Exception('TCPDF_PARSER ERROR: '.$msg);
		}
	}

} // END OF TCPDF_PARSER CLASS
812 
813 //============================================================+
814 // END OF FILE
815 //============================================================+
$pdfdata
Raw content of the PDF document.
static decodeFilter($filter, $data)
Decode data using the specified filter type.
static getAvailableFilters()
Get a list of available decoding filters.
decodeXrefStream($startxref, $xref=array())
Decode the Cross-Reference Stream section.
$objects
Array of PDF objects.
__construct($data, $cfg=array())
Parse a PDF document and return an array of objects.
getXrefData($offset=0, $xref=array())
Get Cross-Reference (xref) table and trailer data from PDF document data.
decodeStream($sdic, $stream)
Decode the specified stream.
getRawObject($offset=0)
Get object type, raw value and offset to next object.
setConfig($cfg)
Set the configuration parameters.
$cfg
Array of configuration parameters.
Create styles array
The data for the language used.
getObjectVal($obj)
Get the content of an object, resolving an indirect object reference if necessary.
$xref
XREF data.
getParsedData()
Return an array of parsed PDF document objects.
Error($msg)
Throw an exception or print an error message and die if the K_TCPDF_PARSER_THROW_EXCEPTION_ERROR cons...
$FilterDecoders
Class object for decoding filters.
if(! $in) $columns
Definition: Utf8Test.php:45
This is a PHP class for parsing PDF documents.
decodeXref($startxref, $xref=array())
Decode the Cross-Reference section.
getIndirectObject($obj_ref, $offset=0, $decoding=true)
Get content of indirect object.