44 require_once(dirname(__FILE__).
'/include/tcpdf_filters.php');
85 'die_for_errors' =>
false,
86 'ignore_filter_decoding_errors' =>
true,
87 'ignore_missing_filter_decoders' =>
true,
104 $this->
Error(
'Empty PDF data.');
107 if (($trimpos = strpos(
$data,
'%PDF-')) === FALSE) {
108 $this->
Error(
'Invalid PDF data: missing %PDF header.');
111 $this->pdfdata = substr(
$data, $trimpos);
113 $pdflen = strlen($this->pdfdata);
119 $this->objects =
array();
120 foreach ($this->xref[
'xref'] as $obj => $offset) {
121 if (!isset($this->objects[$obj]) AND ($offset > 0)) {
127 unset($this->pdfdata);
140 if (isset(
$cfg[
'die_for_errors'])) {
141 $this->cfg[
'die_for_errors'] = !!
$cfg[
'die_for_errors'];
143 if (isset(
$cfg[
'ignore_filter_decoding_errors'])) {
144 $this->cfg[
'ignore_filter_decoding_errors'] = !!
$cfg[
'ignore_filter_decoding_errors'];
146 if (isset(
$cfg[
'ignore_missing_filter_decoders'])) {
147 $this->cfg[
'ignore_missing_filter_decoders'] = !!
$cfg[
'ignore_missing_filter_decoders'];
158 return array($this->xref, $this->objects);
172 if (preg_match_all(
'/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0) {
173 $this->
Error(
'Unable to find startxref');
175 $matches = array_pop($matches);
176 $startxref = $matches[1];
177 } elseif (strpos($this->pdfdata,
'xref', $offset) == $offset) {
179 $startxref = $offset;
180 } elseif (preg_match(
'/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
182 $startxref = $offset;
183 } elseif (preg_match(
'/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
185 $startxref = $matches[1][0];
187 $this->
Error(
'Unable to find startxref');
190 if (strpos($this->pdfdata,
'xref', $startxref) == $startxref) {
198 $this->
Error(
'Unable to find xref');
214 $offset = $startxref + strspn($this->pdfdata,
"\x00\x09\x0a\x0c\x0d\x20", $startxref);
218 while (preg_match(
'/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
219 if ($matches[0][1] != $offset) {
223 $offset += strlen($matches[0][0]);
224 if ($matches[3][0] ==
'n') {
226 $index = $obj_num.
'_'.intval($matches[2][0]);
228 if (!isset(
$xref[
'xref'][$index])) {
230 $xref[
'xref'][$index] = intval($matches[1][0]);
233 } elseif ($matches[3][0] ==
'f') {
237 $obj_num = intval($matches[1][0]);
241 if (preg_match(
'/trailer[\s]*<<(.*)>>/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
242 $trailer_data = $matches[1][0];
243 if (!isset(
$xref[
'trailer']) OR empty(
$xref[
'trailer'])) {
247 if (preg_match(
'/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
248 $xref[
'trailer'][
'size'] = intval($matches[1]);
250 if (preg_match(
'/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
251 $xref[
'trailer'][
'root'] = intval($matches[1]).
'_'.intval($matches[2]);
253 if (preg_match(
'/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
254 $xref[
'trailer'][
'encrypt'] = intval($matches[1]).
'_'.intval($matches[2]);
256 if (preg_match(
'/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
257 $xref[
'trailer'][
'info'] = intval($matches[1]).
'_'.intval($matches[2]);
259 if (preg_match(
'/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
261 $xref[
'trailer'][
'id'][0] = $matches[1];
262 $xref[
'trailer'][
'id'][1] = $matches[2];
265 if (preg_match(
'/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
270 $this->
Error(
'Unable to find trailer');
287 if (!isset(
$xref[
'trailer']) OR empty(
$xref[
'trailer'])) {
292 $filltrailer =
false;
294 if (!isset(
$xref[
'xref'])) {
299 $sarr = $xrefcrs[0][1];
300 if (!is_array($sarr)) {
303 foreach ($sarr as $k => $v) {
304 if (($v[0] ==
'/') AND ($v[1] ==
'Type') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] ==
'/') AND ($sarr[($k +1)][1] ==
'XRef'))) {
306 } elseif (($v[0] ==
'/') AND ($v[1] ==
'Index') AND (isset($sarr[($k +1)]))) {
308 $index_first = intval($sarr[($k +1)][1][0][1]);
310 $index_entries = intval($sarr[($k +1)][1][1][1]);
311 } elseif (($v[0] ==
'/') AND ($v[1] ==
'Prev') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] ==
'numeric'))) {
313 $prevxref = intval($sarr[($k +1)][1]);
314 } elseif (($v[0] ==
'/') AND ($v[1] ==
'W') AND (isset($sarr[($k +1)]))) {
317 $wb[0] = intval($sarr[($k +1)][1][0][1]);
318 $wb[1] = intval($sarr[($k +1)][1][1][1]);
319 $wb[2] = intval($sarr[($k +1)][1][2][1]);
320 } elseif (($v[0] ==
'/') AND ($v[1] ==
'DecodeParms') AND (isset($sarr[($k +1)][1]))) {
321 $decpar = $sarr[($k +1)][1];
322 foreach ($decpar as $kdc => $vdc) {
323 if (($vdc[0] ==
'/') AND ($vdc[1] ==
'Columns') AND (isset($decpar[($kdc +1)]) AND ($decpar[($kdc +1)][0] ==
'numeric'))) {
324 $columns = intval($decpar[($kdc +1)][1]);
325 } elseif (($vdc[0] ==
'/') AND ($vdc[1] ==
'Predictor') AND (isset($decpar[($kdc +1)]) AND ($decpar[($kdc +1)][0] ==
'numeric'))) {
326 $predictor = intval($decpar[($kdc +1)][1]);
329 } elseif ($filltrailer) {
330 if (($v[0] ==
'/') AND ($v[1] ==
'Size') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] ==
'numeric'))) {
331 $xref[
'trailer'][
'size'] = $sarr[($k +1)][1];
332 } elseif (($v[0] ==
'/') AND ($v[1] ==
'Root') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] ==
'objref'))) {
333 $xref[
'trailer'][
'root'] = $sarr[($k +1)][1];
334 } elseif (($v[0] ==
'/') AND ($v[1] ==
'Info') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] ==
'objref'))) {
335 $xref[
'trailer'][
'info'] = $sarr[($k +1)][1];
336 } elseif (($v[0] ==
'/') AND ($v[1] ==
'Encrypt') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] ==
'objref'))) {
337 $xref[
'trailer'][
'encrypt'] = $sarr[($k +1)][1];
338 } elseif (($v[0] ==
'/') AND ($v[1] ==
'ID') AND (isset($sarr[($k +1)]))) {
340 $xref[
'trailer'][
'id'][0] = $sarr[($k +1)][1][0][1];
341 $xref[
'trailer'][
'id'][1] = $sarr[($k +1)][1][1][1];
346 if ($valid_crs AND isset($xrefcrs[1][3][0])) {
350 $sdata = unpack(
'C*', $xrefcrs[1][3][0]);
352 $sdata = array_chunk($sdata, $rowlen);
356 $prev_row = array_fill (0, $rowlen, 0);
358 foreach ($sdata as $k =>
$row) {
360 $ddata[$k] =
array();
362 $predictor = (10 +
$row[0]);
367 $row_up = $prev_row[$j];
372 $row_left =
$row[($i - 1)];
373 $row_upleft = $prev_row[($j - 1)];
375 switch ($predictor) {
377 $ddata[$k][$j] =
$row[$i];
381 $ddata[$k][$j] = ((
$row[$i] + $row_left) & 0xff);
385 $ddata[$k][$j] = ((
$row[$i] + $row_up) & 0xff);
389 $ddata[$k][$j] = ((
$row[$i] + (($row_left + $row_up) / 2)) & 0xff);
394 $p = ($row_left + $row_up - $row_upleft);
396 $pa = abs($p - $row_left);
397 $pb = abs($p - $row_up);
398 $pc = abs($p - $row_upleft);
399 $pmin = min($pa, $pb, $pc);
403 $ddata[$k][$j] = ((
$row[$i] + $row_left) & 0xff);
407 $ddata[$k][$j] = ((
$row[$i] + $row_up) & 0xff);
411 $ddata[$k][$j] = ((
$row[$i] + $row_upleft) & 0xff);
418 $this->
Error(
'Unknown PNG predictor');
423 $prev_row = $ddata[$k];
428 foreach ($ddata as $k =>
$row) {
430 $sdata[$k] =
array(0, 0, 0);
437 for ($c = 0; $c < 3; ++$c) {
439 for ($b = 0; $b < $wb[$c]; ++$b) {
440 if (isset(
$row[$i])) {
441 $sdata[$k][$c] += (
$row[$i] << (($wb[$c] - 1 - $b) * 8));
449 if (isset($index_first)) {
450 $obj_num = $index_first;
454 foreach ($sdata as $k =>
$row) {
461 $index = $obj_num.
'_'.
$row[2];
463 if (!isset(
$xref[
'xref'][$index])) {
473 $xref[
'xref'][$index] = -1;
483 if (isset($prevxref)) {
501 $offset += strspn($this->pdfdata,
"\x00\x09\x0a\x0c\x0d\x20", $offset);
503 $char = $this->pdfdata[$offset];
508 $next = strcspn($this->pdfdata,
"\r\n", $offset);
519 if (preg_match(
'/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
520 $objval = $matches[1];
521 $offset += strlen($objval);
533 while ($open_bracket > 0) {
534 if (!isset($this->pdfdata{$strpos})) {
537 $ch = $this->pdfdata{$strpos};
555 $objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
571 $offset = $element[2];
572 $objval[] = $element;
573 }
while ($element[0] !=
']');
581 if (isset($this->pdfdata{($offset + 1)}) AND ($this->pdfdata{($offset + 1)} == $char)) {
583 $objtype = $char.$char;
591 $offset = $element[2];
592 $objval[] = $element;
593 }
while ($element[0] !=
'>>');
601 if (($char ==
'<') AND (preg_match(
'/^([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU', substr($this->pdfdata, $offset), $matches) == 1)) {
603 $objval = strtr($matches[1],
"\x09\x0a\x0c\x0d\x20",
'');
604 $offset += strlen($matches[0]);
605 } elseif (($endpos = strpos($this->pdfdata,
'>', $offset)) !== FALSE) {
606 $offset = $endpos + 1;
612 if (substr($this->pdfdata, $offset, 6) ==
'endobj') {
616 } elseif (substr($this->pdfdata, $offset, 4) ==
'null') {
621 } elseif (substr($this->pdfdata, $offset, 4) ==
'true') {
623 $objtype =
'boolean';
626 } elseif (substr($this->pdfdata, $offset, 5) ==
'false') {
628 $objtype =
'boolean';
631 } elseif (substr($this->pdfdata, $offset, 6) ==
'stream') {
635 if (preg_match(
'/^([\r]?[\n])/isU', substr($this->pdfdata, $offset), $matches) == 1) {
636 $offset += strlen($matches[0]);
637 if (preg_match(
'/(endstream)[\x09\x0a\x0c\x0d\x20]/isU', substr($this->pdfdata, $offset), $matches, PREG_OFFSET_CAPTURE) == 1) {
638 $objval = substr($this->pdfdata, $offset, $matches[0][1]);
639 $offset += $matches[1][1];
642 } elseif (substr($this->pdfdata, $offset, 9) ==
'endstream') {
644 $objtype =
'endstream';
646 } elseif (preg_match(
'/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
649 $offset += strlen($matches[0]);
650 $objval = intval($matches[1]).
'_'.intval($matches[2]);
651 } elseif (preg_match(
'/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
654 $objval = intval($matches[1]).
'_'.intval($matches[2]);
655 $offset += strlen ($matches[0]);
656 } elseif (($numlen = strspn($this->pdfdata,
'+-.0123456789', $offset)) > 0) {
658 $objtype =
'numeric';
659 $objval = substr($this->pdfdata, $offset, $numlen);
665 return array($objtype, $objval, $offset);
678 $obj = explode(
'_', $obj_ref);
679 if (($obj ===
false) OR (count($obj) != 2)) {
680 $this->
Error(
'Invalid object reference: '.$obj);
683 $objref = $obj[0].
' '.$obj[1].
' obj';
685 $offset += strspn($this->pdfdata,
'0', $offset);
686 if (strpos($this->pdfdata, $objref, $offset) != $offset) {
688 return array(
'null',
'null', $offset);
691 $offset += strlen($objref);
696 $oldoffset = $offset;
699 $offset = $element[2];
701 if ($decoding AND ($element[0] ==
'stream') AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] ==
'<<')) {
702 $element[3] = $this->
decodeStream($objdata[($i - 1)][1], $element[1]);
704 $objdata[$i] = $element;
706 }
while (($element[0] !=
'endobj') AND ($offset != $oldoffset));
721 if ($obj[0] ==
'objref') {
723 if (isset($this->objects[$obj[1]])) {
725 return $this->objects[$obj[1]];
726 } elseif (isset($this->xref[$obj[1]])) {
728 $this->objects[$obj[1]] = $this->
getIndirectObject($obj[1], $this->xref[$obj[1]],
false);
729 return $this->objects[$obj[1]];
745 $slength = strlen($stream);
750 foreach ($sdic as $k => $v) {
752 if (($v[1] ==
'Length') AND (isset($sdic[($k + 1)])) AND ($sdic[($k + 1)][0] ==
'numeric')) {
754 $declength = intval($sdic[($k + 1)][1]);
755 if ($declength < $slength) {
756 $stream = substr($stream, 0, $declength);
757 $slength = $declength;
759 } elseif (($v[1] ==
'Filter') AND (isset($sdic[($k + 1)]))) {
762 if ($objval[0] ==
'/') {
764 $filters[] = $objval[1];
765 } elseif ($objval[0] ==
'[') {
767 foreach ($objval[1] as $flt) {
768 if ($flt[0] ==
'/') {
769 $filters[] = $flt[1];
777 $remaining_filters =
array();
778 foreach ($filters as $filter) {
783 $emsg = $e->getMessage();
784 if ((($emsg[0] ==
'~') AND !$this->cfg[
'ignore_missing_filter_decoders'])
785 OR (($emsg[0] !=
'~') AND !$this->cfg[
'ignore_filter_decoding_errors'])) {
786 $this->
Error($e->getMessage());
791 $remaining_filters[] = $filter;
794 return array($stream, $remaining_filters);
804 if ($this->cfg[
'die_for_errors']) {
805 die(
'<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
807 throw new Exception(
'TCPDF_PARSER ERROR: '.$msg);
$pdfdata
Raw content of the PDF document.
static decodeFilter($filter, $data)
Decode data using the specified filter type.
static getAvailableFilters()
Get a list of available decoding filters.
decodeXrefStream($startxref, $xref=array())
Decode the Cross-Reference Stream section.
$objects
Array of PDF objects.
__construct($data, $cfg=array())
Parse a PDF document and return an array of objects.
getXrefData($offset=0, $xref=array())
Get Cross-Reference (xref) table and trailer data from PDF document data.
decodeStream($sdic, $stream)
Decode the specified stream.
getRawObject($offset=0)
Get object type, raw value and offset to next object.
setConfig($cfg)
Set the configuration parameters.
$cfg
Array of configuration parameters.
Create styles array
The data for the language used.
getObjectVal($obj)
Get the content of object, resolving indirect object reference if necessary.
getParsedData()
Return an array of parsed PDF document objects.
Error($msg)
Throw an exception or print an error message and die if the K_TCPDF_PARSER_THROW_EXCEPTION_ERROR cons...
$FilterDecoders
Class object for decoding filters.
This is a PHP class for parsing PDF documents.
decodeXref($startxref, $xref=array())
Decode the Cross-Reference section.
getIndirectObject($obj_ref, $offset=0, $decoding=true)
Get content of indirect object.