ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
Xmp.php
Go to the documentation of this file.
1 <?php
2 
3 namespace GetId3\Module\Tag;
4 
7 // available at http://getid3.sourceforge.net //
8 // or http://www.getid3.org //
10 // See readme.txt for more details //
12 // //
13 // module.tag.xmp.php //
14 // module for analyzing XMP metadata (e.g. in JPEG files) //
15 // dependencies: NONE //
16 // //
18 // //
19 // Module originally written [2009-Mar-26] by //
20 // Nigel Barnes <ngbarnesØhotmail*com> //
21 // Bundled into GetId3 with permission //
22 // called by GetId3 in module.graphic.jpg.php //
23 // ///
25 
26 /**************************************************************************************************
27  * SWISScenter Source Nigel Barnes
28  *
29  * Provides functions for reading information from the 'APP1' Extensible Metadata
30  * Platform (XMP) segment of JPEG format files.
31  * This XMP segment is XML based and contains the Resource Description Framework (RDF)
32  * data, which itself can contain the Dublin Core Metadata Initiative (DCMI) information.
33  *
34  * This code uses segments from the JPEG Metadata Toolkit project by Evan Hunter.
35  *************************************************************************************************/
36 
/**
 * Reads Extensible Metadata Platform (XMP) metadata from a JPEG file.
 *
 * The XMP packet lives in an APP1 segment whose payload starts with the
 * signature "http://ns.adobe.com/xap/1.0/\0". The class walks the JPEG header
 * segments, extracts the XML packet and flattens the whitelisted RDF
 * properties into an associative array.
 *
 * Typical use:
 *     $xmp = new Xmp($path);
 *     if ($xmp->isValid()) { $tags = $xmp->getAllTags(); }
 */
class Xmp
{
    /**
     * Path of the file most recently handed to Image_XMP().
     *
     * @var string|null
     */
    public $_sFilename = null;

    /**
     * Extracted XMP tags, keyed by qualified property name (e.g. "dc:title").
     * Scalar properties map to a string, container properties to a list.
     *
     * @var array
     */
    public $_aXMP = array();

    /**
     * True once an XMP packet has been found and parsed successfully.
     *
     * @var bool
     */
    public $_bXMPParse = false;

    /**
     * Whitelist of qualified element/attribute names that are extracted.
     *
     * @var array
     */
    protected static $XMP_tag_captions = array(
        // IPTC Core
        'Iptc4xmpCore:CiAdrCity',
        'Iptc4xmpCore:CiAdrCtry',
        'Iptc4xmpCore:CiAdrExtadr',
        'Iptc4xmpCore:CiAdrPcode',
        'Iptc4xmpCore:CiAdrRegion',
        'Iptc4xmpCore:CiEmailWork',
        'Iptc4xmpCore:CiTelWork',
        'Iptc4xmpCore:CiUrlWork',
        'Iptc4xmpCore:CountryCode',
        'Iptc4xmpCore:CreatorContactInfo',
        'Iptc4xmpCore:IntellectualGenre',
        'Iptc4xmpCore:Location',
        'Iptc4xmpCore:Scene',
        'Iptc4xmpCore:SubjectCode',
        // Dublin Core Schema
        'dc:contributor',
        'dc:coverage',
        'dc:creator',
        'dc:date',
        'dc:description',
        'dc:format',
        'dc:identifier',
        'dc:language',
        'dc:publisher',
        'dc:relation',
        'dc:rights',
        'dc:source',
        'dc:subject',
        'dc:title',
        'dc:type',
        // XMP Basic Schema
        'xmp:Advisory',
        'xmp:BaseURL',
        'xmp:CreateDate',
        'xmp:CreatorTool',
        'xmp:Identifier',
        'xmp:Label',
        'xmp:MetadataDate',
        'xmp:ModifyDate',
        'xmp:Nickname',
        'xmp:Rating',
        'xmp:Thumbnails',
        'xmpidq:Scheme',
        // XMP Rights Management Schema
        'xmpRights:Certificate',
        'xmpRights:Marked',
        'xmpRights:Owner',
        'xmpRights:UsageTerms',
        'xmpRights:WebStatement',
        // These are not in spec but Photoshop CS seems to use them
        'xap:Advisory',
        'xap:BaseURL',
        'xap:CreateDate',
        'xap:CreatorTool',
        'xap:Identifier',
        'xap:MetadataDate',
        'xap:ModifyDate',
        'xap:Nickname',
        'xap:Rating',
        'xap:Thumbnails',
        'xapidq:Scheme',
        'xapRights:Certificate',
        'xapRights:Copyright',
        'xapRights:Marked',
        'xapRights:Owner',
        'xapRights:UsageTerms',
        'xapRights:WebStatement',
        // XMP Media Management Schema
        'xapMM:DerivedFrom',
        'xapMM:DocumentID',
        'xapMM:History',
        'xapMM:InstanceID',
        'xapMM:ManagedFrom',
        'xapMM:Manager',
        'xapMM:ManageTo',
        'xapMM:ManageUI',
        'xapMM:ManagerVariant',
        'xapMM:RenditionClass',
        'xapMM:RenditionParams',
        'xapMM:VersionID',
        'xapMM:Versions',
        'xapMM:LastURL',
        'xapMM:RenditionOf',
        'xapMM:SaveID',
        // XMP Basic Job Ticket Schema
        'xapBJ:JobRef',
        // XMP Paged-Text Schema
        'xmpTPg:MaxPageSize',
        'xmpTPg:NPages',
        'xmpTPg:Fonts',
        'xmpTPg:Colorants',
        'xmpTPg:PlateNames',
        // Adobe PDF Schema
        'pdf:Keywords',
        'pdf:PDFVersion',
        'pdf:Producer',
        // Photoshop Schema
        'photoshop:AuthorsPosition',
        'photoshop:CaptionWriter',
        'photoshop:Category',
        'photoshop:City',
        'photoshop:Country',
        'photoshop:Credit',
        'photoshop:DateCreated',
        'photoshop:Headline',
        'photoshop:History',
        // Not in XMP spec
        'photoshop:Instructions',
        'photoshop:Source',
        'photoshop:State',
        'photoshop:SupplementalCategories',
        'photoshop:TransmissionReference',
        'photoshop:Urgency',
        // EXIF Schemas
        'tiff:ImageWidth',
        'tiff:ImageLength',
        'tiff:BitsPerSample',
        'tiff:Compression',
        'tiff:PhotometricInterpretation',
        'tiff:Orientation',
        'tiff:SamplesPerPixel',
        'tiff:PlanarConfiguration',
        'tiff:YCbCrSubSampling',
        'tiff:YCbCrPositioning',
        'tiff:XResolution',
        'tiff:YResolution',
        'tiff:ResolutionUnit',
        'tiff:TransferFunction',
        'tiff:WhitePoint',
        'tiff:PrimaryChromaticities',
        'tiff:YCbCrCoefficients',
        'tiff:ReferenceBlackWhite',
        'tiff:DateTime',
        'tiff:ImageDescription',
        'tiff:Make',
        'tiff:Model',
        'tiff:Software',
        'tiff:Artist',
        'tiff:Copyright',
        'exif:ExifVersion',
        'exif:FlashpixVersion',
        'exif:ColorSpace',
        'exif:ComponentsConfiguration',
        'exif:CompressedBitsPerPixel',
        'exif:PixelXDimension',
        'exif:PixelYDimension',
        'exif:MakerNote',
        'exif:UserComment',
        'exif:RelatedSoundFile',
        'exif:DateTimeOriginal',
        'exif:DateTimeDigitized',
        'exif:ExposureTime',
        'exif:FNumber',
        'exif:ExposureProgram',
        'exif:SpectralSensitivity',
        'exif:ISOSpeedRatings',
        'exif:OECF',
        'exif:ShutterSpeedValue',
        'exif:ApertureValue',
        'exif:BrightnessValue',
        'exif:ExposureBiasValue',
        'exif:MaxApertureValue',
        'exif:SubjectDistance',
        'exif:MeteringMode',
        'exif:LightSource',
        'exif:Flash',
        'exif:FocalLength',
        'exif:SubjectArea',
        'exif:FlashEnergy',
        'exif:SpatialFrequencyResponse',
        'exif:FocalPlaneXResolution',
        'exif:FocalPlaneYResolution',
        'exif:FocalPlaneResolutionUnit',
        'exif:SubjectLocation',
        'exif:SensingMethod',
        'exif:FileSource',
        'exif:SceneType',
        'exif:CFAPattern',
        'exif:CustomRendered',
        'exif:ExposureMode',
        'exif:WhiteBalance',
        'exif:DigitalZoomRatio',
        'exif:FocalLengthIn35mmFilm',
        'exif:SceneCaptureType',
        'exif:GainControl',
        'exif:Contrast',
        'exif:Saturation',
        'exif:Sharpness',
        'exif:DeviceSettingDescription',
        'exif:SubjectDistanceRange',
        'exif:ImageUniqueID',
        'exif:GPSVersionID',
        'exif:GPSLatitude',
        'exif:GPSLongitude',
        'exif:GPSAltitudeRef',
        'exif:GPSAltitude',
        'exif:GPSTimeStamp',
        'exif:GPSSatellites',
        'exif:GPSStatus',
        'exif:GPSMeasureMode',
        'exif:GPSDOP',
        'exif:GPSSpeedRef',
        'exif:GPSSpeed',
        'exif:GPSTrackRef',
        'exif:GPSTrack',
        'exif:GPSImgDirectionRef',
        'exif:GPSImgDirection',
        'exif:GPSMapDatum',
        'exif:GPSDestLatitude',
        'exif:GPSDestLongitude',
        'exif:GPSDestBearingRef',
        'exif:GPSDestBearing',
        'exif:GPSDestDistanceRef',
        'exif:GPSDestDistance',
        'exif:GPSProcessingMethod',
        'exif:GPSAreaInformation',
        'exif:GPSDifferential',
        'stDim:w',
        'stDim:h',
        'stDim:unit',
        'xapGImg:height',
        'xapGImg:width',
        'xapGImg:format',
        'xapGImg:image',
        'stEvt:action',
        'stEvt:instanceID',
        'stEvt:parameters',
        'stEvt:softwareAgent',
        'stEvt:when',
        'stRef:instanceID',
        'stRef:documentID',
        'stRef:versionID',
        'stRef:renditionClass',
        'stRef:renditionParams',
        'stRef:manager',
        'stRef:managerVariant',
        'stRef:manageTo',
        'stRef:manageUI',
        'stVer:comments',
        'stVer:event',
        'stVer:modifyDate',
        'stVer:modifier',
        'stVer:version',
        'stJob:name',
        'stJob:id',
        'stJob:url',
        // Exif Flash
        'exif:Fired',
        'exif:Return',
        'exif:Mode',
        'exif:Function',
        'exif:RedEyeMode',
        // Exif OECF/SFR, CFAPattern and DeviceSettings structure fields
        // (Columns/Rows/Values are shared between them, so listed once)
        'exif:Columns',
        'exif:Rows',
        'exif:Names',
        'exif:Values',
        'exif:Settings',
    );

    /**
     * JPEG marker code (second byte of the 0xFF-prefixed marker) => name.
     *
     * @var array
     */
    protected static $JPEG_Segment_Names = array(
        0x01 => 'TEM',
        0x02 => 'RES',
        0xC0 => 'SOF0',
        0xC1 => 'SOF1',
        0xC2 => 'SOF2',
        0xC3 => 'SOF3',
        0xC4 => 'DHT',
        0xC5 => 'SOF5',
        0xC6 => 'SOF6',
        0xC7 => 'SOF7',
        0xC8 => 'JPG',
        0xC9 => 'SOF9',
        0xCA => 'SOF10',
        0xCB => 'SOF11',
        0xCC => 'DAC',
        0xCD => 'SOF13',
        0xCE => 'SOF14',
        0xCF => 'SOF15',
        0xD0 => 'RST0',
        0xD1 => 'RST1',
        0xD2 => 'RST2',
        0xD3 => 'RST3',
        0xD4 => 'RST4',
        0xD5 => 'RST5',
        0xD6 => 'RST6',
        0xD7 => 'RST7',
        0xD8 => 'SOI',
        0xD9 => 'EOI',
        0xDA => 'SOS',
        0xDB => 'DQT',
        0xDC => 'DNL',
        0xDD => 'DRI',
        0xDE => 'DHP',
        0xDF => 'EXP',
        0xE0 => 'APP0',
        0xE1 => 'APP1',
        0xE2 => 'APP2',
        0xE3 => 'APP3',
        0xE4 => 'APP4',
        0xE5 => 'APP5',
        0xE6 => 'APP6',
        0xE7 => 'APP7',
        0xE8 => 'APP8',
        0xE9 => 'APP9',
        0xEA => 'APP10',
        0xEB => 'APP11',
        0xEC => 'APP12',
        0xED => 'APP13',
        0xEE => 'APP14',
        0xEF => 'APP15',
        0xF0 => 'JPG0',
        0xF1 => 'JPG1',
        0xF2 => 'JPG2',
        0xF3 => 'JPG3',
        0xF4 => 'JPG4',
        0xF5 => 'JPG5',
        0xF6 => 'JPG6',
        0xF7 => 'JPG7',
        0xF8 => 'JPG8',
        0xF9 => 'JPG9',
        0xFA => 'JPG10',
        0xFB => 'JPG11',
        0xFC => 'JPG12',
        0xFD => 'JPG13',
        0xFE => 'COM',
    );

    /**
     * Optionally parses a file right away.
     *
     * Image_XMP() was the PHP4-style constructor of the original Image_XMP
     * class; in a class called Xmp it is an ordinary method and is never run
     * by "new". This constructor restores parse-on-instantiation while still
     * allowing argument-less construction.
     *
     * @param string|null $sFilename path of the JPEG file, or null to defer
     */
    public function __construct($sFilename = null)
    {
        if ($sFilename !== null) {
            $this->Image_XMP($sFilename);
        }
    }

    /**
     * Returns the status of the last XMP extraction.
     *
     * @return bool true if an XMP packet was found and parsed
     */
    public function isValid()
    {
        return $this->_bXMPParse;
    }

    /**
     * Returns a copy of all XMP tags extracted from the image.
     *
     * @return array tag name => string value or list of values
     */
    public function getAllTags()
    {
        return $this->_aXMP;
    }

    /**
     * Reads all JPEG header segments (everything up to and including the
     * Start-Of-Scan header) from a file.
     *
     * @param string $filename path of the JPEG file
     *
     * @return array|false list of segments, each an array with the keys
     *                     'SegType' (int marker code), 'SegName' (string, or
     *                     null for an unknown marker), 'SegDataStart' (int
     *                     file offset of the payload) and 'SegData' (string
     *                     payload); false if the file cannot be read or is
     *                     not a well-formed JPEG
     */
    public function _get_jpeg_header_data($filename)
    {
        if (!is_readable($filename) || !is_file($filename)) {
            return false;
        }

        // Do not let a client disconnect abort the script while the file is
        // open. Remember the previous setting so that it can be restored on
        // every exit path instead of being forced to "off".
        $previous_abort_setting = ignore_user_abort(true);

        $headerdata = false;
        $filehnd = fopen($filename, 'rb');
        if ($filehnd !== false) {
            $headerdata = $this->readJpegHeaderSegments($filehnd);
            fclose($filehnd);
        }

        ignore_user_abort((bool) $previous_abort_setting);

        return $headerdata;
    }

    /**
     * Walks the segment chain of an already opened JPEG stream.
     *
     * @param resource $filehnd stream positioned at the start of the file
     *
     * @return array|false see _get_jpeg_header_data()
     */
    private function readJpegHeaderSegments($filehnd)
    {
        // A JPEG file must start with the SOI marker (FF D8).
        if (fread($filehnd, 2) !== "\xFF\xD8") {
            return false;
        }

        $headerdata = array();

        $marker = $this->readJpegMarker($filehnd);
        if ($marker === false) {
            return false;
        }

        // Stop at EOI (0xD9); the SOS case leaves the loop from the inside.
        while ($marker !== 0xD9) {
            // Restart markers (RST0-RST7) carry neither a size nor a payload.
            if (($marker < 0xD0) || ($marker > 0xD7)) {
                $sizestr = fread($filehnd, 2);
                if (!is_string($sizestr) || (strlen($sizestr) < 2)) {
                    // truncated file
                    return false;
                }

                // The big-endian size includes the two size bytes themselves.
                $decodedsize = unpack('nsize', $sizestr);
                $payload_length = $decodedsize['size'] - 2;
                if ($payload_length < 0) {
                    // a size below 2 cannot occur in a valid file
                    return false;
                }

                $segdatastart = ftell($filehnd);

                // fread() rejects a zero length, but an empty payload is legal.
                $segdata = ($payload_length > 0) ? fread($filehnd, $payload_length) : '';

                $headerdata[] = array(
                    'SegType'      => $marker,
                    'SegName'      => (isset(self::$JPEG_Segment_Names[$marker]) ? self::$JPEG_Segment_Names[$marker] : null),
                    'SegDataStart' => $segdatastart,
                    'SegData'      => $segdata,
                );
            }

            // After the SOS header the entropy-coded image data follows; no
            // further header segments can be read.
            if ($marker === 0xDA) {
                break;
            }

            $marker = $this->readJpegMarker($filehnd);
            if ($marker === false) {
                // no FF where a marker is required - file is corrupted
                return false;
            }
        }

        return $headerdata;
    }

    /**
     * Reads one two-byte marker (FF xx) from the stream.
     *
     * @param resource $filehnd
     *
     * @return int|false marker code xx, or false on a short read or when the
     *                   first byte is not 0xFF
     */
    private function readJpegMarker($filehnd)
    {
        $bytes = fread($filehnd, 2);
        if (!is_string($bytes) || (strlen($bytes) < 2) || ($bytes[0] !== "\xFF")) {
            return false;
        }

        return ord($bytes[1]);
    }

    /**
     * Retrieves the raw XMP packet (XML text) from the APP1 segment of a
     * JPEG file.
     *
     * @param string $filename path of the JPEG file
     *
     * @return string|false XML text, or false if the file has no XMP packet
     *                      or could not be read
     */
    public function _get_XMP_text($filename)
    {
        $jpeg_header_data = $this->_get_jpeg_header_data($filename);
        if (!is_array($jpeg_header_data)) {
            return false;
        }

        // Adobe XMP/RDF label that prefixes the packet inside APP1
        $xmp_signature = 'http://ns.adobe.com/xap/1.0/'."\x00";
        $signature_length = strlen($xmp_signature);

        foreach ($jpeg_header_data as $segment) {
            if ($segment['SegName'] !== 'APP1') {
                continue;
            }
            if (strncmp($segment['SegData'], $xmp_signature, $signature_length) !== 0) {
                // APP1 is also used for Exif data
                continue;
            }

            // trim() should not be necessary, but files with a null-terminated
            // block exist in the wild (known samples from Apple Aperture) and
            // cause problems elsewhere
            // (see http://www.getid3.org/phpBB3/viewtopic.php?f=4&t=1153)
            return trim((string) substr($segment['SegData'], $signature_length));
        }

        return false;
    }

    /**
     * Parses a string containing XMP data (XML) into a flat array of the
     * whitelisted properties.
     *
     * Simple properties become "name => string"; properties holding an
     * rdf:Seq / rdf:Bag / rdf:Alt container become "name => list of strings".
     * For language alternatives only the "x-default" entry is kept.
     *
     * @param string $xmltext XML text of the XMP packet
     *
     * @return array|false extracted properties, or false if the text is
     *                     empty or not parseable
     */
    public function read_XMP_array_from_text($xmltext)
    {
        // Check if there actually is any text to parse
        if (trim($xmltext) == '') {
            return false;
        }

        $xml_parser = xml_parser_create('UTF-8');

        // Whitespace skipping stays off: it would also destroy newlines
        // (&#xA;) inside property values.
        // Case folding is turned off because XML is case sensitive.
        if ((xml_parser_set_option($xml_parser, XML_OPTION_SKIP_WHITE, 0) == false)
            || (xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 0) == false)
        ) {
            xml_parser_free($xml_parser);

            return false;
        }

        $parse_status = xml_parse_into_struct($xml_parser, $xmltext, $values, $tags);
        xml_parser_free($xml_parser);
        if ($parse_status == 0) {
            return false;
        }

        // O(1) membership test instead of scanning the whitelist per element
        $wanted_tags = array_flip(self::$XMP_tag_captions);

        $xmp_array = array();
        $current_property = ''; // wanted property currently open, '' if none
        $container_index = -1;  // -1 = no container open, otherwise index of next container item

        foreach ($values as $xml_elem) {
            switch ($xml_elem['tag']) {
                case 'x:xmpmeta':
                    // only defined attribute is x:xmptk (toolkit version)
                case 'rdf:RDF':
                    // required element immediately within x:xmpmeta; no data here
                case 'rdf:ID':
                case 'rdf:nodeID':
                    // nothing to extract
                    break;

                case 'rdf:Description':
                    // rdf:Description may carry wanted properties as attributes
                    $carries_attributes = (($xml_elem['type'] == 'open') || ($xml_elem['type'] == 'complete'));
                    if ($carries_attributes && array_key_exists('attributes', $xml_elem)) {
                        foreach ($xml_elem['attributes'] as $key => $attribute_value) {
                            if (isset($wanted_tags[$key])) {
                                $xmp_array[$key] = $attribute_value;
                            }
                        }
                    }
                    break;

                case 'rdf:li':
                    // Container member
                    if ($xml_elem['type'] != 'complete') {
                        break;
                    }
                    // For Lang Alt (language alternatives) keep the default language only
                    if (isset($xml_elem['attributes']['xml:lang']) && ($xml_elem['attributes']['xml:lang'] != 'x-default')) {
                        break;
                    }
                    if ($current_property != '') {
                        // Guard against the property having been stored as a
                        // plain string by an earlier "complete" element.
                        if (!isset($xmp_array[$current_property]) || !is_array($xmp_array[$current_property])) {
                            $xmp_array[$current_property] = array();
                        }
                        $xmp_array[$current_property][$container_index] = (isset($xml_elem['value']) ? $xml_elem['value'] : '');
                        $container_index += 1;
                    }
                    // else: member of a property that is not wanted
                    break;

                case 'rdf:Seq':
                case 'rdf:Bag':
                case 'rdf:Alt':
                    // Container boundaries reset the member index
                    if ($xml_elem['type'] == 'open') {
                        $container_index = 0;
                    } elseif ($xml_elem['type'] == 'close') {
                        $container_index = -1;
                    }
                    break;

                default:
                    if (!isset($wanted_tags[$xml_elem['tag']])) {
                        break;
                    }
                    switch ($xml_elem['type']) {
                        case 'open':
                            $current_property = $xml_elem['tag'];
                            break;

                        case 'close':
                            $current_property = '';
                            break;

                        case 'complete':
                            $xmp_array[$xml_elem['tag']] = (isset($xml_elem['value']) ? $xml_elem['value'] : '');
                            break;

                        case 'cdata':
                            // whitespace between child elements - ignore
                            break;
                    }
                    break;
            }
        }

        return $xmp_array;
    }

    /**
     * Extracts the XMP tags of a file into this object.
     *
     * On success getAllTags() returns the extracted tags and isValid()
     * returns true; otherwise the tag array is empty and isValid() is false.
     *
     * @param string $sFilename path of the JPEG file
     */
    public function Image_XMP($sFilename)
    {
        $this->_sFilename = $sFilename;

        // Start from a clean state so a failed run never reports stale data
        $this->_aXMP = array();
        $this->_bXMPParse = false;

        if (!is_file($this->_sFilename)) {
            return;
        }

        $xmp_data = $this->_get_XMP_text($sFilename);
        if (!$xmp_data) {
            return;
        }

        $parsed_tags = $this->read_XMP_array_from_text($xmp_data);
        if (is_array($parsed_tags)) {
            $this->_aXMP = $parsed_tags;
            $this->_bXMPParse = true;
        }
    }
}
static $JPEG_Segment_Names
Definition: Xmp.php:363
GetId3() by James Heinrich info@getid3.org //.
Definition: Xmp.php:61
static $XMP_tag_captions
Definition: Xmp.php:89
read_XMP_array_from_text($xmltext)
Parses a string containing XMP data (XML), and returns an array which contains all the XMP (XML) info...
Definition: Xmp.php:596
getAllTags()
Get a copy of all XMP tags extracted from the image.
Definition: Xmp.php:449
Create styles array
The data for the language used.
_get_XMP_text($filename)
Retrieves XMP information from an APP1 JPEG segment and returns the raw XML text as a string...
Definition: Xmp.php:565
Image_XMP($sFilename)
Constructor.
Definition: Xmp.php:752
isValid()
Returns the status of XMP parsing during instantiation.
Definition: Xmp.php:439
_get_jpeg_header_data($filename)
Reads all the JPEG header segments from an JPEG image file into an array.
Definition: Xmp.php:461