ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
Xmp.php
Go to the documentation of this file.
1<?php
2
3namespace GetId3\Module\Tag;
4
7// available at http://getid3.sourceforge.net //
8// or http://www.getid3.org //
10// See readme.txt for more details //
12// //
13// module.tag.xmp.php //
14// module for analyzing XMP metadata (e.g. in JPEG files) //
15// dependencies: NONE //
16// //
18// //
19// Module originally written [2009-Mar-26] by //
20// Nigel Barnes <ngbarnesØhotmail*com> //
21// Bundled into GetId3 with permission //
22// called by GetId3 in module.graphic.jpg.php //
23// ///
25
26/**************************************************************************************************
27 * SWISScenter Source Nigel Barnes
28 *
29 * Provides functions for reading information from the 'APP1' Extensible Metadata
30 * Platform (XMP) segment of JPEG format files.
31 * This XMP segment is XML based and contains the Resource Description Framework (RDF)
32 * data, which itself can contain the Dublin Core Metadata Initiative (DCMI) information.
33 *
34 * This code uses segments from the JPEG Metadata Toolkit project by Evan Hunter.
35 *************************************************************************************************/
36
/**
 * Reads Extensible Metadata Platform (XMP) metadata from the APP1 segment of a JPEG file.
 *
 * The XMP packet is XML/RDF. Only the tags listed in self::$XMP_tag_captions are extracted;
 * everything else in the packet is ignored.
 *
 * Typical use:
 *     $xmp = new Xmp('/path/to/image.jpg');
 *     if ($xmp->isValid()) {
 *         $tags = $xmp->getAllTags();
 *     }
 */
class Xmp
{
    /**
     * Name of the file most recently passed to Image_XMP().
     *
     * @var string|null
     */
    public $_sFilename = null;

    /**
     * Tags extracted by the last successful parse, keyed by qualified tag name (e.g. 'dc:title').
     * Container properties (rdf:Seq / rdf:Bag / rdf:Alt) are stored as integer-indexed arrays.
     *
     * @var array
     */
    public $_aXMP = array();

    /**
     * True when the last call to Image_XMP() found an XMP packet and parsed it.
     *
     * @var bool
     */
    public $_bXMPParse = false;

    /**
     * Qualified names of the XMP tags that read_XMP_array_from_text() extracts.
     *
     * @var array
     */
    protected static $XMP_tag_captions = array(
        // IPTC Core
        'Iptc4xmpCore:CiAdrCity',
        'Iptc4xmpCore:CiAdrCtry',
        'Iptc4xmpCore:CiAdrExtadr',
        'Iptc4xmpCore:CiAdrPcode',
        'Iptc4xmpCore:CiAdrRegion',
        'Iptc4xmpCore:CiEmailWork',
        'Iptc4xmpCore:CiTelWork',
        'Iptc4xmpCore:CiUrlWork',
        'Iptc4xmpCore:CountryCode',
        'Iptc4xmpCore:CreatorContactInfo',
        'Iptc4xmpCore:IntellectualGenre',
        'Iptc4xmpCore:Location',
        'Iptc4xmpCore:Scene',
        'Iptc4xmpCore:SubjectCode',
        // Dublin Core Schema
        'dc:contributor',
        'dc:coverage',
        'dc:creator',
        'dc:date',
        'dc:description',
        'dc:format',
        'dc:identifier',
        'dc:language',
        'dc:publisher',
        'dc:relation',
        'dc:rights',
        'dc:source',
        'dc:subject',
        'dc:title',
        'dc:type',
        // XMP Basic Schema
        'xmp:Advisory',
        'xmp:BaseURL',
        'xmp:CreateDate',
        'xmp:CreatorTool',
        'xmp:Identifier',
        'xmp:Label',
        'xmp:MetadataDate',
        'xmp:ModifyDate',
        'xmp:Nickname',
        'xmp:Rating',
        'xmp:Thumbnails',
        'xmpidq:Scheme',
        // XMP Rights Management Schema
        'xmpRights:Certificate',
        'xmpRights:Marked',
        'xmpRights:Owner',
        'xmpRights:UsageTerms',
        'xmpRights:WebStatement',
        // These are not in spec but Photoshop CS seems to use them
        'xap:Advisory',
        'xap:BaseURL',
        'xap:CreateDate',
        'xap:CreatorTool',
        'xap:Identifier',
        'xap:MetadataDate',
        'xap:ModifyDate',
        'xap:Nickname',
        'xap:Rating',
        'xap:Thumbnails',
        'xapidq:Scheme',
        'xapRights:Certificate',
        'xapRights:Copyright',
        'xapRights:Marked',
        'xapRights:Owner',
        'xapRights:UsageTerms',
        'xapRights:WebStatement',
        // XMP Media Management Schema
        'xapMM:DerivedFrom',
        'xapMM:DocumentID',
        'xapMM:History',
        'xapMM:InstanceID',
        'xapMM:ManagedFrom',
        'xapMM:Manager',
        'xapMM:ManageTo',
        'xapMM:ManageUI',
        'xapMM:ManagerVariant',
        'xapMM:RenditionClass',
        'xapMM:RenditionParams',
        'xapMM:VersionID',
        'xapMM:Versions',
        'xapMM:LastURL',
        'xapMM:RenditionOf',
        'xapMM:SaveID',
        // XMP Basic Job Ticket Schema
        'xapBJ:JobRef',
        // XMP Paged-Text Schema
        'xmpTPg:MaxPageSize',
        'xmpTPg:NPages',
        'xmpTPg:Fonts',
        'xmpTPg:Colorants',
        'xmpTPg:PlateNames',
        // Adobe PDF Schema
        'pdf:Keywords',
        'pdf:PDFVersion',
        'pdf:Producer',
        // Photoshop Schema
        'photoshop:AuthorsPosition',
        'photoshop:CaptionWriter',
        'photoshop:Category',
        'photoshop:City',
        'photoshop:Country',
        'photoshop:Credit',
        'photoshop:DateCreated',
        'photoshop:Headline',
        'photoshop:History',
        // Not in XMP spec
        'photoshop:Instructions',
        'photoshop:Source',
        'photoshop:State',
        'photoshop:SupplementalCategories',
        'photoshop:TransmissionReference',
        'photoshop:Urgency',
        // EXIF Schemas
        'tiff:ImageWidth',
        'tiff:ImageLength',
        'tiff:BitsPerSample',
        'tiff:Compression',
        'tiff:PhotometricInterpretation',
        'tiff:Orientation',
        'tiff:SamplesPerPixel',
        'tiff:PlanarConfiguration',
        'tiff:YCbCrSubSampling',
        'tiff:YCbCrPositioning',
        'tiff:XResolution',
        'tiff:YResolution',
        'tiff:ResolutionUnit',
        'tiff:TransferFunction',
        'tiff:WhitePoint',
        'tiff:PrimaryChromaticities',
        'tiff:YCbCrCoefficients',
        'tiff:ReferenceBlackWhite',
        'tiff:DateTime',
        'tiff:ImageDescription',
        'tiff:Make',
        'tiff:Model',
        'tiff:Software',
        'tiff:Artist',
        'tiff:Copyright',
        'exif:ExifVersion',
        'exif:FlashpixVersion',
        'exif:ColorSpace',
        'exif:ComponentsConfiguration',
        'exif:CompressedBitsPerPixel',
        'exif:PixelXDimension',
        'exif:PixelYDimension',
        'exif:MakerNote',
        'exif:UserComment',
        'exif:RelatedSoundFile',
        'exif:DateTimeOriginal',
        'exif:DateTimeDigitized',
        'exif:ExposureTime',
        'exif:FNumber',
        'exif:ExposureProgram',
        'exif:SpectralSensitivity',
        'exif:ISOSpeedRatings',
        'exif:OECF',
        'exif:ShutterSpeedValue',
        'exif:ApertureValue',
        'exif:BrightnessValue',
        'exif:ExposureBiasValue',
        'exif:MaxApertureValue',
        'exif:SubjectDistance',
        'exif:MeteringMode',
        'exif:LightSource',
        'exif:Flash',
        'exif:FocalLength',
        'exif:SubjectArea',
        'exif:FlashEnergy',
        'exif:SpatialFrequencyResponse',
        'exif:FocalPlaneXResolution',
        'exif:FocalPlaneYResolution',
        'exif:FocalPlaneResolutionUnit',
        'exif:SubjectLocation',
        'exif:SensingMethod',
        'exif:FileSource',
        'exif:SceneType',
        'exif:CFAPattern',
        'exif:CustomRendered',
        'exif:ExposureMode',
        'exif:WhiteBalance',
        'exif:DigitalZoomRatio',
        'exif:FocalLengthIn35mmFilm',
        'exif:SceneCaptureType',
        'exif:GainControl',
        'exif:Contrast',
        'exif:Saturation',
        'exif:Sharpness',
        'exif:DeviceSettingDescription',
        'exif:SubjectDistanceRange',
        'exif:ImageUniqueID',
        'exif:GPSVersionID',
        'exif:GPSLatitude',
        'exif:GPSLongitude',
        'exif:GPSAltitudeRef',
        'exif:GPSAltitude',
        'exif:GPSTimeStamp',
        'exif:GPSSatellites',
        'exif:GPSStatus',
        'exif:GPSMeasureMode',
        'exif:GPSDOP',
        'exif:GPSSpeedRef',
        'exif:GPSSpeed',
        'exif:GPSTrackRef',
        'exif:GPSTrack',
        'exif:GPSImgDirectionRef',
        'exif:GPSImgDirection',
        'exif:GPSMapDatum',
        'exif:GPSDestLatitude',
        'exif:GPSDestLongitude',
        'exif:GPSDestBearingRef',
        'exif:GPSDestBearing',
        'exif:GPSDestDistanceRef',
        'exif:GPSDestDistance',
        'exif:GPSProcessingMethod',
        'exif:GPSAreaInformation',
        'exif:GPSDifferential',
        'stDim:w',
        'stDim:h',
        'stDim:unit',
        'xapGImg:height',
        'xapGImg:width',
        'xapGImg:format',
        'xapGImg:image',
        'stEvt:action',
        'stEvt:instanceID',
        'stEvt:parameters',
        'stEvt:softwareAgent',
        'stEvt:when',
        'stRef:instanceID',
        'stRef:documentID',
        'stRef:versionID',
        'stRef:renditionClass',
        'stRef:renditionParams',
        'stRef:manager',
        'stRef:managerVariant',
        'stRef:manageTo',
        'stRef:manageUI',
        'stVer:comments',
        'stVer:event',
        'stVer:modifyDate',
        'stVer:modifier',
        'stVer:version',
        'stJob:name',
        'stJob:id',
        'stJob:url',
        // Exif Flash
        'exif:Fired',
        'exif:Return',
        'exif:Mode',
        'exif:Function',
        'exif:RedEyeMode',
        // Exif OECF/SFR
        'exif:Columns',
        'exif:Rows',
        'exif:Names',
        'exif:Values',
        // Exif CFAPattern
        'exif:Columns',
        'exif:Rows',
        'exif:Values',
        // Exif DeviceSettings
        'exif:Columns',
        'exif:Rows',
        'exif:Settings',
    );

    /**
     * JPEG segment names, keyed by the second byte of the two-byte marker (the first is always 0xFF).
     *
     * @var array
     */
    protected static $JPEG_Segment_Names = array(
        0x01 => 'TEM',
        0x02 => 'RES',
        0xC0 => 'SOF0',
        0xC1 => 'SOF1',
        0xC2 => 'SOF2',
        0xC3 => 'SOF3',
        0xC4 => 'DHT',
        0xC5 => 'SOF5',
        0xC6 => 'SOF6',
        0xC7 => 'SOF7',
        0xC8 => 'JPG',
        0xC9 => 'SOF9',
        0xCA => 'SOF10',
        0xCB => 'SOF11',
        0xCC => 'DAC',
        0xCD => 'SOF13',
        0xCE => 'SOF14',
        0xCF => 'SOF15',
        0xD0 => 'RST0',
        0xD1 => 'RST1',
        0xD2 => 'RST2',
        0xD3 => 'RST3',
        0xD4 => 'RST4',
        0xD5 => 'RST5',
        0xD6 => 'RST6',
        0xD7 => 'RST7',
        0xD8 => 'SOI',
        0xD9 => 'EOI',
        0xDA => 'SOS',
        0xDB => 'DQT',
        0xDC => 'DNL',
        0xDD => 'DRI',
        0xDE => 'DHP',
        0xDF => 'EXP',
        0xE0 => 'APP0',
        0xE1 => 'APP1',
        0xE2 => 'APP2',
        0xE3 => 'APP3',
        0xE4 => 'APP4',
        0xE5 => 'APP5',
        0xE6 => 'APP6',
        0xE7 => 'APP7',
        0xE8 => 'APP8',
        0xE9 => 'APP9',
        0xEA => 'APP10',
        0xEB => 'APP11',
        0xEC => 'APP12',
        0xED => 'APP13',
        0xEE => 'APP14',
        0xEF => 'APP15',
        0xF0 => 'JPG0',
        0xF1 => 'JPG1',
        0xF2 => 'JPG2',
        0xF3 => 'JPG3',
        0xF4 => 'JPG4',
        0xF5 => 'JPG5',
        0xF6 => 'JPG6',
        0xF7 => 'JPG7',
        0xF8 => 'JPG8',
        0xF9 => 'JPG9',
        0xFA => 'JPG10',
        0xFB => 'JPG11',
        0xFC => 'JPG12',
        0xFD => 'JPG13',
        0xFE => 'COM',
    );

    /**
     * Optionally parses a file straight away.
     *
     * Image_XMP() was a PHP4-style constructor before the class was renamed; this keeps
     * "new Xmp($file)" working as documented, while "new Xmp()" stays side-effect free.
     *
     * @param string|null $sFilename JPEG file to read, or null to create an empty reader
     */
    public function __construct($sFilename = null)
    {
        if ($sFilename !== null) {
            $this->Image_XMP($sFilename);
        }
    }

    /**
     * Returns the status of the most recent XMP parse.
     *
     * @return bool true if an XMP packet was found and parsed
     */
    public function isValid()
    {
        return $this->_bXMPParse;
    }

    /**
     * Returns all XMP tags extracted from the image.
     *
     * @return array tag name => value (string, or list of strings for container properties)
     */
    public function getAllTags()
    {
        return $this->_aXMP;
    }

    /**
     * Reads all JPEG header segments of a file, up to and including the SOS segment.
     *
     * @param string $filename path of the JPEG file
     *
     * @return array|false list of arrays with keys 'SegType' (int marker byte), 'SegName',
     *                     'SegDataStart' (file offset of the payload) and 'SegData' (payload bytes);
     *                     false if the file cannot be opened or is not a well-formed JPEG
     */
    public function _get_jpeg_header_data($filename)
    {
        if (!is_readable($filename) || !is_file($filename)) {
            return false;
        }

        $filehnd = fopen($filename, 'rb');
        if ($filehnd === false) {
            return false;
        }

        // Keep running if the client disconnects mid-read, but remember the caller's
        // setting so that it is restored on every exit path rather than forced to "off".
        $previous_abort_setting = ignore_user_abort(true);

        $headerdata = $this->_read_jpeg_segments($filehnd);

        fclose($filehnd);
        ignore_user_abort((bool) $previous_abort_setting);

        return $headerdata;
    }

    /**
     * Walks the marker segments of an already opened JPEG stream.
     *
     * @param resource $filehnd stream positioned at the start of the file
     *
     * @return array|false see _get_jpeg_header_data()
     */
    private function _read_jpeg_segments($filehnd)
    {
        // A JPEG always starts with the SOI marker (FF D8)
        if (fread($filehnd, 2) !== "\xFF\xD8") {
            return false;
        }

        // Marker of the first segment; every marker starts with FF
        $data = fread($filehnd, 2);
        if (!is_string($data) || strlen($data) < 2 || $data[0] !== "\xFF") {
            return false;
        }

        $headerdata = array();

        // Stop at EOI (FF D9), at end of file, or (below) once the SOS segment has been read
        while (($data[1] !== "\xD9") && !feof($filehnd)) {
            $marker = ord($data[1]);

            // Restart markers (RST0-RST7) carry neither a length nor a payload
            if (($marker < 0xD0) || ($marker > 0xD7)) {
                $sizestr = fread($filehnd, 2);
                if (!is_string($sizestr) || strlen($sizestr) < 2) {
                    return false;
                }

                // Big-endian 16 bit length which includes the two length bytes themselves
                $decodedsize = unpack('nsize', $sizestr);
                if ($decodedsize['size'] < 2) {
                    return false;
                }

                $segdatastart = ftell($filehnd);

                // fread() rejects a zero length, so an empty payload is handled explicitly
                $payload_length = $decodedsize['size'] - 2;
                $segdata = ($payload_length > 0) ? fread($filehnd, $payload_length) : '';
                if ($segdata === false) {
                    $segdata = '';
                }

                $headerdata[] = array(
                    'SegType'      => $marker,
                    'SegName'      => isset(self::$JPEG_Segment_Names[$marker]) ? self::$JPEG_Segment_Names[$marker] : 'UNKNOWN',
                    'SegDataStart' => $segdatastart,
                    'SegData'      => $segdata,
                );
            }

            // After SOS (FF DA) the compressed image data follows - no more headers
            if ($marker === 0xDA) {
                break;
            }

            // Marker of the next segment
            $data = fread($filehnd, 2);
            if (!is_string($data) || strlen($data) < 2 || $data[0] !== "\xFF") {
                // Missing FF - the JPEG is probably corrupted
                return false;
            }
        }

        return $headerdata;
    }

    /**
     * Retrieves the raw XMP packet (XML text) from the APP1 segment of a JPEG file.
     *
     * @param string $filename path of the JPEG file
     *
     * @return string|false the XML text, or false if the file has no XMP segment or is not a readable JPEG
     */
    public function _get_XMP_text($filename)
    {
        $jpeg_header_data = $this->_get_jpeg_header_data($filename);
        if (!is_array($jpeg_header_data)) {
            return false;
        }

        // An XMP APP1 payload starts with this null-terminated namespace label
        $xmp_label = 'http://ns.adobe.com/xap/1.0/'."\x00";
        $label_length = strlen($xmp_label);

        foreach ($jpeg_header_data as $segment) {
            if ($segment['SegName'] !== 'APP1') {
                continue;
            }
            if (strncmp($segment['SegData'], $xmp_label, $label_length) === 0) {
                // trim() should not be necessary, but some files found in the wild (known samples from
                // Apple Aperture) have a null-terminated block which causes problems elsewhere
                // (see http://www.getid3.org/phpBB3/viewtopic.php?f=4&t=1153)
                return trim((string) substr($segment['SegData'], $label_length));
            }
        }

        return false;
    }

    /**
     * Parses XMP data (XML) and returns the tags listed in self::$XMP_tag_captions.
     *
     * Wanted attributes of rdf:Description and wanted simple elements are stored as strings.
     * For wanted elements holding an rdf:Seq / rdf:Bag / rdf:Alt container the rdf:li members
     * are stored as a list; members with an xml:lang other than 'x-default' are skipped.
     *
     * @param string $xmltext the XMP packet
     *
     * @return array|false tag name => value, or false if the text is empty or cannot be parsed
     */
    public function read_XMP_array_from_text($xmltext)
    {
        $xmltext = (string) $xmltext;
        if (trim($xmltext) === '') {
            return false;
        }

        $xml_parser = xml_parser_create('UTF-8');

        // Whitespace must be kept: skipping it would also destroy newlines (&#xA;) inside values
        if (xml_parser_set_option($xml_parser, XML_OPTION_SKIP_WHITE, 0) == false) {
            // Error setting whitespace handling - destroy the parser and return
            xml_parser_free($xml_parser);

            return false;
        }

        // XML is case sensitive, so case folding (uppercasing of names) has to be turned off
        if (xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 0) == false) {
            // Error setting case folding - destroy the parser and return
            xml_parser_free($xml_parser);

            return false;
        }

        $values = array();
        $tags = array();
        $parse_result = xml_parse_into_struct($xml_parser, $xmltext, $values, $tags);
        xml_parser_free($xml_parser);

        if (!$parse_result) {
            return false;
        }

        // Flipped once so that each membership test is a hash lookup instead of a list scan
        $wanted_tags = array_flip(self::$XMP_tag_captions);

        $xmp_array = array();
        $current_property = ''; // wanted property whose container members are being collected
        $container_index = -1;  // -1 = no container open, otherwise index of the next member

        foreach ($values as $xml_elem) {
            $type = $xml_elem['type'];

            switch ($xml_elem['tag']) {
                case 'x:xmpmeta':  // wrapper; only attribute is x:xmptk (toolkit version)
                case 'rdf:RDF':    // required element immediately within x:xmpmeta; no data here
                case 'rdf:ID':
                case 'rdf:nodeID':
                    break;

                case 'rdf:Description':
                    // rdf:Description may carry wanted properties as attributes
                    if (($type === 'open' || $type === 'complete') && isset($xml_elem['attributes'])) {
                        foreach ($xml_elem['attributes'] as $key => $value) {
                            if (isset($wanted_tags[$key])) {
                                $xmp_array[$key] = $value;
                            }
                        }
                    }
                    break;

                case 'rdf:li':
                    // Container member
                    if ($type !== 'complete') {
                        break;
                    }
                    // For language alternatives only the default language is taken
                    if (isset($xml_elem['attributes']['xml:lang']) && ($xml_elem['attributes']['xml:lang'] != 'x-default')) {
                        break;
                    }
                    if ($current_property != '') {
                        // The property may already hold a plain string (e.g. from an attribute);
                        // writing an index into a string would be a string-offset write
                        if (!isset($xmp_array[$current_property]) || !is_array($xmp_array[$current_property])) {
                            $xmp_array[$current_property] = array();
                        }
                        $xmp_array[$current_property][$container_index] = (isset($xml_elem['value']) ? $xml_elem['value'] : '');
                        $container_index += 1;
                    }
                    // else: member of a property that is not wanted
                    break;

                case 'rdf:Seq':
                case 'rdf:Bag':
                case 'rdf:Alt':
                    // Container boundaries
                    if ($type === 'open') {
                        $container_index = 0;
                    } elseif ($type === 'close') {
                        $container_index = -1;
                    }
                    break;

                default:
                    if (!isset($wanted_tags[$xml_elem['tag']])) {
                        break;
                    }
                    if ($type === 'open') {
                        $current_property = $xml_elem['tag'];
                    } elseif ($type === 'close') {
                        $current_property = '';
                    } elseif ($type === 'complete') {
                        $xmp_array[$xml_elem['tag']] = (isset($xml_elem['value']) ? $xml_elem['value'] : '');
                    }
                    // 'cdata' is ignored
                    break;
            }
        }

        return $xmp_array;
    }

    /**
     * Reads and parses the XMP data of a file; the result is available through
     * isValid() and getAllTags().
     *
     * Any result of a previous call is discarded first.
     *
     * @param string $sFilename path of the JPEG file
     */
    public function Image_XMP($sFilename)
    {
        $this->_sFilename = $sFilename;
        $this->_aXMP = array();
        $this->_bXMPParse = false;

        if (!is_file($this->_sFilename)) {
            return;
        }

        $xmp_data = $this->_get_XMP_text($sFilename);
        if (!$xmp_data) {
            return;
        }

        // Only report success when the XML could actually be parsed
        $xmp_array = $this->read_XMP_array_from_text($xmp_data);
        if (is_array($xmp_array)) {
            $this->_aXMP = $xmp_array;
            $this->_bXMPParse = true;
        }
    }
}
An exception for terminating execution or to throw for unit testing.
GetId3() by James Heinrich info@getid3.org //.
Definition: Xmp.php:62
isValid()
Returns the status of XMP parsing during instantiation.
Definition: Xmp.php:439
static $XMP_tag_captions
Definition: Xmp.php:89
_get_jpeg_header_data($filename)
Reads all the JPEG header segments from a JPEG image file into an array.
Definition: Xmp.php:461
Image_XMP($sFilename)
Constructor.
Definition: Xmp.php:752
_get_XMP_text($filename)
Retrieves XMP information from an APP1 JPEG segment and returns the raw XML text as a string.
Definition: Xmp.php:565
getAllTags()
Get a copy of all XMP tags extracted from the image.
Definition: Xmp.php:449
static $JPEG_Segment_Names
Definition: Xmp.php:363
read_XMP_array_from_text($xmltext)
Parses a string containing XMP data (XML), and returns an array which contains all the XMP (XML) info...
Definition: Xmp.php:596