1111# All rights reserved.
1212#
1313# This program is free software; you can redistribute it and/or modify
14- # it under the same terms as Python itself.
14+ # it under the terms of the Artistic License or the GNU General Public
15+ # License (GPL). You may choose either license.
1516#
1617# VERSION = '1.9';
1718"""
2425import sys
2526import tempfile
2627from struct import pack , unpack
28+ import json
2729
28- __version__ = '2.1.4 '
30+ __version__ = '2.2.0 '
2931__author__ = 'Gulácsi, Tamás'
3032__updated_by__ = 'Campbell, James'
3133
@@ -177,7 +179,7 @@ def jpeg_get_variable_length(fh):
177179
178180 # Length includes itself, so must be at least 2
179181 if length < 2 :
180- logger .warn ("jpeg_get_variable_length: erroneous JPEG marker length" )
182+ logger .warning ("jpeg_get_variable_length: erroneous JPEG marker length" )
181183 return 0
182184 return length - 2
183185
@@ -192,7 +194,7 @@ def jpeg_next_marker(fh):
192194 try :
193195 byte = read_exactly (fh , 1 )
194196 while ord3 (byte ) != 0xff :
195- # logger.warn ("jpeg_next_marker: bogus stuff in Jpeg file at: ')
197+ # logger.warning ("jpeg_next_marker: bogus stuff in Jpeg file at: ')
196198 byte = read_exactly (fh , 1 )
197199
198200 # Now skip any extra 0xffs, which are valid padding.
@@ -360,15 +362,15 @@ def jpeg_debug_scan(filename): # pragma: no cover
360362 break
361363
362364 if ord3 (marker ) == 0 :
363- logger .warn ("Marker scan failed" )
365+ logger .warning ("Marker scan failed" )
364366 break
365367
366368 elif ord3 (marker ) == 0xd9 :
367369 logger .debug ("Marker scan hit end of image marker" )
368370 break
369371
370372 if not jpeg_skip_variable (fh ):
371- logger .warn ("jpeg_skip_variable failed" )
373+ logger .warning ("jpeg_skip_variable failed" )
372374 return None
373375
374376
@@ -480,7 +482,7 @@ def collect_adobe_parts(data):
480482 101 : 'country/primary location name' ,
481483 103 : 'original transmission reference' ,
482484 105 : 'headline' ,
483- 110 : 'credit' ,
485+ 110 : 'credit line' , # Updated from 'credit' to 'credit line' per IPTC Core 1.1
484486 115 : 'source' ,
485487 116 : 'copyright notice' ,
486488 118 : 'contact' ,
@@ -536,6 +538,12 @@ def _key_as_int(cls, key):
536538 return key
537539 elif isinstance (key , str ) and key .lower () in c_datasets_r :
538540 return c_datasets_r [key .lower ()]
541+ # Backward compatibility: 'credit' is now 'credit line' per IPTC Core 1.1
542+ elif isinstance (key , str ) and key .lower () == 'credit' :
543+ return 110
544+ # Alias for compatibility with gThumb/exiftool
545+ elif isinstance (key , str ) and key .lower () == 'destination' :
546+ return 103 # Maps to 'original transmission reference'
539547 elif key .startswith (cls .c_cust_pre ) and key [len (cls .c_cust_pre ):].isdigit ():
540548 # example: nonstandard_69 -> 69
541549 return int (key [len (cls .c_cust_pre ):])
@@ -553,6 +561,13 @@ def _key_as_str(cls, key):
553561 else :
554562 raise KeyError ("Key %s is not in %s!" % (key , list (c_datasets .keys ())))
555563
564+ def __contains__ (self , name ):
565+ try :
566+ key = self ._key_as_int (name )
567+ except KeyError :
568+ return False
569+ return super ().__contains__ (key )
570+
556571 def __getitem__ (self , name ):
557572 return self .get (self ._key_as_int (name ), None )
558573
@@ -598,6 +613,7 @@ def __init__(self, fobj, force=False, inp_charset=None, out_charset=None):
598613 'contact' : [],
599614 })
600615 self ._fobj = fobj
616+ self ._force = force
601617 if duck_typed (fobj , 'read' ): # DELETEME
602618 self ._filename = None
603619 else :
@@ -613,7 +629,7 @@ def __init__(self, fobj, force=False, inp_charset=None, out_charset=None):
613629 if datafound :
614630 self .collectIIMInfo (fh )
615631 else :
616- logger .warn ('No IPTC data found in %s' , fobj )
632+ logger .warning ('No IPTC data found in %s' , fobj )
617633
618634 def _filepos (self , fh ):
619635 """For debugging, return what position in the file we are."""
@@ -630,7 +646,7 @@ def save_as(self, newfile, options=None):
630646 """Saves Jpeg with IPTC data to a given file name."""
631647 with smart_open (self ._fobj , 'rb' ) as fh :
632648 if not file_is_jpeg (fh ):
633- logger .error ('Source file %s is not a Jpeg.' % self ._fob )
649+ logger .error ('Source file %s is not a Jpeg.' % self ._fobj )
634650 return None
635651
636652 jpeg_parts = jpeg_collect_file_parts (fh )
@@ -686,8 +702,10 @@ def save_as(self, newfile, options=None):
686702 os .unlink (tmpfn )
687703 else :
688704 tmpfh .close ()
689- if os .path .exists (newfile ):
690- shutil .move (newfile , newfile + '~' )
705+ if os .path .exists (newfile ) and options is not None and 'overwrite' in options :
706+ os .unlink (newfile )
707+ elif os .path .exists (newfile ):
708+ shutil .move (newfile , "{file}~" .format (file = newfile ))
691709 shutil .move (tmpfn , newfile )
692710 return True
693711
@@ -699,6 +717,9 @@ def __del__(self):
699717 def __len__ (self ):
700718 return len (self ._data )
701719
720+ def __contains__ (self , key ):
721+ return key in self ._data
722+
702723 def __getitem__ (self , key ):
703724 return self ._data [key ]
704725
@@ -716,7 +737,7 @@ def scanToFirstIMMTag(self, fh):
716737 logger .info ("File is JPEG, proceeding with JpegScan" )
717738 return self .jpegScan (fh )
718739 else :
719- logger .warn ("File not a JPEG, trying blindScan" )
740+ logger .warning ("File not a JPEG, trying blindScan" )
720741 return self .blindScan (fh )
721742
722743 c_marker_err = {0 : "Marker scan failed" ,
@@ -752,22 +773,26 @@ def jpegScan(self, fh):
752773 err = "jpeg_skip_variable failed"
753774 if err is not None :
754775 self .error = err
755- logger .warn (err )
776+ # When force=True, log as INFO instead of WARNING since we expect no IPTC data
777+ if self ._force :
778+ logger .info (err )
779+ else :
780+ logger .warning (err )
756781 return None
757782
758783 # If we're here, we must have found the right marker.
759784 # Now blindScan through the data.
760785 return self .blindScan (fh , MAX = jpeg_get_variable_length (fh ))
761786
762- def blindScan (self , fh , MAX = 8192 ):
787+ def blindScan (self , fh , MAX = 819200 ):
763788 """Scans blindly to first IIM Record 2 tag in the file. This
764789 method may or may not work on any arbitrary file type, but it
765790 doesn't hurt to check. We expect to see this tag within the first
766791 819200 bytes of data by default. (This limit may need to be changed or eliminated
767792 depending on how other programs choose to store IIM.)"""
768793
769794 offset = 0
770- # keep within first 8192 bytes
795+ # keep within first 819200 bytes
771796 # NOTE: this may need to change
772797 logger .debug ('blindScan: starting scan, max length %d' , MAX )
773798
@@ -776,7 +801,7 @@ def blindScan(self, fh, MAX=8192):
776801 try :
777802 temp = read_exactly (fh , 1 )
778803 except EOFException :
779- logger .warn ("BlindScan: hit EOF while scanning" )
804+ logger .warning ("BlindScan: hit EOF while scanning" )
780805 return None
781806 # look for tag identifier 0x1c
782807 if ord3 (temp ) == 0x1c :
@@ -787,15 +812,32 @@ def blindScan(self, fh, MAX=8192):
787812 # found character set's record!
788813 try :
789814 temp = read_exactly (fh , jpeg_get_variable_length (fh ))
790- try :
791- cs = unpack ('!H' , temp )[0 ]
792- except Exception : # TODO better exception
793- #logger.warn('WARNING: problems with charset recognition (%r)', temp)
794- cs = None
795- if cs in c_charset :
796- self .inp_charset = c_charset [cs ]
797- logger .info ("BlindScan: found character set '%s' at offset %d" ,
798- self .inp_charset , offset )
815+ cs = None
816+ # Check for ISO 2022 escape sequence (starts with ESC 0x1b)
817+ if len (temp ) >= 3 and ord3 (temp [0 ]) == 0x1b :
818+ # Parse ISO 2022 escape sequences
819+ # ESC % G = UTF-8
820+ if temp == b'\x1b %G' :
821+ self .inp_charset = 'utf_8'
822+ # ESC % / @ = UTF-16 (not commonly used)
823+ elif temp == b'\x1b %/@' :
824+ self .inp_charset = 'utf_16'
825+ else :
826+ logger .debug (
827+ "BlindScan: unknown ISO 2022 charset escape sequence %r" ,
828+ temp )
829+ else :
830+ # Try legacy numeric charset encoding
831+ try :
832+ cs = unpack ('!H' , temp )[0 ]
833+ if cs in c_charset :
834+ self .inp_charset = c_charset [cs ]
835+ except Exception :
836+ logger .debug ('BlindScan: could not parse charset from %r' , temp )
837+
838+ if self .inp_charset :
839+ logger .info ("BlindScan: found character set '%s' at offset %d" ,
840+ self .inp_charset , offset )
799841 except EOFException :
800842 pass
801843
@@ -845,7 +887,7 @@ def collectIIMInfo(self, fh):
845887 try :
846888 value = str (value , encoding = self .inp_charset , errors = 'strict' )
847889 except Exception : # TODO better exception
848- logger .warn ('Data "%r" is not in encoding %s!' , value , self .inp_charset )
890+ logger .warning ('Data "%r" is not in encoding %s!' , value , self .inp_charset )
849891 value = str (value , encoding = self .inp_charset , errors = 'replace' )
850892
851893 # try to extract first into _listdata (keywords, categories)
@@ -889,11 +931,22 @@ def packedIIMData(self):
889931 LOGDBG .debug ('out=%s' , hex_dump (out ))
890932 # Iterate over data sets
891933 for dataset , value in self ._data .items ():
892- if len (value ) == 0 :
934+ # Skip None, empty strings, empty lists, and NaN values
935+ if value is None :
936+ continue
937+ # Handle numeric values: NaN floats are skipped (an int can never be NaN)
938+ if isinstance (value , (float , int )):
939+ import math
940+ if isinstance (value , float ) and math .isnan (value ):
941+ continue
942+ # Convert numeric values to strings
943+ value = str (value )
944+ # Check length for strings and lists
945+ if hasattr (value , '__len__' ) and len (value ) == 0 :
893946 continue
894947
895948 if not (isinstance (dataset , int ) and dataset in c_datasets ):
896- logger .warn ("packedIIMData: illegal dataname '%s' (%d)" , dataset , dataset )
949+ logger .warning ("packedIIMData: illegal dataname '%s' (%d)" , dataset , dataset )
897950 continue
898951
899952 logger .debug ('packedIIMData %02X: %r -> %r' , dataset , value , self ._enc (value ))
@@ -944,7 +997,16 @@ def photoshopIIMBlock(self, otherparts, data):
944997
945998
946999if __name__ == '__main__' : # pragma: no cover
947- logging .basicConfig (level = logging .DEBUG )
1000+ logging .basicConfig (level = logging .ERROR )
9481001 if len (sys .argv ) > 1 :
9491002 info = IPTCInfo (sys .argv [1 ])
950- print (info )
1003+ if info .__dict__ != '' :
1004+ for k , v in info .__dict__ .items ():
1005+ if k == '_data' :
1006+ print (k )
1007+ for key , value in v .items ():
1008+ if type (value ) == list :
1009+ print (key , [x .decode () for x in value ])
1010+ [print (x .decode ()) for x in value ]
1011+ print (key , value )
1012+ print (k , v )
0 commit comments