|
11 | 11 | # All rights reserved. |
12 | 12 | # |
13 | 13 | # This program is free software; you can redistribute it and/or modify |
14 | | -# it under the same terms as Python itself. |
| 14 | +# it under the terms of the Artistic License or the GNU General Public |
| 15 | +# License (GPL). You may choose either license. |
15 | 16 | # |
16 | 17 | # VERSION = '1.9'; |
17 | 18 | """ |
|
26 | 27 | from struct import pack, unpack |
27 | 28 | import json |
28 | 29 |
|
29 | | -__version__ = '2.1.4' |
| 30 | +__version__ = '2.2.0' |
30 | 31 | __author__ = 'Gulácsi, Tamás' |
31 | 32 | __updated_by__ = 'Campbell, James' |
32 | 33 |
|
@@ -481,7 +482,7 @@ def collect_adobe_parts(data): |
481 | 482 | 101: 'country/primary location name', |
482 | 483 | 103: 'original transmission reference', |
483 | 484 | 105: 'headline', |
484 | | - 110: 'credit', |
| 485 | + 110: 'credit line', # Updated from 'credit' to 'credit line' per IPTC Core 1.1 |
485 | 486 | 115: 'source', |
486 | 487 | 116: 'copyright notice', |
487 | 488 | 118: 'contact', |
@@ -537,6 +538,12 @@ def _key_as_int(cls, key): |
537 | 538 | return key |
538 | 539 | elif isinstance(key, str) and key.lower() in c_datasets_r: |
539 | 540 | return c_datasets_r[key.lower()] |
| 541 | + # Backward compatibility: 'credit' is now 'credit line' per IPTC Core 1.1 |
| 542 | + elif isinstance(key, str) and key.lower() == 'credit': |
| 543 | + return 110 |
| 544 | + # Alias for compatibility with gThumb/exiftool |
| 545 | + elif isinstance(key, str) and key.lower() == 'destination': |
| 546 | + return 103 # Maps to 'original transmission reference' |
540 | 547 | elif key.startswith(cls.c_cust_pre) and key[len(cls.c_cust_pre):].isdigit(): |
541 | 548 | # example: nonstandard_69 -> 69 |
542 | 549 | return int(key[len(cls.c_cust_pre):]) |
@@ -606,6 +613,7 @@ def __init__(self, fobj, force=False, inp_charset=None, out_charset=None): |
606 | 613 | 'contact': [], |
607 | 614 | }) |
608 | 615 | self._fobj = fobj |
| 616 | + self._force = force |
609 | 617 | if duck_typed(fobj, 'read'): # DELETEME |
610 | 618 | self._filename = None |
611 | 619 | else: |
@@ -765,7 +773,11 @@ def jpegScan(self, fh): |
765 | 773 | err = "jpeg_skip_variable failed" |
766 | 774 | if err is not None: |
767 | 775 | self.error = err |
768 | | - logger.warning(err) |
| 776 | + # When force=True, log as INFO instead of WARNING since we expect no IPTC data |
| 777 | + if self._force: |
| 778 | + logger.info(err) |
| 779 | + else: |
| 780 | + logger.warning(err) |
769 | 781 | return None |
770 | 782 |
|
771 | 783 | # If we're here, we must have found the right marker. |
@@ -800,15 +812,32 @@ def blindScan(self, fh, MAX=819200): |
800 | 812 | # found character set's record! |
801 | 813 | try: |
802 | 814 | temp = read_exactly(fh, jpeg_get_variable_length(fh)) |
803 | | - try: |
804 | | - cs = unpack('!H', temp)[0] |
805 | | - except Exception: # TODO better exception |
806 | | - logger.warning('WARNING: problems with charset recognition (%r)', temp) |
807 | | - cs = None |
808 | | - if cs in c_charset: |
809 | | - self.inp_charset = c_charset[cs] |
810 | | - logger.info("BlindScan: found character set '%s' at offset %d", |
811 | | - self.inp_charset, offset) |
| 815 | + cs = None |
| 816 | + # Check for ISO 2022 escape sequence (starts with ESC 0x1b) |
| 817 | + if len(temp) >= 3 and ord3(temp[0]) == 0x1b: |
| 818 | + # Parse ISO 2022 escape sequences |
| 819 | + # ESC % G = UTF-8 |
| 820 | + if temp == b'\x1b%G': |
| 821 | + self.inp_charset = 'utf_8' |
| 822 | + # ESC % / @ = UTF-16 (not commonly used) |
| 823 | + elif temp == b'\x1b%/@': |
| 824 | + self.inp_charset = 'utf_16' |
| 825 | + else: |
| 826 | + logger.debug( |
| 827 | + "BlindScan: unknown ISO 2022 charset escape sequence %r", |
| 828 | + temp) |
| 829 | + else: |
| 830 | + # Try legacy numeric charset encoding |
| 831 | + try: |
| 832 | + cs = unpack('!H', temp)[0] |
| 833 | + if cs in c_charset: |
| 834 | + self.inp_charset = c_charset[cs] |
| 835 | + except Exception: |
| 836 | + logger.debug('BlindScan: could not parse charset from %r', temp) |
| 837 | + |
| 838 | + if self.inp_charset: |
| 839 | + logger.info("BlindScan: found character set '%s' at offset %d", |
| 840 | + self.inp_charset, offset) |
812 | 841 | except EOFException: |
813 | 842 | pass |
814 | 843 |
|
@@ -902,7 +931,18 @@ def packedIIMData(self): |
902 | 931 | LOGDBG.debug('out=%s', hex_dump(out)) |
903 | 932 | # Iterate over data sets |
904 | 933 | for dataset, value in self._data.items(): |
905 | | - if len(value) == 0: |
| 934 | + # Skip None, empty strings, empty lists, and NaN values |
| 935 | + if value is None: |
| 936 | + continue |
| 937 | + # Handle float/int that might be NaN |
| 938 | + if isinstance(value, (float, int)): |
| 939 | + import math |
| 940 | + if isinstance(value, float) and math.isnan(value): |
| 941 | + continue |
| 942 | + # Convert numeric values to strings |
| 943 | + value = str(value) |
| 944 | + # Check length for strings and lists |
| 945 | + if hasattr(value, '__len__') and len(value) == 0: |
906 | 946 | continue |
907 | 947 |
|
908 | 948 | if not (isinstance(dataset, int) and dataset in c_datasets): |
|
0 commit comments