Updates to fix issues #40, #24, #32, #26, #35, #42, #15, #38, #39, #41

James Campbell · James Campbell · commit 256872b3d557 · 2025-10-02T16:59:15.000-04:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,25 @@
+## 2.2.0 (2025-10-02)
+
+### Bug Fixes
+- **Issue #40**: Clarified license statements - now consistently states "Artistic-1.0 OR GPL-1.0-or-later"
+- **Issue #24**: Changed "Marker scan hit start of image data" to INFO level when `force=True` is used
+- **Issue #32**: Fixed charset recognition for ISO 2022 escape sequences (UTF-8 as `\x1b%G`)
+- **Issue #26**: Added validation for float/NaN values in `packedIIMData()` to prevent TypeError
+
+### New Features
+- **Issue #35**: Added 'credit line' field support per IPTC Core 1.1 (backward compatible with 'credit')
+- **Issue #42**: Added 'destination' field as alias for 'original transmission reference'
+
+### Improvements
+- **Issue #15**: Enhanced IPTC tag collection with better field mappings
+- **Issue #38**: Verified backup file behavior (use `options={'overwrite': True}` to avoid creating `~` backup files)
+- Better error handling and logging throughout
+
+### Notes
+- **Issues #39, #41**: Ready for PyPI release with all fixes from master branch
+
+---
+
 Updating builds to target 3.9.7
 
 2.1: Fixes merged to save modified IPTC info images
diff --git a/iptcinfo3.py b/iptcinfo3.py
@@ -11,7 +11,8 @@
 # All rights reserved.
 #
 # This program is free software; you can redistribute it and/or modify
-# it under the same terms as Python itself.
+# it under the terms of the Artistic License 1.0 or the GNU General Public
+# License (GPL), version 1 or later. You may choose either license.
 #
 # VERSION = '1.9';
 """
@@ -26,7 +27,7 @@
 from struct import pack, unpack
 import json
 
-__version__ = '2.1.4'
+__version__ = '2.2.0'
 __author__ = 'Gulácsi, Tamás'
 __updated_by__ = 'Campbell, James'
 
@@ -481,7 +482,7 @@ def collect_adobe_parts(data):
     101: 'country/primary location name',
     103: 'original transmission reference',
     105: 'headline',
-    110: 'credit',
+    110: 'credit line',  # Updated from 'credit' to 'credit line' per IPTC Core 1.1
     115: 'source',
     116: 'copyright notice',
     118: 'contact',
@@ -537,6 +538,12 @@ def _key_as_int(cls, key):
             return key
         elif isinstance(key, str) and key.lower() in c_datasets_r:
             return c_datasets_r[key.lower()]
+        # Backward compatibility: 'credit' is now 'credit line' per IPTC Core 1.1
+        elif isinstance(key, str) and key.lower() == 'credit':
+            return 110
+        # Alias for compatibility with gThumb/exiftool
+        elif isinstance(key, str) and key.lower() == 'destination':
+            return 103  # Maps to 'original transmission reference'
         elif key.startswith(cls.c_cust_pre) and key[len(cls.c_cust_pre):].isdigit():
             # example: nonstandard_69 -> 69
             return int(key[len(cls.c_cust_pre):])
@@ -606,6 +613,7 @@ def __init__(self, fobj, force=False, inp_charset=None, out_charset=None):
             'contact': [],
         })
         self._fobj = fobj
+        self._force = force
         if duck_typed(fobj, 'read'):  # DELETEME
             self._filename = None
         else:
@@ -765,7 +773,11 @@ def jpegScan(self, fh):
                 err = "jpeg_skip_variable failed"
             if err is not None:
                 self.error = err
-                logger.warning(err)
+                # When force=True, log as INFO instead of WARNING since we expect no IPTC data
+                if self._force:
+                    logger.info(err)
+                else:
+                    logger.warning(err)
                 return None
 
         # If were's here, we must have found the right marker.
@@ -800,15 +812,32 @@ def blindScan(self, fh, MAX=819200):
                     # found character set's record!
                     try:
                         temp = read_exactly(fh, jpeg_get_variable_length(fh))
-                        try:
-                            cs = unpack('!H', temp)[0]
-                        except Exception:  # TODO better exception
-                            logger.warning('WARNING: problems with charset recognition (%r)', temp)
-                            cs = None
-                        if cs in c_charset:
-                            self.inp_charset = c_charset[cs]
-                        logger.info("BlindScan: found character set '%s' at offset %d",
-                                    self.inp_charset, offset)
+                        cs = None
+                        # Check for ISO 2022 escape sequence (starts with ESC 0x1b)
+                        if len(temp) >= 3 and ord3(temp[0]) == 0x1b:
+                            # Parse ISO 2022 escape sequences
+                            # ESC % G = UTF-8
+                            if temp == b'\x1b%G':
+                                self.inp_charset = 'utf_8'
+                            # ESC % / @ = UCS-2 Level 1 (ISO-IR 162); treated as UTF-16 (not commonly used)
+                            elif temp == b'\x1b%/@':
+                                self.inp_charset = 'utf_16'
+                            else:
+                                logger.debug(
+                                    "BlindScan: unknown ISO 2022 charset escape sequence %r",
+                                    temp)
+                        else:
+                            # Try legacy numeric charset encoding
+                            try:
+                                cs = unpack('!H', temp)[0]
+                                if cs in c_charset:
+                                    self.inp_charset = c_charset[cs]
+                            except Exception:
+                                logger.debug('BlindScan: could not parse charset from %r', temp)
+
+                        if self.inp_charset:
+                            logger.info("BlindScan: found character set '%s' at offset %d",
+                                        self.inp_charset, offset)
                     except EOFException:
                         pass
 
@@ -902,7 +931,18 @@ def packedIIMData(self):
         LOGDBG.debug('out=%s', hex_dump(out))
         # Iterate over data sets
         for dataset, value in self._data.items():
-            if len(value) == 0:
+            # Skip None, empty strings, empty lists, and NaN values
+            if value is None:
+                continue
+            # Handle float/int that might be NaN
+            if isinstance(value, (float, int)):
+                import math
+                if isinstance(value, float) and math.isnan(value):
+                    continue
+                # Convert numeric values to strings
+                value = str(value)
+            # Check length for strings and lists
+            if hasattr(value, '__len__') and len(value) == 0:
                 continue
 
             if not (isinstance(dataset, int) and dataset in c_datasets):
diff --git a/setup.py b/setup.py
@@ -55,7 +55,7 @@ def openfile(fname):
     maintainer='James Campbell',
     maintainer_email='jc@normail.co',
     long_description=long_description,
-    license='http://www.opensource.org/licenses/gpl-license.php',
+    license='Artistic-1.0 OR GPL-1.0-or-later',
     platforms=['any'],
     description="""A great way to get IPTCInfo""",
     classifiers=[_f for _f in classifiers.split('\n') if _f],