Skip to content

Commit c00aced

Browse files
committed
fix picture metadata parsing errors (#3123)
* fix picture metadata parsing errors use exiftool as a fallback when exiv2 fails SDCP-1061
1 parent dd0a0cb commit c00aced

File tree

3 files changed

+158
-9
lines changed

3 files changed

+158
-9
lines changed

superdesk/media/image.py

Lines changed: 79 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,9 @@ def get_meta_iptc(file_stream: BinaryIO):
165165
except KeyError:
166166
continue
167167
if isinstance(value, list):
168-
value = [decode(v) for v in value]
168+
metadata[tag] = [decode(v) for v in value]
169169
elif isinstance(value, bytes):
170-
value = decode(value)
171-
metadata[tag] = value
170+
metadata[tag] = decode(value)
172171
return metadata
173172

174173

@@ -177,8 +176,18 @@ def read_metadata(input: bytes) -> MediaMetadata:
177176
178177
@param file_stream: stream
179178
"""
180-
with pyexiv2.ImageData(input) as img:
181-
xmp = img.read_xmp()
179+
try:
180+
with pyexiv2.ImageData(input) as img:
181+
xmp = img.read_xmp()
182+
except (RuntimeError, ValueError) as e:
183+
logger.warning(f"Failed to read image metadata with pyexiv2: {e}, trying exiftool fallback", exc_info=True)
184+
# Try to use exiftool as fallback when pyexiv2 fails
185+
# Import here to avoid loading exiftool dependencies unless needed (lazy loading)
186+
from superdesk.media.video import read_metadata as read_metadata_video
187+
188+
# Map video module metadata to image module field names
189+
return _map_video_to_image(read_metadata_video(input))
190+
182191
return {
183192
"Description": get_xmp_lang_string(xmp.get("Xmp.dc.description")),
184193
"DescriptionWriter": xmp.get("Xmp.photoshop.CaptionWriter", ""),
@@ -197,6 +206,57 @@ def read_metadata(input: bytes) -> MediaMetadata:
197206
}
198207

199208

209+
# Field mappings between image module and video module field names
210+
# Keys are video module names, values are image module names
211+
_FIELD_MAPPING: Dict[str, str] = {
212+
"CaptionWriter": "DescriptionWriter",
213+
"TransmissionReference": "JobId",
214+
"AuthorsPosition": "CreatorsJobtitle",
215+
"Rights": "CopyrightNotice",
216+
"State": "ProvinceState",
217+
"Credit": "CreditLine",
218+
}
219+
220+
221+
def _map_video_to_image(metadata: MediaMetadata) -> MediaMetadata:
222+
"""Map video module metadata field names to image module field names.
223+
224+
Converts video module field names to image module field names for consistency.
225+
226+
@param metadata: Metadata dict from exiftool fallback
227+
@return: Metadata with image module field names
228+
"""
229+
result: MediaMetadata = {}
230+
231+
for key, value in metadata.items():
232+
# Use mapped name if available, otherwise keep original name
233+
mapped_key = _FIELD_MAPPING.get(key, key)
234+
result[mapped_key] = value # type: ignore[literal-required]
235+
236+
return result
237+
238+
239+
def _map_image_to_video(metadata: MediaMetadata) -> MediaMetadata:
240+
"""Map image module metadata field names to video module field names.
241+
242+
Converts image module field names back to video module field names for
243+
compatibility with the video module's exiftool handler.
244+
245+
@param metadata: Metadata dict with image module field names
246+
@return: Metadata with video module field names
247+
"""
248+
# Create reverse mapping: image module field names to video module field names
249+
reverse_mapping = {v: k for k, v in _FIELD_MAPPING.items()}
250+
251+
result: MediaMetadata = {}
252+
for key, value in metadata.items():
253+
# Use reverse mapped name if available, otherwise keep original name
254+
mapped_key = reverse_mapping.get(key, key)
255+
result[mapped_key] = value # type: ignore[literal-required]
256+
257+
return result
258+
259+
200260
def get_xmp_lang_string(value, lang="x-default"):
201261
lang_key = 'lang="{}"'.format(lang)
202262
if value and isinstance(value, dict) and value.get(lang_key):
@@ -234,7 +294,17 @@ def write_metadata(input: bytes, metadata: MediaMetadata) -> bytes:
234294
xmp = {k: v for k, v in xmp.items() if v}
235295
iptc = convert_xmp_to_iptc(xmp)
236296

237-
with pyexiv2.ImageData(input) as img:
238-
img.modify_xmp(xmp)
239-
img.modify_iptc(iptc)
240-
return img.get_bytes()
297+
try:
298+
with pyexiv2.ImageData(input) as img:
299+
img.modify_xmp(xmp)
300+
img.modify_iptc(iptc)
301+
return img.get_bytes()
302+
except (RuntimeError, ValueError) as e:
303+
logger.warning(f"Failed to write image metadata with pyexiv2: {e}, trying exiftool fallback", exc_info=True)
304+
# Try to use exiftool as fallback when pyexiv2 fails
305+
# Import here to avoid loading exiftool dependencies unless needed (lazy loading)
306+
from superdesk.media.video import write_metadata as write_metadata_video
307+
308+
# Map image module metadata to video module field names
309+
mapped_metadata = _map_image_to_video(metadata)
310+
return write_metadata_video(input, mapped_metadata)

tests/media/fixtures/cp-sports.jpg

23.6 KB
Loading

tests/media/image_metadata_test.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ def image_binary() -> bytes:
2121
return f.read()
2222

2323

24+
@fixture
25+
def cp_sports_image_binary() -> bytes:
26+
image_path = fixture_path("cp-sports.jpg", "media")
27+
with open(image_path, mode="rb") as f:
28+
return f.read()
29+
30+
2431
def test_picture_metadata_read_write(image_binary) -> None:
2532
metadata = read_metadata(image_binary)
2633
assert metadata == MediaMetadata(
@@ -91,3 +98,75 @@ def test_get_metadata_from_item() -> None:
9198
Title="bar",
9299
JobId="baz",
93100
)
101+
102+
103+
def test_cp_sports_metadata_write(cp_sports_image_binary) -> None:
104+
"""Test updating metadata in cp-sports.jpg."""
105+
metadata = read_metadata(cp_sports_image_binary)
106+
assert metadata["Country"] == "CHN"
107+
108+
# Update the metadata
109+
updated = MediaMetadata(
110+
Country="Canada",
111+
City="Toronto",
112+
)
113+
114+
next_image = write_metadata(cp_sports_image_binary, updated)
115+
116+
# Read back and verify the changes
117+
metadata = read_metadata(next_image)
118+
assert metadata["Country"] == "Canada"
119+
assert metadata["City"] == "Toronto"
120+
121+
122+
def test_cp_sports_metadata_normalization(cp_sports_image_binary) -> None:
123+
"""Test that exiftool fallback metadata is normalized to image module field names."""
124+
# Read metadata using exiftool fallback (since cp-sports.jpg triggers the fallback)
125+
metadata = read_metadata(cp_sports_image_binary)
126+
127+
# Verify that video module field names are NOT present (should be normalized to image module names)
128+
assert "CaptionWriter" not in metadata # Should be normalized to DescriptionWriter
129+
assert "TransmissionReference" not in metadata # Should be normalized to JobId
130+
assert "AuthorsPosition" not in metadata # Should be normalized to CreatorsJobtitle
131+
assert "Rights" not in metadata # Should be normalized to CopyrightNotice
132+
assert "State" not in metadata # Should be normalized to ProvinceState
133+
assert "Credit" not in metadata # Should be normalized to CreditLine
134+
135+
# Verify actual metadata values with image module field names
136+
assert metadata.get("DescriptionWriter") == "AW"
137+
assert metadata.get("JobId") == "RJB101_2022030616"
138+
assert metadata.get("CreatorsJobtitle") == "STF"
139+
assert metadata.get("CopyrightNotice") == "Copyright 2022 The Associated Press. All rights reserved"
140+
assert metadata.get("CreditLine") == "THE CANADIAN PRESS"
141+
142+
143+
def test_metadata_field_mapping_consistency(cp_sports_image_binary) -> None:
144+
"""Test that metadata field mapping denormalization works correctly in write_metadata fallback."""
145+
# Create metadata with image module field names
146+
metadata_to_write = MediaMetadata(
147+
Country="Canada",
148+
City="Toronto",
149+
DescriptionWriter="updated_writer",
150+
JobId="updated_job_123",
151+
CreatorsJobtitle="updated_photographer",
152+
CopyrightNotice="Updated Copyright",
153+
CreditLine="Updated Credit",
154+
ProvinceState="ON",
155+
)
156+
157+
# Use cp-sports.jpg which triggers the exiftool fallback for both read and write
158+
# This tests that denormalization works correctly in the fallback path
159+
updated_image = write_metadata(cp_sports_image_binary, metadata_to_write)
160+
161+
# Read it back
162+
read_back = read_metadata(updated_image)
163+
164+
# Verify the mapped fields were written and read back correctly through the fallback
165+
assert read_back.get("Country") == "Canada"
166+
assert read_back.get("City") == "Toronto"
167+
assert read_back.get("DescriptionWriter") == "updated_writer"
168+
assert read_back.get("JobId") == "updated_job_123"
169+
assert read_back.get("CreatorsJobtitle") == "updated_photographer"
170+
assert read_back.get("CopyrightNotice") == "Updated Copyright"
171+
assert read_back.get("CreditLine") == "Updated Credit"
172+
assert read_back.get("ProvinceState") == "ON"

0 commit comments

Comments
 (0)