Skip to content

Commit 3fdbbde

Browse files
author
Balys Anikevicius
committed
bugfix/gracefully-handle-large-thumbnails
Summary: User report: mapillary_tools crashes when trying to upload the attached set of images. Also, there is no mention on which image is causing the crash, meaning the user has to hunt through potentially thousands of images to find the misbehaving one. Traceback (most recent call last): File "main.py", line 8, in <module> File "mapillary_tools\commands\__main__.py", line 164, in main File "mapillary_tools\commands\process_and_upload.py", line 33, in run File "mapillary_tools\commands\upload.py", line 75, in run File "mapillary_tools\upload.py", line 117, in upload File "mapillary_tools\upload.py", line 108, in upload File "mapillary_tools\upload.py", line 631, in _continue_or_fail File "mapillary_tools\uploader.py", line 517, in upload_images File "mapillary_tools\uploader.py", line 543, in _upload_sequence File "concurrent\futures\_base.py", line 619, in result_iterator File "concurrent\futures\_base.py", line 317, in _result_or_cancel File "concurrent\futures\_base.py", line 449, in result File "concurrent\futures\_base.py", line 401, in __get_result File "concurrent\futures\thread.py", line 59, in run File "mapillary_tools\uploader.py", line 602, in upload File "mapillary_tools\uploader.py", line 643, in dump_image_bytes File "mapillary_tools\exif_write.py", line 201, in dump_image_bytes File "mapillary_tools\exif_write.py", line 186, in _safe_dump File "mapillary_tools\exif_write.py", line 153, in _safe_dump File "piexif\_dump.py", line 94, in dump ValueError: Given thumbnail is too large. max 64kB [PYI-9472:ERROR] Failed to execute script 'main' due to unhandled exception! "But I guess the actual problems are quite clear: (1) ValueError: Given thumbnail is too large. max 64kB, Mapillary needs to gracefully handle that situation and (2) on file errors, Mapillary needs to report the failing image name." Test Plan: Added unit tests, ran the existing ones as described in README. It looks like the tests get stuck on tests/unit/test_persistent_cache.py::test_multithread_shared_cache_comprehensive but I believe this is unrelated to the change. To reproduce test getting stuck run: ``` pytest -s -vv tests ``` and observe it getting stuck after 181 tests.
1 parent 6823dba commit 3fdbbde

File tree

4 files changed

+107
-0
lines changed

4 files changed

+107
-0
lines changed

mapillary_tools/exif_write.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,18 @@ def _safe_dump(self) -> bytes:
214214
else:
215215
del self._ef[ifd][tag]
216216
# retry later
217+
elif "thumbnail is too large" in message.lower():
218+
# Handle oversized thumbnails (max 64kB per EXIF spec)
219+
if thumbnail_removed:
220+
raise exc
221+
LOG.debug(
222+
"Thumbnail too large (max 64kB) -- removing thumbnail and 1st: %s",
223+
exc,
224+
)
225+
del self._ef["thumbnail"]
226+
del self._ef["1st"]
227+
thumbnail_removed = True
228+
# retry later
217229
else:
218230
raise exc
219231
except Exception as exc:

mapillary_tools/uploader.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,10 @@ def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
797797
raise ExifError(
798798
f"Failed to dump EXIF bytes: {ex}", metadata.filename
799799
) from ex
800+
except ValueError as ex:
801+
raise ExifError(
802+
f"Failed to dump EXIF bytes: {ex}", metadata.filename
803+
) from ex
800804

801805
# Thread-safe
802806
def _get_cached_file_handle(self, key: str) -> str | None:

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ include = ["mapillary_tools*"]
5858
[dependency-groups]
5959
dev = [
6060
"mypy",
61+
"Pillow",
6162
"pyinstaller",
6263
"pyre-check",
6364
"pytest",

tests/unit/test_exifedit.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import datetime
2+
import io
23
import os
34
import shutil
45
import unittest
56
from pathlib import Path
67

8+
import piexif
79
import py.path
810

911
from mapillary_tools.exif_read import ExifRead
1012
from mapillary_tools.exif_write import ExifEdit
13+
from PIL import Image
1114

1215
this_file = Path(__file__)
1316
this_file_dir = this_file.parent
@@ -260,6 +263,93 @@ def test_add_repeatedly_time_original_corrupt_exif(self):
260263
def test_add_repeatedly_time_original_corrupt_exif_2(self):
261264
add_repeatedly_time_original_general(self, CORRUPT_EXIF_FILE_2)
262265

266+
def test_large_thumbnail_handling(self):
267+
"""Test that images with thumbnails larger than 64kB are handled gracefully."""
268+
# Create a test image with a large thumbnail (>64kB)
269+
test_image_path = data_dir.joinpath("tmp", "large_thumbnail.jpg")
270+
271+
# Create a simple test image
272+
img = Image.new("RGB", (100, 100), color="red")
273+
img.save(test_image_path, "JPEG")
274+
275+
# Create a large thumbnail (>64kB) by creating a high-quality large thumbnail
276+
# Use a larger size and add noise to make it incompressible
277+
large_thumbnail = Image.new("RGB", (2048, 2048))
278+
# Fill with random-like data to prevent compression
279+
pixels = large_thumbnail.load()
280+
for i in range(2048):
281+
for j in range(2048):
282+
pixels[i, j] = (
283+
(i * 7 + j * 13) % 256,
284+
(i * 11 + j * 17) % 256,
285+
(i * 19 + j * 23) % 256,
286+
)
287+
288+
thumbnail_bytes = io.BytesIO()
289+
large_thumbnail.save(thumbnail_bytes, "JPEG", quality=100)
290+
thumbnail_data = thumbnail_bytes.getvalue()
291+
292+
# Verify thumbnail is larger than 64kB
293+
self.assertGreater(
294+
len(thumbnail_data),
295+
64 * 1024,
296+
f"Test thumbnail should be larger than 64kB but got {len(thumbnail_data)} bytes",
297+
)
298+
299+
# Load the image and add GPS data first
300+
exif_edit = ExifEdit(test_image_path)
301+
test_latitude = 50.5475894785
302+
test_longitude = 15.595866685
303+
exif_edit.add_lat_lon(test_latitude, test_longitude)
304+
305+
# Manually insert the large thumbnail into the internal EXIF structure
306+
# This simulates what would happen if an image came in with a large thumbnail
307+
exif_edit._ef["thumbnail"] = thumbnail_data
308+
exif_edit._ef["1st"] = {
309+
piexif.ImageIFD.Compression: 6,
310+
piexif.ImageIFD.XResolution: (72, 1),
311+
piexif.ImageIFD.YResolution: (72, 1),
312+
piexif.ImageIFD.ResolutionUnit: 2,
313+
piexif.ImageIFD.JPEGInterchangeFormat: 0,
314+
piexif.ImageIFD.JPEGInterchangeFormatLength: len(thumbnail_data),
315+
}
316+
317+
# Without the fix, this would raise: ValueError: Given thumbnail is too large. max 64kB
318+
# With the fix, it should remove the thumbnail and succeed
319+
image_bytes = exif_edit.dump_image_bytes()
320+
321+
# Verify the output is valid
322+
self.assertIsNotNone(image_bytes)
323+
self.assertGreater(len(image_bytes), 0)
324+
325+
# Verify the resulting image is a valid JPEG
326+
result_image = Image.open(io.BytesIO(image_bytes))
327+
self.assertEqual(result_image.format, "JPEG")
328+
self.assertEqual(result_image.size, (100, 100))
329+
330+
# Verify we can read the GPS data from the result
331+
output_exif = piexif.load(image_bytes)
332+
self.assertIn("GPS", output_exif)
333+
self.assertIn(piexif.GPSIFD.GPSLatitude, output_exif["GPS"])
334+
self.assertIn(piexif.GPSIFD.GPSLongitude, output_exif["GPS"])
335+
336+
# CRITICAL: Verify the large thumbnail was actually removed
337+
# The fix should have deleted both "thumbnail" and "1st" to handle the error
338+
# piexif.load() may include "thumbnail": None after removal
339+
thumbnail_value = output_exif.get("thumbnail")
340+
self.assertTrue(
341+
thumbnail_value is None or thumbnail_value == b"",
342+
f"Large thumbnail should have been removed but got: {thumbnail_value[:100] if thumbnail_value else None}",
343+
)
344+
# If 1st exists, it should be empty or not contain thumbnail reference data
345+
if "1st" in output_exif:
346+
# If 1st exists, it should be empty or minimal
347+
self.assertNotIn(
348+
piexif.ImageIFD.JPEGInterchangeFormat,
349+
output_exif.get("1st", {}),
350+
"Thumbnail metadata should have been removed",
351+
)
352+
263353

264354
def test_exif_write(tmpdir: py.path.local):
265355
image_dir = tmpdir.mkdir("images")

0 commit comments

Comments
 (0)