From 0ca1f829f013af62cc991d7ff018b18ae56b3849 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Mon, 17 Mar 2025 16:51:46 -0700 Subject: [PATCH 1/8] disable metadata validation and upload status in process stage --- mapillary_tools/process_geotag_properties.py | 118 +------------------ 1 file changed, 1 insertion(+), 117 deletions(-) diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index 054a8cbd3..515d04bd1 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -1,15 +1,13 @@ import collections import datetime -import itertools import json import logging import typing as T -from multiprocessing import Pool from pathlib import Path from tqdm import tqdm -from . import constants, exceptions, exif_write, history, types, utils +from . import constants, exceptions, exif_write, types, utils from .geotag import ( geotag_from_generic, geotag_images_from_exif, @@ -446,113 +444,6 @@ def _show_stats_per_filetype( ) -_IT = T.TypeVar("_IT") - - -def split_if( - it: T.Iterable[_IT], sep: T.Callable[[_IT], bool] -) -> T.Tuple[T.List[_IT], T.List[_IT]]: - yes, no = [], [] - for e in it: - if sep(e): - yes.append(e) - else: - no.append(e) - return yes, no - - -def _check_upload_status( - metadatas: T.Sequence[types.MetadataOrError], -) -> T.List[types.MetadataOrError]: - groups = types.group_and_sort_images( - [ - metadata - for metadata in metadatas - if isinstance(metadata, types.ImageMetadata) - ] - ) - uploaded_sequence_uuids = set() - for sequence_uuid, group in groups.items(): - for m in group: - m.update_md5sum() - sequence_md5sum = types.sequence_md5sum(group) - if history.is_uploaded(sequence_md5sum): - uploaded_sequence_uuids.add(sequence_uuid) - - output: T.List[types.MetadataOrError] = [] - for metadata in metadatas: - if isinstance(metadata, types.ImageMetadata): - if metadata.MAPSequenceUUID in uploaded_sequence_uuids: - output.append( - types.describe_error_metadata( - exceptions.MapillaryUploadedAlreadyError( - "The image was already uploaded", - types.as_desc(metadata), - ), - filename=metadata.filename, - filetype=types.FileType.IMAGE, - ) - ) - else: - output.append(metadata) - elif isinstance(metadata, types.VideoMetadata): - metadata.update_md5sum() - assert isinstance(metadata.md5sum, str) - if history.is_uploaded(metadata.md5sum): - output.append( - types.describe_error_metadata( - exceptions.MapillaryUploadedAlreadyError( - "The video was already uploaded", - types.as_desc(metadata), - ), - filename=metadata.filename, - filetype=metadata.filetype, - ) - ) - else: - output.append(metadata) - else: - output.append(metadata) - assert len(output) == len(metadatas), "length mismatch" - return output - - -def _validate_metadatas( - metadatas: T.Sequence[types.MetadataOrError], num_processes: T.Optional[int] -) -> T.List[types.MetadataOrError]: - # validating metadatas is slow, hence multiprocessing - if num_processes is None: - pool_num_processes = None - disable_multiprocessing = False - else: - pool_num_processes = max(num_processes, 1) - disable_multiprocessing = num_processes <= 0 - with Pool(processes=pool_num_processes) as pool: - validated_metadatas_iter: T.Iterator[types.MetadataOrError] - if disable_multiprocessing: - validated_metadatas_iter = map(types.validate_and_fail_metadata, metadatas) - else: - # Do not pass error metadatas where the error object can not be pickled for multiprocessing to work - # Otherwise we get: - # TypeError: __init__() missing 3 required positional arguments: 'image_time', 'gpx_start_time', and 'gpx_end_time' - # See https://stackoverflow.com/a/61432070 - yes, no = split_if(metadatas, lambda m: isinstance(m, types.ErrorMetadata)) - no_iter = pool.imap( - types.validate_and_fail_metadata, - no, - ) - validated_metadatas_iter = itertools.chain(yes, no_iter) - return list( - tqdm( - validated_metadatas_iter, - desc="Validating metadatas", - unit="metadata", - disable=LOG.getEffectiveLevel() <= logging.DEBUG, - total=len(metadatas), - ) - ) - - def process_finalize( import_path: T.Union[T.Sequence[Path], Path], metadatas: T.List[types.MetadataOrError], @@ -567,7 +458,6 @@ def process_finalize( offset_time: float = 0.0, offset_angle: float = 0.0, desc_path: T.Optional[str] = None, - num_processes: T.Optional[int] = None, ) -> T.List[types.MetadataOrError]: for metadata in metadatas: if isinstance(metadata, types.VideoMetadata): @@ -592,12 +482,6 @@ def process_finalize( offset_angle=offset_angle, ) - LOG.debug("Validating %d metadatas", len(metadatas)) - metadatas = _validate_metadatas(metadatas, num_processes) - - LOG.info("Checking upload status for %d metadatas", len(metadatas)) - metadatas = _check_upload_status(metadatas) - _overwrite_exif_tags( # search image metadatas again because some of them might have been failed [ From d32196769c4f07a60931c145881973c8d5f8665a Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Mon, 17 Mar 2025 17:09:32 -0700 Subject: [PATCH 2/8] turn off detials and debug flags in exifread.process_file --- mapillary_tools/exif_read.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mapillary_tools/exif_read.py b/mapillary_tools/exif_read.py index d2fac8dfd..38bb6bd5c 100644 --- a/mapillary_tools/exif_read.py +++ b/mapillary_tools/exif_read.py @@ -520,14 +520,15 @@ def __init__(self, path_or_stream: T.Union[Path, T.BinaryIO]) -> None: if isinstance(path_or_stream, Path): with path_or_stream.open("rb") as fp: try: - self.tags = exifread.process_file(fp, details=True, debug=True) + # Turn off details and debug for performance reasons + self.tags = exifread.process_file(fp, details=False, debug=False) except Exception: self.tags = {} else: try: self.tags = exifread.process_file( - path_or_stream, details=True, debug=True + path_or_stream, details=False, debug=False ) except Exception: self.tags = {} From dc85d93ecef8fa89bec441a2a88315c32a7eed54 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Mon, 17 Mar 2025 17:13:48 -0700 Subject: [PATCH 3/8] improve exif extraction --- .../geotag/geotag_images_from_exif.py | 40 +------------------ .../geotag/geotag_images_from_exiftool.py | 6 +-- 2 files changed, 3 insertions(+), 43 deletions(-) diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index 4e46f2198..9bd94e775 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -1,4 +1,3 @@ -import io import logging import typing as T from multiprocessing import Pool @@ -6,34 +5,13 @@ from tqdm import tqdm -from .. import exceptions, exif_write, geo, types, utils +from .. import exceptions, geo, types, utils from ..exif_read import ExifRead, ExifReadABC from .geotag_from_generic import GeotagImagesFromGeneric LOG = logging.getLogger(__name__) -def verify_image_exif_write( - metadata: types.ImageMetadata, - image_bytes: T.Optional[bytes] = None, -) -> None: - if image_bytes is None: - edit = exif_write.ExifEdit(metadata.filename) - else: - edit = exif_write.ExifEdit(image_bytes) - - # The cast is to fix the type error in Python3.6: - # Argument 1 to "add_image_description" of "ExifEdit" has incompatible type "ImageDescription"; expected "Dict[str, Any]" - edit.add_image_description( - T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata))) - ) - - # Possible errors thrown here: - # - struct.error: 'H' format requires 0 <= number <= 65535 - # - piexif.InvalidImageDataError - edit.dump_image_bytes() - - class GeotagImagesFromEXIF(GeotagImagesFromGeneric): def __init__( self, image_paths: T.Sequence[Path], num_processes: T.Optional[int] = None @@ -84,30 +62,16 @@ def geotag_image( image_path: Path, skip_lonlat_error: bool = False ) -> types.ImageMetadataOrError: try: - # load the image bytes into memory to avoid reading it multiple times with image_path.open("rb") as fp: - image_bytesio = io.BytesIO(fp.read()) - - image_bytesio.seek(0, io.SEEK_SET) - exif = ExifRead(image_bytesio) - + exif = ExifRead(fp) image_metadata = GeotagImagesFromEXIF.build_image_metadata( image_path, exif, skip_lonlat_error=skip_lonlat_error ) - - image_bytesio.seek(0, io.SEEK_SET) - verify_image_exif_write( - image_metadata, - image_bytes=image_bytesio.read(), - ) except Exception as ex: return types.describe_error_metadata( ex, image_path, filetype=types.FileType.IMAGE ) - image_bytesio.seek(0, io.SEEK_SET) - image_metadata.update_md5sum(image_bytesio) - return image_metadata def to_description(self) -> T.List[types.ImageMetadataOrError]: diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index b343f7dca..79d0a00dc 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -9,7 +9,7 @@ from .. import exceptions, exiftool_read, types from .geotag_from_generic import GeotagImagesFromGeneric -from .geotag_images_from_exif import GeotagImagesFromEXIF, verify_image_exif_write +from .geotag_images_from_exif import GeotagImagesFromEXIF LOG = logging.getLogger(__name__) @@ -40,10 +40,6 @@ def geotag_image(element: ET.Element) -> types.ImageMetadataOrError: with image_path.open("rb") as fp: image_bytesio = io.BytesIO(fp.read()) image_bytesio.seek(0, io.SEEK_SET) - verify_image_exif_write( - image_metadata, - image_bytes=image_bytesio.read(), - ) except Exception as ex: return types.describe_error_metadata( ex, image_path, filetype=types.FileType.IMAGE From 88ce0b089d0d7528ea0a09ed984b888c29e7bb4f Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Tue, 18 Mar 2025 10:00:09 -0700 Subject: [PATCH 4/8] extract XMP with extract_xmp_efficiently --- mapillary_tools/exif_read.py | 186 +++++++++++++----- .../geotag/geotag_images_from_exif.py | 6 +- 2 files changed, 138 insertions(+), 54 deletions(-) diff --git a/mapillary_tools/exif_read.py b/mapillary_tools/exif_read.py index 38bb6bd5c..505490471 100644 --- a/mapillary_tools/exif_read.py +++ b/mapillary_tools/exif_read.py @@ -1,7 +1,9 @@ import abc import datetime +import io import logging import re +import struct import typing as T import xml.etree.ElementTree as et from fractions import Fraction @@ -11,6 +13,7 @@ from exifread.utils import Ratio +LOG = logging.getLogger(__name__) XMP_NAMESPACES = { "exif": "http://ns.adobe.com/exif/1.0/", "tiff": "http://ns.adobe.com/tiff/1.0/", @@ -508,6 +511,86 @@ def _extract_alternative_fields( return None +def extract_xmp_efficiently(fp) -> T.Optional[str]: + """ + Extract XMP metadata from a JPEG file efficiently by reading only necessary chunks. + + Args: + image_path (str): Path to the JPEG image file + + Returns: + str: Formatted XML string containing XMP metadata, or None if no XMP data found + """ + # JPEG markers + SOI_MARKER = b"\xff\xd8" # Start of Image + APP1_MARKER = b"\xff\xe1" # Application Segment 1 (where XMP usually lives) + XMP_IDENTIFIER = b"http://ns.adobe.com/xap/1.0/\x00" + XMP_META_TAG_BEGIN = b"" + + # Check for JPEG signature (SOI marker) + if fp.read(2) != SOI_MARKER: + return None + + while True: + # Read marker + marker_bytes = fp.read(2) + if len(marker_bytes) < 2: + # End of file + break + + # If not APP1, skip this segment + if marker_bytes != APP1_MARKER: + # Read length field (includes the length bytes themselves) + length_bytes = fp.read(2) + if len(length_bytes) < 2: + break + + length = struct.unpack(">H", length_bytes)[0] + # Skip the rest of this segment (-2 because length includes length bytes) + fp.seek(length - 2, io.SEEK_CUR) + continue + + # It's an APP1 segment - read length + length_bytes = fp.read(2) + if len(length_bytes) < 2: + break + + length = struct.unpack(">H", length_bytes)[0] + segment_data_length = length - 2 # Subtract length field size + + # Read enough bytes to check for XMP identifier + identifier_check = fp.read(len(XMP_IDENTIFIER)) + if len(identifier_check) < len(XMP_IDENTIFIER): + break + + # Check if this APP1 contains XMP data + if identifier_check == XMP_IDENTIFIER: + # We found XMP data - read the rest of the segment + remaining_length = segment_data_length - len(XMP_IDENTIFIER) + if remaining_length > 128 * 1024 * 1024: + raise ValueError("XMP data too large") + xmp_data = fp.read(remaining_length) + + # Process the XMP data + begin_idx = xmp_data.find(XMP_META_TAG_BEGIN) + if begin_idx >= 0: + end_idx = xmp_data.rfind(XMP_META_TAG_END, begin_idx) + if end_idx >= 0: + xmp_data = xmp_data[begin_idx : end_idx + len(XMP_META_TAG_END)] + else: + xmp_data = xmp_data[begin_idx:] + + return xmp_data.decode("utf-8") + else: + # Not XMP data - skip the rest of this APP1 segment + # We already read the identifier_check bytes, so subtract that + fp.seek(segment_data_length - len(identifier_check), io.SEEK_CUR) + + # If we reach here, no XMP data was found + return None + + class ExifReadFromEXIF(ExifReadABC): """ EXIF class for reading exif from an image @@ -522,15 +605,18 @@ def __init__(self, path_or_stream: T.Union[Path, T.BinaryIO]) -> None: try: # Turn off details and debug for performance reasons self.tags = exifread.process_file(fp, details=False, debug=False) - except Exception: + except Exception as ex: + LOG.warning("Error reading EXIF from %s: %s", path_or_stream, ex) self.tags = {} else: try: + # Turn off details and debug for performance reasons self.tags = exifread.process_file( path_or_stream, details=False, debug=False ) - except Exception: + except Exception as ex: + LOG.warning("Error reading EXIF: %s", ex) self.tags = {} def extract_altitude(self) -> T.Optional[float]: @@ -776,27 +862,54 @@ def extract_application_notes(self) -> T.Optional[str]: class ExifRead(ExifReadFromEXIF): + """ + Extract properties from EXIF first and then XMP + NOTE: For performance reasons, XMP is only extracted if EXIF does not contain the required fields + """ + def __init__(self, path_or_stream: T.Union[Path, T.BinaryIO]) -> None: super().__init__(path_or_stream) - self._xmp = self._extract_xmp() + self._path_or_stream = path_or_stream + self._xml_extracted: bool = False + self._cached_xml: T.Optional[ExifReadFromXMP] = None + + def _xmp_with_reason(self, reason: str) -> T.Optional[ExifReadFromXMP]: + if not self._xml_extracted: + LOG.debug('Extracting XMP for "%s"', reason) + self._cached_xml = self._extract_xmp() + self._xml_extracted = True + + return self._cached_xml def _extract_xmp(self) -> T.Optional[ExifReadFromXMP]: - application_notes = self.extract_application_notes() - if application_notes is None: - return None + xml_str = self.extract_application_notes() + if xml_str is None: + if isinstance(self._path_or_stream, Path): + with self._path_or_stream.open("rb") as fp: + xml_str = extract_xmp_efficiently(fp) + else: + self._path_or_stream.seek(0, io.SEEK_SET) + xml_str = extract_xmp_efficiently(self._path_or_stream) + + if xml_str is None: + return None + try: - e = et.fromstring(application_notes) - except et.ParseError: + e = et.fromstring(xml_str) + except et.ParseError as ex: + LOG.warning("Error parsing XMP XML: %s: %s", ex, xml_str) return None + return ExifReadFromXMP(et.ElementTree(e)) def extract_altitude(self) -> T.Optional[float]: val = super().extract_altitude() if val is not None: return val - if self._xmp is None: + xmp = self._xmp_with_reason("altitude") + if xmp is None: return None - val = self._xmp.extract_altitude() + val = xmp.extract_altitude() if val is not None: return val return None @@ -805,20 +918,10 @@ def extract_capture_time(self) -> T.Optional[datetime.datetime]: val = super().extract_capture_time() if val is not None: return val - if self._xmp is None: - return None - val = self._xmp.extract_capture_time() - if val is not None: - return val - return None - - def extract_direction(self) -> T.Optional[float]: - val = super().extract_direction() - if val is not None: - return val - if self._xmp is None: + xmp = self._xmp_with_reason("capture_time") + if xmp is None: return None - val = self._xmp.extract_direction() + val = xmp.extract_capture_time() if val is not None: return val return None @@ -827,9 +930,10 @@ def extract_lon_lat(self) -> T.Optional[T.Tuple[float, float]]: val = super().extract_lon_lat() if val is not None: return val - if self._xmp is None: + xmp = self._xmp_with_reason("lon_lat") + if xmp is None: return None - val = self._xmp.extract_lon_lat() + val = xmp.extract_lon_lat() if val is not None: return val return None @@ -838,9 +942,10 @@ def extract_make(self) -> T.Optional[str]: val = super().extract_make() if val is not None: return val - if self._xmp is None: + xmp = self._xmp_with_reason("make") + if xmp is None: return None - val = self._xmp.extract_make() + val = xmp.extract_make() if val is not None: return val return None @@ -849,31 +954,10 @@ def extract_model(self) -> T.Optional[str]: val = super().extract_model() if val is not None: return val - if self._xmp is None: - return None - val = self._xmp.extract_model() - if val is not None: - return val - return None - - def extract_width(self) -> T.Optional[int]: - val = super().extract_width() - if val is not None: - return val - if self._xmp is None: - return None - val = self._xmp.extract_width() - if val is not None: - return val - return None - - def extract_height(self) -> T.Optional[int]: - val = super().extract_height() - if val is not None: - return val - if self._xmp is None: + xmp = self._xmp_with_reason("model") + if xmp is None: return None - val = self._xmp.extract_height() + val = xmp.extract_model() if val is not None: return val return None diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index 9bd94e775..4f16fb5f2 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -64,9 +64,9 @@ def geotag_image( try: with image_path.open("rb") as fp: exif = ExifRead(fp) - image_metadata = GeotagImagesFromEXIF.build_image_metadata( - image_path, exif, skip_lonlat_error=skip_lonlat_error - ) + image_metadata = GeotagImagesFromEXIF.build_image_metadata( + image_path, exif, skip_lonlat_error=skip_lonlat_error + ) except Exception as ex: return types.describe_error_metadata( ex, image_path, filetype=types.FileType.IMAGE From cae61a5911f95324a34ee177034af7afddeba926 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Tue, 18 Mar 2025 10:17:05 -0700 Subject: [PATCH 5/8] add back validation on mapillary desc --- mapillary_tools/process_geotag_properties.py | 57 ++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index 515d04bd1..698e8889e 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -1,8 +1,10 @@ import collections import datetime +import itertools import json import logging import typing as T +from multiprocessing import Pool from pathlib import Path from tqdm import tqdm @@ -444,6 +446,57 @@ def _show_stats_per_filetype( ) +_IT = T.TypeVar("_IT") + + +def split_if( + it: T.Iterable[_IT], sep: T.Callable[[_IT], bool] +) -> T.Tuple[T.List[_IT], T.List[_IT]]: + yes, no = [], [] + for e in it: + if sep(e): + yes.append(e) + else: + no.append(e) + return yes, no + + +def _validate_metadatas( + metadatas: T.Sequence[types.MetadataOrError], num_processes: T.Optional[int] +) -> T.List[types.MetadataOrError]: + # validating metadatas is slow, hence multiprocessing + if num_processes is None: + pool_num_processes = None + disable_multiprocessing = False + else: + pool_num_processes = max(num_processes, 1) + disable_multiprocessing = num_processes <= 0 + with Pool(processes=pool_num_processes) as pool: + validated_metadatas_iter: T.Iterator[types.MetadataOrError] + if disable_multiprocessing: + validated_metadatas_iter = map(types.validate_and_fail_metadata, metadatas) + else: + # Do not pass error metadatas where the error object can not be pickled for multiprocessing to work + # Otherwise we get: + # TypeError: __init__() missing 3 required positional arguments: 'image_time', 'gpx_start_time', and 'gpx_end_time' + # See https://stackoverflow.com/a/61432070 + yes, no = split_if(metadatas, lambda m: isinstance(m, types.ErrorMetadata)) + no_iter = pool.imap( + types.validate_and_fail_metadata, + no, + ) + validated_metadatas_iter = itertools.chain(yes, no_iter) + return list( + tqdm( + validated_metadatas_iter, + desc="Validating metadatas", + unit="metadata", + disable=LOG.getEffectiveLevel() <= logging.DEBUG, + total=len(metadatas), + ) + ) + + def process_finalize( import_path: T.Union[T.Sequence[Path], Path], metadatas: T.List[types.MetadataOrError], @@ -458,6 +511,7 @@ def process_finalize( offset_time: float = 0.0, offset_angle: float = 0.0, desc_path: T.Optional[str] = None, + num_processes: T.Optional[int] = None, ) -> T.List[types.MetadataOrError]: for metadata in metadatas: if isinstance(metadata, types.VideoMetadata): @@ -482,6 +536,9 @@ def process_finalize( offset_angle=offset_angle, ) + LOG.debug("Validating %d metadatas", len(metadatas)) + metadatas = _validate_metadatas(metadatas, num_processes=num_processes) + _overwrite_exif_tags( # search image metadatas again because some of them might have been failed [ From c44588ec93b46c99ac4036d4b74dab334786d3bd Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Tue, 18 Mar 2025 10:39:36 -0700 Subject: [PATCH 6/8] fix tests --- tests/integration/fixtures.py | 4 +++- tests/integration/test_history.py | 32 -------------------------- tests/unit/test_sequence_processing.py | 2 +- 3 files changed, 4 insertions(+), 34 deletions(-) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 5d19b10dd..cc30f54d3 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -224,7 +224,9 @@ def verify_descs(expected: T.List[T.Dict], actual: T.Union[Path, T.List[T.Dict]] actual_desc = actual_map.get(filename) assert actual_desc is not None, expected_desc if "error" in expected_desc: - assert expected_desc["error"]["type"] == actual_desc["error"]["type"] + assert expected_desc["error"]["type"] == actual_desc.get("error", {}).get( + "type" + ), f"{expected_desc=} != {actual_desc=}" if "message" in expected_desc["error"]: assert ( expected_desc["error"]["message"] == actual_desc["error"]["message"] diff --git a/tests/integration/test_history.py b/tests/integration/test_history.py index 656bc626f..a64a5fee7 100644 --- a/tests/integration/test_history.py +++ b/tests/integration/test_history.py @@ -32,38 +32,6 @@ def test_upload_images( for upload in setup_upload.listdir(): upload.remove() - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {str(setup_data)}", - shell=True, - ) - assert x.returncode == 0, x.stderr - verify_descs( - [ - { - "error": { - "message": "The image was already uploaded", - "type": "MapillaryUploadedAlreadyError", - }, - "filename": str(Path(setup_data.join("images").join("DSC00001.JPG"))), - }, - { - "error": { - "message": "The image was already uploaded", - "type": "MapillaryUploadedAlreadyError", - }, - "filename": str(Path(setup_data.join("images").join("DSC00497.JPG"))), - }, - { - "error": { - "message": "The image was already uploaded", - "type": "MapillaryUploadedAlreadyError", - }, - "filename": str(Path(setup_data.join("images").join("V0370574.JPG"))), - }, - ], - Path(setup_data, "mapillary_image_description.json"), - ) - x = subprocess.run( f"{EXECUTABLE} process_and_upload --file_types=image {UPLOAD_FLAGS} {str(setup_data)}", shell=True, diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index 475002366..5d076cb92 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ -456,7 +456,7 @@ def test_process_finalize(setup_data): "MAPLongitude": 1, "MAPCaptureTime": "1970_01_01_00_00_02_000", "MAPCompassHeading": {"TrueHeading": 17.0, "MagneticHeading": 17.0}, - "md5sum": "346c064df2c194e20ea98708fd61ac10", + "md5sum": None, }, { "error": { From 505f04255d503f91aa84b61d6fa3172c18d5158a Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Tue, 18 Mar 2025 10:40:18 -0700 Subject: [PATCH 7/8] clean up tests --- tests/integration/test_history.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/test_history.py b/tests/integration/test_history.py index a64a5fee7..0dbf0e4f8 100644 --- a/tests/integration/test_history.py +++ b/tests/integration/test_history.py @@ -1,5 +1,4 @@ import subprocess -from pathlib import Path import py.path import pytest @@ -10,7 +9,6 @@ setup_data, setup_upload, USERNAME, - verify_descs, ) UPLOAD_FLAGS = f"--dry_run --user_name={USERNAME}" From 6a1e991131669e4e06b906692fbaddd6e637b25d Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Tue, 18 Mar 2025 11:00:36 -0700 Subject: [PATCH 8/8] add back image size extraction --- mapillary_tools/exif_read.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/mapillary_tools/exif_read.py b/mapillary_tools/exif_read.py index 505490471..284d59377 100644 --- a/mapillary_tools/exif_read.py +++ b/mapillary_tools/exif_read.py @@ -961,3 +961,27 @@ def extract_model(self) -> T.Optional[str]: if val is not None: return val return None + + def extract_width(self) -> T.Optional[int]: + val = super().extract_width() + if val is not None: + return val + xmp = self._xmp_with_reason("width") + if xmp is None: + return None + val = xmp.extract_width() + if val is not None: + return val + return None + + def extract_height(self) -> T.Optional[int]: + val = super().extract_height() + if val is not None: + return val + xmp = self._xmp_with_reason("width") + if xmp is None: + return None + val = xmp.extract_height() + if val is not None: + return val + return None