From 33e54db6d76e18833fc51df7c5791b351945d007 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 4 Apr 2025 15:00:30 -0700 Subject: [PATCH 01/10] git mv mapillary_tools/geotag/geotag_videos_from_exiftool_video.py mapillary_tools/geotag/geotag_videos_from_exiftool.py --- mapillary_tools/geotag/factory.py | 6 +++--- mapillary_tools/geotag/geotag_images_from_exiftool.py | 2 +- .../geotag_images_from_exiftool_both_image_and_video.py | 6 +++--- ...rom_exiftool_video.py => geotag_videos_from_exiftool.py} | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) rename mapillary_tools/geotag/{geotag_videos_from_exiftool_video.py => geotag_videos_from_exiftool.py} (98%) diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index 9a3b09b6e..815bd23f4 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -15,7 +15,7 @@ geotag_images_from_gpx_file, geotag_images_from_nmea_file, geotag_images_from_video, - geotag_videos_from_exiftool_video, + geotag_videos_from_exiftool, geotag_videos_from_gpx, geotag_videos_from_video, ) @@ -258,7 +258,7 @@ def _geotag_videos( return geotag.to_description() if option.source is SourceType.EXIFTOOL_RUNTIME: - geotag = geotag_videos_from_exiftool_video.GeotagVideosFromExifToolRunner( + geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolRunner( video_paths, num_processes=option.num_processes ) try: @@ -268,7 +268,7 @@ def _geotag_videos( return [] elif option.source is SourceType.EXIFTOOL_XML: - geotag = geotag_videos_from_exiftool_video.GeotagVideosFromExifToolVideo( + geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( video_paths, xml_path=_ensure_source_path(option), ) diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index b187a2a41..fbaa6923a 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -24,7 +24,7 @@ def _exif_context(self): yield exiftool_read.ExifToolRead(ET.ElementTree(self.element)) -class GeotagImagesFromExifTool(GeotagImagesFromGeneric): +class GeotagImagesFromExifToolXML(GeotagImagesFromGeneric): def __init__( self, image_paths: T.Sequence[Path], diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py b/mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py index 4b9e19d49..47debd070 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py @@ -8,7 +8,7 @@ from . import ( geotag_images_from_exiftool, geotag_images_from_video, - geotag_videos_from_exiftool_video, + geotag_videos_from_exiftool, ) from .geotag_from_generic import GeotagImagesFromGeneric @@ -41,7 +41,7 @@ def geotag_samples(self) -> list[types.ImageMetadataOrError]: samples_by_video = utils.find_all_image_samples(self.image_paths, video_paths) video_metadata_or_errors = ( - geotag_videos_from_exiftool_video.GeotagVideosFromExifToolVideo( + geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( list(samples_by_video.keys()), self.xml_path, num_processes=self.num_processes, @@ -67,7 +67,7 @@ def to_description(self) -> list[types.ImageMetadataOrError]: ] non_sample_metadata_or_errors = ( - geotag_images_from_exiftool.GeotagImagesFromExifTool( + geotag_images_from_exiftool.GeotagImagesFromExifToolXML( non_sample_paths, self.xml_path, num_processes=self.num_processes, diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py b/mapillary_tools/geotag/geotag_videos_from_exiftool.py similarity index 98% rename from mapillary_tools/geotag/geotag_videos_from_exiftool_video.py rename to mapillary_tools/geotag/geotag_videos_from_exiftool.py index 3f95fc5ed..03023baa2 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool.py @@ -69,7 +69,7 @@ def extract(self) -> types.VideoMetadataOrError: return video_metadata -class GeotagVideosFromExifToolVideo(GeotagVideosFromGeneric): +class GeotagVideosFromExifToolXML(GeotagVideosFromGeneric): def __init__( self, video_paths: T.Sequence[Path], From 33e4524ec142dbd03f985fa7ab3b6dbbac4af5a3 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 4 Apr 2025 15:12:24 -0700 Subject: [PATCH 02/10] update --- mapillary_tools/geotag/factory.py | 3 +- .../geotag/geotag_images_from_exiftool.py | 61 ++++++++++++++- ...ages_from_exiftool_both_image_and_video.py | 77 ------------------- 3 files changed, 61 insertions(+), 80 deletions(-) delete mode 100644 mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index 815bd23f4..7707f862f 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -11,7 +11,6 @@ geotag_from_generic, geotag_images_from_exif, geotag_images_from_exiftool, - geotag_images_from_exiftool_both_image_and_video, geotag_images_from_gpx_file, geotag_images_from_nmea_file, geotag_images_from_video, @@ -175,7 +174,7 @@ def _geotag_images( elif option.source is SourceType.EXIFTOOL_XML: # This is to ensure 'video_process --geotag={"source": "exiftool_xml", "source_path": "/tmp/xml_path"}' # to work - geotag = geotag_images_from_exiftool_both_image_and_video.GeotagImagesFromExifToolBothImageAndVideo( + geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolWithSamples( image_paths, xml_path=_ensure_source_path(option), num_processes=option.num_processes, diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index fbaa6923a..7edc4f82a 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -6,10 +6,12 @@ import xml.etree.ElementTree as ET from pathlib import Path -from .. import constants, exceptions, exiftool_read, types +from .. import constants, exceptions, exiftool_read, types, utils from ..exiftool_runner import ExiftoolRunner from .geotag_from_generic import GeotagImagesFromGeneric from .geotag_images_from_exif import ImageEXIFExtractor +from .geotag_images_from_video import GeotagImagesFromVideo +from .geotag_videos_from_exiftool import GeotagVideosFromExifToolXML LOG = logging.getLogger(__name__) @@ -103,3 +105,60 @@ def _generate_image_extractors( results.append(ImageExifToolExtractor(path, rdf_description)) return results + + +class GeotagImagesFromExifToolWithSamples(GeotagImagesFromGeneric): + def __init__( + self, + image_paths: T.Sequence[Path], + xml_path: Path, + offset_time: float = 0.0, + num_processes: int | None = None, + ): + super().__init__(image_paths, num_processes=num_processes) + self.xml_path = xml_path + self.offset_time = offset_time + + def geotag_samples(self) -> list[types.ImageMetadataOrError]: + # Find all video paths in self.xml_path + rdf_description_by_path = exiftool_read.index_rdf_description_by_path( + [self.xml_path] + ) + video_paths = utils.find_videos( + [Path(pathstr) for pathstr in rdf_description_by_path.keys()], + skip_subfolders=True, + ) + # Find all video paths that have sample images + samples_by_video = utils.find_all_image_samples(self.image_paths, video_paths) + + video_metadata_or_errors = GeotagVideosFromExifToolXML( + list(samples_by_video.keys()), + self.xml_path, + num_processes=self.num_processes, + ).to_description() + sample_paths = sum(samples_by_video.values(), []) + sample_metadata_or_errors = GeotagImagesFromVideo( + sample_paths, + video_metadata_or_errors, + offset_time=self.offset_time, + num_processes=self.num_processes, + ).to_description() + + return sample_metadata_or_errors + + def to_description(self) -> list[types.ImageMetadataOrError]: + sample_metadata_or_errors = self.geotag_samples() + + sample_paths = set(metadata.filename for metadata in sample_metadata_or_errors) + + non_sample_paths = [ + path for path in self.image_paths if path not in sample_paths + ] + + non_sample_metadata_or_errors = GeotagImagesFromExifToolXML( + non_sample_paths, + self.xml_path, + num_processes=self.num_processes, + ).to_description() + + return sample_metadata_or_errors + non_sample_metadata_or_errors diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py b/mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py deleted file mode 100644 index 47debd070..000000000 --- a/mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +++ /dev/null @@ -1,77 +0,0 @@ -from __future__ import annotations - -import logging -import typing as T -from pathlib import Path - -from .. import exiftool_read, types, utils -from . import ( - geotag_images_from_exiftool, - geotag_images_from_video, - geotag_videos_from_exiftool, -) -from .geotag_from_generic import GeotagImagesFromGeneric - - -LOG = logging.getLogger(__name__) - - -class GeotagImagesFromExifToolBothImageAndVideo(GeotagImagesFromGeneric): - def __init__( - self, - image_paths: T.Sequence[Path], - xml_path: Path, - offset_time: float = 0.0, - num_processes: int | None = None, - ): - super().__init__(image_paths, num_processes=num_processes) - self.xml_path = xml_path - self.offset_time = offset_time - - def geotag_samples(self) -> list[types.ImageMetadataOrError]: - # Find all video paths in self.xml_path - rdf_description_by_path = exiftool_read.index_rdf_description_by_path( - [self.xml_path] - ) - video_paths = utils.find_videos( - [Path(pathstr) for pathstr in rdf_description_by_path.keys()], - skip_subfolders=True, - ) - # Find all video paths that have sample images - samples_by_video = utils.find_all_image_samples(self.image_paths, video_paths) - - video_metadata_or_errors = ( - geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( - list(samples_by_video.keys()), - self.xml_path, - num_processes=self.num_processes, - ).to_description() - ) - sample_paths = sum(samples_by_video.values(), []) - sample_metadata_or_errors = geotag_images_from_video.GeotagImagesFromVideo( - sample_paths, - video_metadata_or_errors, - offset_time=self.offset_time, - num_processes=self.num_processes, - ).to_description() - - return sample_metadata_or_errors - - def to_description(self) -> list[types.ImageMetadataOrError]: - sample_metadata_or_errors = self.geotag_samples() - - sample_paths = set(metadata.filename for metadata in sample_metadata_or_errors) - - non_sample_paths = [ - path for path in self.image_paths if path not in sample_paths - ] - - non_sample_metadata_or_errors = ( - geotag_images_from_exiftool.GeotagImagesFromExifToolXML( - non_sample_paths, - self.xml_path, - num_processes=self.num_processes, - ).to_description() - ) - - return sample_metadata_or_errors + non_sample_metadata_or_errors From 7755662cdc54d82675dd45709b2d0e75362987d2 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 4 Apr 2025 16:57:07 -0700 Subject: [PATCH 03/10] move around --- mapillary_tools/geotag/__init__.py | 1 - .../{geotag_from_generic.py => base.py} | 30 +--- mapillary_tools/geotag/factory.py | 6 +- .../geotag/geotag_images_from_exif.py | 52 +------ .../geotag/geotag_images_from_exiftool.py | 15 +- .../geotag/geotag_images_from_gpx.py | 2 +- .../geotag/geotag_images_from_gpx_file.py | 33 +--- .../geotag/geotag_images_from_video.py | 3 +- .../geotag/geotag_videos_from_exiftool.py | 66 +------- .../geotag/geotag_videos_from_gpx.py | 113 +------------- .../geotag/geotag_videos_from_video.py | 146 +---------------- .../geotag/image_extractors/base.py | 18 +++ .../geotag/image_extractors/exif.py | 53 +++++++ .../geotag/image_extractors/exiftool.py | 18 +++ mapillary_tools/geotag/utils.py | 32 ++++ .../geotag/video_extractors/base.py | 18 +++ .../geotag/video_extractors/exiftool.py | 63 ++++++++ .../geotag/video_extractors/gpx.py | 119 ++++++++++++++ .../geotag/video_extractors/native.py | 147 ++++++++++++++++++ 19 files changed, 493 insertions(+), 442 deletions(-) delete mode 100644 mapillary_tools/geotag/__init__.py rename mapillary_tools/geotag/{geotag_from_generic.py => base.py} (86%) create mode 100644 mapillary_tools/geotag/image_extractors/base.py create mode 100644 mapillary_tools/geotag/image_extractors/exif.py create mode 100644 mapillary_tools/geotag/image_extractors/exiftool.py create mode 100644 mapillary_tools/geotag/utils.py create mode 100644 mapillary_tools/geotag/video_extractors/base.py create mode 100644 mapillary_tools/geotag/video_extractors/exiftool.py create mode 100644 mapillary_tools/geotag/video_extractors/gpx.py create mode 100644 mapillary_tools/geotag/video_extractors/native.py diff --git a/mapillary_tools/geotag/__init__.py b/mapillary_tools/geotag/__init__.py deleted file mode 100644 index 75e64f68f..000000000 --- a/mapillary_tools/geotag/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .. import geo # noqa: F401 diff --git a/mapillary_tools/geotag/geotag_from_generic.py b/mapillary_tools/geotag/base.py similarity index 86% rename from mapillary_tools/geotag/geotag_from_generic.py rename to mapillary_tools/geotag/base.py index 6d9f7e89b..37dfdefb4 100644 --- a/mapillary_tools/geotag/geotag_from_generic.py +++ b/mapillary_tools/geotag/base.py @@ -8,24 +8,14 @@ from tqdm import tqdm from .. import exceptions, types, utils +from .image_extractors.base import BaseImageExtractor +from .video_extractors.base import BaseVideoExtractor LOG = logging.getLogger(__name__) -class GenericImageExtractor(abc.ABC): - """ - Extracts metadata from an image file. - """ - - def __init__(self, image_path: Path): - self.image_path = image_path - - def extract(self) -> types.ImageMetadataOrError: - raise NotImplementedError - - -TImageExtractor = T.TypeVar("TImageExtractor", bound=GenericImageExtractor) +TImageExtractor = T.TypeVar("TImageExtractor", bound=BaseImageExtractor) class GeotagImagesFromGeneric(abc.ABC, T.Generic[TImageExtractor]): @@ -88,19 +78,7 @@ def run_extraction(cls, extractor: TImageExtractor) -> types.ImageMetadataOrErro ) -class GenericVideoExtractor(abc.ABC): - """ - Extracts metadata from a video file. - """ - - def __init__(self, video_path: Path): - self.video_path = video_path - - def extract(self) -> types.VideoMetadataOrError: - raise NotImplementedError - - -TVideoExtractor = T.TypeVar("TVideoExtractor", bound=GenericVideoExtractor) +TVideoExtractor = T.TypeVar("TVideoExtractor", bound=BaseVideoExtractor) class GeotagVideosFromGeneric(abc.ABC, T.Generic[TVideoExtractor]): diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index 7707f862f..a8536c667 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -8,7 +8,7 @@ from .. import exceptions, types, utils from ..types import FileType from . import ( - geotag_from_generic, + base, geotag_images_from_exif, geotag_images_from_exiftool, geotag_images_from_gpx_file, @@ -153,7 +153,7 @@ def _geotag_images( else: interpolation = option.interpolation - geotag: geotag_from_generic.GeotagImagesFromGeneric + geotag: base.GeotagImagesFromGeneric if option.source is SourceType.NATIVE: geotag = geotag_images_from_exif.GeotagImagesFromEXIF( @@ -248,7 +248,7 @@ def _geotag_videos( if not video_paths: return [] - geotag: geotag_from_generic.GeotagVideosFromGeneric + geotag: base.GeotagVideosFromGeneric if option.source is SourceType.NATIVE: geotag = geotag_videos_from_video.GeotagVideosFromVideo( diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index 7fd0c7c93..b62baeebc 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -1,60 +1,12 @@ -import contextlib import logging import typing as T -from pathlib import Path -from .. import exceptions, geo, types, utils -from ..exif_read import ExifRead, ExifReadABC -from .geotag_from_generic import GenericImageExtractor, GeotagImagesFromGeneric +from .base import GeotagImagesFromGeneric +from .image_extractors.exif import ImageEXIFExtractor LOG = logging.getLogger(__name__) -class ImageEXIFExtractor(GenericImageExtractor): - def __init__(self, image_path: Path, skip_lonlat_error: bool = False): - super().__init__(image_path) - self.skip_lonlat_error = skip_lonlat_error - - @contextlib.contextmanager - def _exif_context(self) -> T.Generator[ExifReadABC, None, None]: - with self.image_path.open("rb") as fp: - yield ExifRead(fp) - - def extract(self) -> types.ImageMetadata: - with self._exif_context() as exif: - lonlat = exif.extract_lon_lat() - if lonlat is None: - if not self.skip_lonlat_error: - raise exceptions.MapillaryGeoTaggingError( - "Unable to extract GPS Longitude or GPS Latitude from the image" - ) - lonlat = (0.0, 0.0) - lon, lat = lonlat - - capture_time = exif.extract_capture_time() - if capture_time is None: - raise exceptions.MapillaryGeoTaggingError( - "Unable to extract timestamp from the image" - ) - - image_metadata = types.ImageMetadata( - filename=self.image_path, - filesize=utils.get_file_size(self.image_path), - time=geo.as_unix_time(capture_time), - lat=lat, - lon=lon, - alt=exif.extract_altitude(), - angle=exif.extract_direction(), - width=exif.extract_width(), - height=exif.extract_height(), - MAPOrientation=exif.extract_orientation(), - MAPDeviceMake=exif.extract_make(), - MAPDeviceModel=exif.extract_model(), - ) - - return image_metadata - - class GeotagImagesFromEXIF(GeotagImagesFromGeneric): def _generate_image_extractors(self) -> T.Sequence[ImageEXIFExtractor]: return [ImageEXIFExtractor(path) for path in self.image_paths] diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index 7edc4f82a..28ab5cbab 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -1,6 +1,5 @@ from __future__ import annotations -import contextlib import logging import typing as T import xml.etree.ElementTree as ET @@ -8,24 +7,14 @@ from .. import constants, exceptions, exiftool_read, types, utils from ..exiftool_runner import ExiftoolRunner -from .geotag_from_generic import GeotagImagesFromGeneric -from .geotag_images_from_exif import ImageEXIFExtractor +from .base import GeotagImagesFromGeneric from .geotag_images_from_video import GeotagImagesFromVideo from .geotag_videos_from_exiftool import GeotagVideosFromExifToolXML +from .image_extractors.exiftool import ImageExifToolExtractor LOG = logging.getLogger(__name__) -class ImageExifToolExtractor(ImageEXIFExtractor): - def __init__(self, image_path: Path, element: ET.Element): - super().__init__(image_path) - self.element = element - - @contextlib.contextmanager - def _exif_context(self): - yield exiftool_read.ExifToolRead(ET.ElementTree(self.element)) - - class GeotagImagesFromExifToolXML(GeotagImagesFromGeneric): def __init__( self, diff --git a/mapillary_tools/geotag/geotag_images_from_gpx.py b/mapillary_tools/geotag/geotag_images_from_gpx.py index ac06d90b3..ce81cf9c8 100644 --- a/mapillary_tools/geotag/geotag_images_from_gpx.py +++ b/mapillary_tools/geotag/geotag_images_from_gpx.py @@ -6,7 +6,7 @@ from pathlib import Path from .. import exceptions, geo, types -from .geotag_from_generic import GeotagImagesFromGeneric +from .base import GeotagImagesFromGeneric from .geotag_images_from_exif import ImageEXIFExtractor diff --git a/mapillary_tools/geotag/geotag_images_from_gpx_file.py b/mapillary_tools/geotag/geotag_images_from_gpx_file.py index 347ea5294..ecc95067c 100644 --- a/mapillary_tools/geotag/geotag_images_from_gpx_file.py +++ b/mapillary_tools/geotag/geotag_images_from_gpx_file.py @@ -4,9 +4,7 @@ import typing as T from pathlib import Path -import gpxpy - -from .. import geo +from . import utils from .geotag_images_from_gpx import GeotagImagesFromGPX @@ -23,7 +21,7 @@ def __init__( num_processes: int | None = None, ): try: - tracks = parse_gpx(source_path) + tracks = utils.parse_gpx(source_path) except Exception as ex: raise RuntimeError( f"Error parsing GPX {source_path}: {ex.__class__.__name__}: {ex}" @@ -43,30 +41,3 @@ def __init__( offset_time=offset_time, num_processes=num_processes, ) - - -Track = T.List[geo.Point] - - -def parse_gpx(gpx_file: Path) -> list[Track]: - with gpx_file.open("r") as f: - gpx = gpxpy.parse(f) - - tracks: list[Track] = [] - - for track in gpx.tracks: - for segment in track.segments: - tracks.append([]) - for point in segment.points: - if point.time is not None: - tracks[-1].append( - geo.Point( - time=geo.as_unix_time(point.time), - lat=point.latitude, - lon=point.longitude, - alt=point.elevation, - angle=None, - ) - ) - - return tracks diff --git a/mapillary_tools/geotag/geotag_images_from_video.py b/mapillary_tools/geotag/geotag_images_from_video.py index 6332b3eee..820f1a0ae 100644 --- a/mapillary_tools/geotag/geotag_images_from_video.py +++ b/mapillary_tools/geotag/geotag_images_from_video.py @@ -5,8 +5,7 @@ from pathlib import Path from .. import types, utils - -from .geotag_from_generic import GeotagImagesFromGeneric +from .base import GeotagImagesFromGeneric from .geotag_images_from_gpx import GeotagImagesFromGPX diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool.py b/mapillary_tools/geotag/geotag_videos_from_exiftool.py index 03023baa2..1a405e87c 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool.py @@ -5,70 +5,14 @@ import xml.etree.ElementTree as ET from pathlib import Path -from .. import constants, exceptions, exiftool_read, geo, types, utils -from ..exiftool_read_video import ExifToolReadVideo +from .. import constants, exceptions, exiftool_read, types from ..exiftool_runner import ExiftoolRunner -from ..gpmf import gpmf_gps_filter -from ..telemetry import GPSPoint -from .geotag_from_generic import GenericVideoExtractor, GeotagVideosFromGeneric +from .base import GeotagVideosFromGeneric +from .video_extractors.exiftool import VideoExifToolExtractor LOG = logging.getLogger(__name__) -class VideoExifToolExtractor(GenericVideoExtractor): - def __init__(self, video_path: Path, element: ET.Element): - super().__init__(video_path) - self.element = element - - def extract(self) -> types.VideoMetadataOrError: - exif = ExifToolReadVideo(ET.ElementTree(self.element)) - - make = exif.extract_make() - model = exif.extract_model() - - is_gopro = make is not None and make.upper() in ["GOPRO"] - - points = exif.extract_gps_track() - - # ExifTool has no idea if GPS is not found or found but empty - if is_gopro: - if not points: - raise exceptions.MapillaryGPXEmptyError("Empty GPS data found") - - # ExifTool (since 13.04) converts GPSSpeed for GoPro to km/h, so here we convert it back to m/s - for p in points: - if isinstance(p, GPSPoint) and p.ground_speed is not None: - p.ground_speed = p.ground_speed / 3.6 - - if isinstance(points[0], GPSPoint): - points = T.cast( - T.List[geo.Point], - gpmf_gps_filter.remove_noisy_points( - T.cast(T.List[GPSPoint], points) - ), - ) - if not points: - raise exceptions.MapillaryGPSNoiseError("GPS is too noisy") - - if not points: - raise exceptions.MapillaryVideoGPSNotFoundError( - "No GPS data found from the video" - ) - - filetype = types.FileType.GOPRO if is_gopro else types.FileType.VIDEO - - video_metadata = types.VideoMetadata( - self.video_path, - filesize=utils.get_file_size(self.video_path), - filetype=filetype, - points=points, - make=make, - model=model, - ) - - return video_metadata - - class GeotagVideosFromExifToolXML(GeotagVideosFromGeneric): def __init__( self, @@ -81,7 +25,7 @@ def __init__( def _generate_video_extractors( self, - ) -> T.Sequence[GenericVideoExtractor | types.ErrorMetadata]: + ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: rdf_description_by_path = exiftool_read.index_rdf_description_by_path( [self.xml_path] ) @@ -110,7 +54,7 @@ def _generate_video_extractors( class GeotagVideosFromExifToolRunner(GeotagVideosFromGeneric): def _generate_video_extractors( self, - ) -> T.Sequence[GenericVideoExtractor | types.ErrorMetadata]: + ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: runner = ExiftoolRunner(constants.EXIFTOOL_PATH) LOG.debug( diff --git a/mapillary_tools/geotag/geotag_videos_from_gpx.py b/mapillary_tools/geotag/geotag_videos_from_gpx.py index 809a25c7d..80139f14e 100644 --- a/mapillary_tools/geotag/geotag_videos_from_gpx.py +++ b/mapillary_tools/geotag/geotag_videos_from_gpx.py @@ -1,126 +1,17 @@ from __future__ import annotations -import dataclasses -import datetime import logging - import typing as T from pathlib import Path -from .. import geo, telemetry, types from . import options -from .geotag_from_generic import GenericVideoExtractor, GeotagVideosFromGeneric -from .geotag_images_from_gpx_file import parse_gpx -from .geotag_videos_from_video import NativeVideoExtractor +from .base import GeotagVideosFromGeneric +from .video_extractors.gpx import GPXVideoExtractor LOG = logging.getLogger(__name__) -class GPXVideoExtractor(GenericVideoExtractor): - def __init__(self, video_path: Path, gpx_path: Path): - self.video_path = video_path - self.gpx_path = gpx_path - - def extract(self) -> types.VideoMetadataOrError: - try: - gpx_tracks = parse_gpx(self.gpx_path) - except Exception as ex: - raise RuntimeError( - f"Error parsing GPX {self.gpx_path}: {ex.__class__.__name__}: {ex}" - ) - - if 1 < len(gpx_tracks): - LOG.warning( - "Found %s tracks in the GPX file %s. Will merge points in all the tracks as a single track for interpolation", - len(gpx_tracks), - self.gpx_path, - ) - - gpx_points: T.Sequence[geo.Point] = sum(gpx_tracks, []) - - native_extractor = NativeVideoExtractor(self.video_path) - - video_metadata_or_error = native_extractor.extract() - - if isinstance(video_metadata_or_error, types.ErrorMetadata): - self._rebase_times(gpx_points) - return types.VideoMetadata( - filename=video_metadata_or_error.filename, - filetype=video_metadata_or_error.filetype or types.FileType.VIDEO, - points=gpx_points, - ) - - video_metadata = video_metadata_or_error - - offset = self._synx_gpx_by_first_gps_timestamp( - gpx_points, video_metadata.points - ) - - self._rebase_times(gpx_points, offset=offset) - - return dataclasses.replace(video_metadata_or_error, points=gpx_points) - - @staticmethod - def _rebase_times(points: T.Sequence[geo.Point], offset: float = 0.0): - """ - Make point times start from 0 - """ - if points: - first_timestamp = points[0].time - for p in points: - p.time = (p.time - first_timestamp) + offset - return points - - def _synx_gpx_by_first_gps_timestamp( - self, gpx_points: T.Sequence[geo.Point], video_gps_points: T.Sequence[geo.Point] - ) -> float: - offset: float = 0.0 - - if not gpx_points: - return offset - - first_gpx_dt = datetime.datetime.fromtimestamp( - gpx_points[0].time, tz=datetime.timezone.utc - ) - LOG.info("First GPX timestamp: %s", first_gpx_dt) - - if not video_gps_points: - LOG.warning( - "Skip GPX synchronization because no GPS found in video %s", - self.video_path, - ) - return offset - - first_gps_point = video_gps_points[0] - if isinstance(first_gps_point, telemetry.GPSPoint): - if first_gps_point.epoch_time is not None: - first_gps_dt = datetime.datetime.fromtimestamp( - first_gps_point.epoch_time, tz=datetime.timezone.utc - ) - LOG.info("First GPS timestamp: %s", first_gps_dt) - offset = gpx_points[0].time - first_gps_point.epoch_time - if offset: - LOG.warning( - "Found offset between GPX %s and video GPS timestamps %s: %s seconds", - first_gpx_dt, - first_gps_dt, - offset, - ) - else: - LOG.info( - "GPX and GPS are perfectly synchronized (all starts from %s)", - first_gpx_dt, - ) - else: - LOG.warning( - "Skip GPX synchronization because no GPS epoch time found in video %s", - self.video_path, - ) - - return offset - - class GeotagVideosFromGPX(GeotagVideosFromGeneric): def __init__( self, diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index ec0e6d7e2..0e568324e 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -3,149 +3,9 @@ import typing as T from pathlib import Path -from .. import blackvue_parser, exceptions, geo, telemetry, types, utils -from ..camm import camm_parser -from ..gpmf import gpmf_gps_filter, gpmf_parser from ..types import FileType -from .geotag_from_generic import GenericVideoExtractor, GeotagVideosFromGeneric - - -class GoProVideoExtractor(GenericVideoExtractor): - def extract(self) -> types.VideoMetadataOrError: - with self.video_path.open("rb") as fp: - gopro_info = gpmf_parser.extract_gopro_info(fp) - - if gopro_info is None: - raise exceptions.MapillaryVideoGPSNotFoundError( - "No GPS data found from the video" - ) - - gps_points = gopro_info.gps - assert gps_points is not None, "must have GPS data extracted" - if not gps_points: - # Instead of raising an exception, return error metadata to tell the file type - ex: exceptions.MapillaryDescriptionError = ( - exceptions.MapillaryGPXEmptyError("Empty GPS data found") - ) - return types.describe_error_metadata( - ex, self.video_path, filetype=FileType.GOPRO - ) - - gps_points = T.cast( - T.List[telemetry.GPSPoint], gpmf_gps_filter.remove_noisy_points(gps_points) - ) - if not gps_points: - # Instead of raising an exception, return error metadata to tell the file type - ex = exceptions.MapillaryGPSNoiseError("GPS is too noisy") - return types.describe_error_metadata( - ex, self.video_path, filetype=FileType.GOPRO - ) - - video_metadata = types.VideoMetadata( - filename=self.video_path, - filesize=utils.get_file_size(self.video_path), - filetype=FileType.GOPRO, - points=T.cast(T.List[geo.Point], gps_points), - make=gopro_info.make, - model=gopro_info.model, - ) - - return video_metadata - - -class CAMMVideoExtractor(GenericVideoExtractor): - def extract(self) -> types.VideoMetadataOrError: - with self.video_path.open("rb") as fp: - camm_info = camm_parser.extract_camm_info(fp) - - if camm_info is None: - raise exceptions.MapillaryVideoGPSNotFoundError( - "No GPS data found from the video" - ) - - if not camm_info.gps and not camm_info.mini_gps: - # Instead of raising an exception, return error metadata to tell the file type - ex: exceptions.MapillaryDescriptionError = ( - exceptions.MapillaryGPXEmptyError("Empty GPS data found") - ) - return types.describe_error_metadata( - ex, self.video_path, filetype=FileType.CAMM - ) - - return types.VideoMetadata( - filename=self.video_path, - filesize=utils.get_file_size(self.video_path), - filetype=FileType.CAMM, - points=T.cast(T.List[geo.Point], camm_info.gps or camm_info.mini_gps), - make=camm_info.make, - model=camm_info.model, - ) - - -class BlackVueVideoExtractor(GenericVideoExtractor): - def extract(self) -> types.VideoMetadataOrError: - with self.video_path.open("rb") as fp: - blackvue_info = blackvue_parser.extract_blackvue_info(fp) - - if blackvue_info is None: - raise exceptions.MapillaryVideoGPSNotFoundError( - "No GPS data found from the video" - ) - - if not blackvue_info.gps: - # Instead of raising an exception, return error metadata to tell the file type - ex: exceptions.MapillaryDescriptionError = ( - exceptions.MapillaryGPXEmptyError("Empty GPS data found") - ) - return types.describe_error_metadata( - ex, self.video_path, filetype=FileType.BLACKVUE - ) - - video_metadata = types.VideoMetadata( - filename=self.video_path, - filesize=utils.get_file_size(self.video_path), - filetype=FileType.BLACKVUE, - points=blackvue_info.gps or [], - make=blackvue_info.make, - model=blackvue_info.model, - ) - - return video_metadata - - -class NativeVideoExtractor(GenericVideoExtractor): - def __init__(self, video_path: Path, filetypes: set[FileType] | None = None): - super().__init__(video_path) - self.filetypes = filetypes - - def extract(self) -> types.VideoMetadataOrError: - ft = self.filetypes - extractor: GenericVideoExtractor - - if ft is None or FileType.VIDEO in ft or FileType.GOPRO in ft: - extractor = GoProVideoExtractor(self.video_path) - try: - return extractor.extract() - except exceptions.MapillaryVideoGPSNotFoundError: - pass - - if ft is None or FileType.VIDEO in ft or FileType.CAMM in ft: - extractor = CAMMVideoExtractor(self.video_path) - try: - return extractor.extract() - except exceptions.MapillaryVideoGPSNotFoundError: - pass - - if ft is None or FileType.VIDEO in ft or FileType.BLACKVUE in ft: - extractor = BlackVueVideoExtractor(self.video_path) - try: - return extractor.extract() - except exceptions.MapillaryVideoGPSNotFoundError: - pass - - raise exceptions.MapillaryVideoGPSNotFoundError( - "No GPS data found from the video" - ) +from .base import GeotagVideosFromGeneric +from .video_extractors.native import NativeVideoExtractor class GeotagVideosFromVideo(GeotagVideosFromGeneric): @@ -158,7 +18,7 @@ def __init__( super().__init__(video_paths, num_processes=num_processes) self.filetypes = filetypes - def _generate_video_extractors(self) -> T.Sequence[GenericVideoExtractor]: + def _generate_video_extractors(self) -> T.Sequence[NativeVideoExtractor]: return [ NativeVideoExtractor(path, filetypes=self.filetypes) for path in self.video_paths diff --git a/mapillary_tools/geotag/image_extractors/base.py b/mapillary_tools/geotag/image_extractors/base.py new file mode 100644 index 000000000..5d65d6cd2 --- /dev/null +++ b/mapillary_tools/geotag/image_extractors/base.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import abc +from pathlib import Path + +from ... import types + + +class BaseImageExtractor(abc.ABC): + """ + Extracts metadata from an image file. + """ + + def __init__(self, image_path: Path): + self.image_path = image_path + + def extract(self) -> types.ImageMetadataOrError: + raise NotImplementedError diff --git a/mapillary_tools/geotag/image_extractors/exif.py b/mapillary_tools/geotag/image_extractors/exif.py new file mode 100644 index 000000000..ddd3fa191 --- /dev/null +++ b/mapillary_tools/geotag/image_extractors/exif.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import contextlib +import typing as T +from pathlib import Path + +from ... import exceptions, exif_read, geo, types, utils +from .base import BaseImageExtractor + + +class ImageEXIFExtractor(BaseImageExtractor): + def __init__(self, image_path: Path, skip_lonlat_error: bool = False): + super().__init__(image_path) + self.skip_lonlat_error = skip_lonlat_error + + @contextlib.contextmanager + def _exif_context(self) -> T.Generator[exif_read.ExifReadABC, None, None]: + with self.image_path.open("rb") as fp: + yield exif_read.ExifRead(fp) + + def extract(self) -> types.ImageMetadata: + with self._exif_context() as exif: + lonlat = exif.extract_lon_lat() + if lonlat is None: + if not self.skip_lonlat_error: + raise exceptions.MapillaryGeoTaggingError( + "Unable to extract GPS Longitude or GPS Latitude from the image" + ) + lonlat = (0.0, 0.0) + lon, lat = lonlat + + capture_time = exif.extract_capture_time() + if capture_time is None: + raise exceptions.MapillaryGeoTaggingError( + "Unable to extract timestamp from the image" + ) + + image_metadata = types.ImageMetadata( + filename=self.image_path, + filesize=utils.get_file_size(self.image_path), + time=geo.as_unix_time(capture_time), + lat=lat, + lon=lon, + alt=exif.extract_altitude(), + angle=exif.extract_direction(), + width=exif.extract_width(), + height=exif.extract_height(), + MAPOrientation=exif.extract_orientation(), + MAPDeviceMake=exif.extract_make(), + MAPDeviceModel=exif.extract_model(), + ) + + return image_metadata diff --git a/mapillary_tools/geotag/image_extractors/exiftool.py b/mapillary_tools/geotag/image_extractors/exiftool.py new file mode 100644 index 000000000..a0fbb5c82 --- /dev/null +++ b/mapillary_tools/geotag/image_extractors/exiftool.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import contextlib +import xml.etree.ElementTree as ET +from pathlib import Path + +from ... import exiftool_read +from .exif import ImageEXIFExtractor + + +class ImageExifToolExtractor(ImageEXIFExtractor): + def __init__(self, image_path: Path, element: ET.Element): + super().__init__(image_path) + self.element = element + + @contextlib.contextmanager + def _exif_context(self): + yield exiftool_read.ExifToolRead(ET.ElementTree(self.element)) diff --git a/mapillary_tools/geotag/utils.py b/mapillary_tools/geotag/utils.py new file mode 100644 index 000000000..103667a01 --- /dev/null +++ b/mapillary_tools/geotag/utils.py @@ -0,0 +1,32 @@ +import typing as T +from pathlib import Path + +import gpxpy + +from .. import geo + +Track = T.List[geo.Point] + + +def parse_gpx(gpx_file: Path) -> list[Track]: + with gpx_file.open("r") as f: + gpx = gpxpy.parse(f) + + tracks: list[Track] = [] + + for track in gpx.tracks: + for segment in track.segments: + tracks.append([]) + for point in segment.points: + if point.time is not None: + tracks[-1].append( + geo.Point( + time=geo.as_unix_time(point.time), + lat=point.latitude, + lon=point.longitude, + alt=point.elevation, + angle=None, + ) + ) + + return tracks diff --git a/mapillary_tools/geotag/video_extractors/base.py b/mapillary_tools/geotag/video_extractors/base.py new file mode 100644 index 000000000..1a2e03b3c --- /dev/null +++ b/mapillary_tools/geotag/video_extractors/base.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import abc +from pathlib import Path + +from ... import types + + +class BaseVideoExtractor(abc.ABC): + """ + Extracts metadata from a video file. + """ + + def __init__(self, video_path: Path): + self.video_path = video_path + + def extract(self) -> types.VideoMetadataOrError: + raise NotImplementedError diff --git a/mapillary_tools/geotag/video_extractors/exiftool.py b/mapillary_tools/geotag/video_extractors/exiftool.py new file mode 100644 index 000000000..7a4f49101 --- /dev/null +++ b/mapillary_tools/geotag/video_extractors/exiftool.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import typing as T +from pathlib import Path +from xml.etree import ElementTree as ET + +from ... import exceptions, exiftool_read_video, geo, telemetry, types, utils +from ...gpmf import gpmf_gps_filter +from .base import BaseVideoExtractor + + +class VideoExifToolExtractor(BaseVideoExtractor): + def __init__(self, video_path: Path, element: ET.Element): + super().__init__(video_path) + self.element = element + + def extract(self) -> types.VideoMetadataOrError: + exif = exiftool_read_video.ExifToolReadVideo(ET.ElementTree(self.element)) + + make = exif.extract_make() + model = exif.extract_model() + + is_gopro = make is not None and make.upper() in ["GOPRO"] + + points = exif.extract_gps_track() + + # ExifTool has no idea if GPS is not found or found but empty + if is_gopro: + if not points: + raise exceptions.MapillaryGPXEmptyError("Empty GPS data found") + + # ExifTool (since 13.04) converts GPSSpeed for GoPro to km/h, so here we convert it back to m/s + for p in points: + if isinstance(p, telemetry.GPSPoint) and p.ground_speed is not None: + p.ground_speed = p.ground_speed / 3.6 + + if isinstance(points[0], telemetry.GPSPoint): + points = T.cast( + T.List[geo.Point], + gpmf_gps_filter.remove_noisy_points( + T.cast(T.List[telemetry.GPSPoint], points) + ), + ) + if not points: + raise exceptions.MapillaryGPSNoiseError("GPS is too noisy") + + if not points: + raise exceptions.MapillaryVideoGPSNotFoundError( + "No GPS data found from the video" + ) + + filetype = types.FileType.GOPRO if is_gopro else types.FileType.VIDEO + + video_metadata = types.VideoMetadata( + self.video_path, + filesize=utils.get_file_size(self.video_path), + filetype=filetype, + points=points, + make=make, + model=model, + ) + + return video_metadata diff --git a/mapillary_tools/geotag/video_extractors/gpx.py b/mapillary_tools/geotag/video_extractors/gpx.py new file mode 100644 index 000000000..bf97dd4bc --- /dev/null +++ b/mapillary_tools/geotag/video_extractors/gpx.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +import dataclasses +import datetime +import logging +import typing as T +from pathlib import Path + +from ... import geo, telemetry, types +from ..utils import parse_gpx +from .base import BaseVideoExtractor +from .native import NativeVideoExtractor + + +LOG = logging.getLogger(__name__) + + +class GPXVideoExtractor(BaseVideoExtractor): + def __init__(self, video_path: Path, gpx_path: Path): + self.video_path = video_path + self.gpx_path = gpx_path + + def extract(self) -> types.VideoMetadataOrError: + try: + gpx_tracks = parse_gpx(self.gpx_path) + except Exception as ex: + raise RuntimeError( + f"Error parsing GPX {self.gpx_path}: {ex.__class__.__name__}: {ex}" + ) + + if 1 < len(gpx_tracks): + LOG.warning( + "Found %s tracks in the GPX file %s. Will merge points in all the tracks as a single track for interpolation", + len(gpx_tracks), + self.gpx_path, + ) + + gpx_points: T.Sequence[geo.Point] = sum(gpx_tracks, []) + + native_extractor = NativeVideoExtractor(self.video_path) + + video_metadata_or_error = native_extractor.extract() + + if isinstance(video_metadata_or_error, types.ErrorMetadata): + self._rebase_times(gpx_points) + return types.VideoMetadata( + filename=video_metadata_or_error.filename, + filetype=video_metadata_or_error.filetype or types.FileType.VIDEO, + points=gpx_points, + ) + + video_metadata = video_metadata_or_error + + offset = self._synx_gpx_by_first_gps_timestamp( + gpx_points, video_metadata.points + ) + + self._rebase_times(gpx_points, offset=offset) + + return dataclasses.replace(video_metadata_or_error, points=gpx_points) + + @staticmethod + def _rebase_times(points: T.Sequence[geo.Point], offset: float = 0.0): + """ + Make point times start from 0 + """ + if points: + first_timestamp = points[0].time + for p in points: + p.time = (p.time - first_timestamp) + offset + return points + + def _synx_gpx_by_first_gps_timestamp( + self, gpx_points: T.Sequence[geo.Point], video_gps_points: T.Sequence[geo.Point] + ) -> float: + offset: float = 0.0 + + if not gpx_points: + return offset + + first_gpx_dt = datetime.datetime.fromtimestamp( + gpx_points[0].time, tz=datetime.timezone.utc + ) + LOG.info("First GPX timestamp: %s", first_gpx_dt) + + if not video_gps_points: + LOG.warning( + "Skip GPX synchronization because no GPS found in video %s", + self.video_path, + ) + return offset + + first_gps_point = video_gps_points[0] + if isinstance(first_gps_point, telemetry.GPSPoint): + if first_gps_point.epoch_time is not None: + first_gps_dt = datetime.datetime.fromtimestamp( + first_gps_point.epoch_time, tz=datetime.timezone.utc + ) + LOG.info("First GPS timestamp: %s", first_gps_dt) + offset = gpx_points[0].time - first_gps_point.epoch_time + if offset: + LOG.warning( + "Found offset between GPX %s and video GPS timestamps %s: %s seconds", + first_gpx_dt, + first_gps_dt, + offset, + ) + else: + LOG.info( + "GPX and GPS are perfectly synchronized (all starts from %s)", + first_gpx_dt, + ) + else: + LOG.warning( + "Skip GPX synchronization because no GPS epoch time found in video %s", + self.video_path, + ) + + return offset diff --git a/mapillary_tools/geotag/video_extractors/native.py b/mapillary_tools/geotag/video_extractors/native.py new file mode 100644 index 000000000..af6250b4f --- /dev/null +++ b/mapillary_tools/geotag/video_extractors/native.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +import typing as T +from pathlib import Path + +from ... import blackvue_parser, exceptions, geo, telemetry, types, utils +from ...camm import camm_parser +from ...gpmf import gpmf_gps_filter, gpmf_parser +from .base import BaseVideoExtractor + + +class GoProVideoExtractor(BaseVideoExtractor): + def extract(self) -> types.VideoMetadataOrError: + with self.video_path.open("rb") as fp: + gopro_info = gpmf_parser.extract_gopro_info(fp) + + if gopro_info is None: + raise exceptions.MapillaryVideoGPSNotFoundError( + "No GPS data found from the video" + ) + + gps_points = gopro_info.gps + assert gps_points is not None, "must have GPS data extracted" + if not gps_points: + # Instead of raising an exception, return error metadata to tell the file type + ex: exceptions.MapillaryDescriptionError = ( + exceptions.MapillaryGPXEmptyError("Empty GPS data found") + ) + return types.describe_error_metadata( + ex, self.video_path, filetype=types.FileType.GOPRO + ) + + gps_points = T.cast( + T.List[telemetry.GPSPoint], gpmf_gps_filter.remove_noisy_points(gps_points) + ) + if not gps_points: + # Instead of raising an exception, return error metadata to tell the file type + ex = exceptions.MapillaryGPSNoiseError("GPS is too noisy") + return types.describe_error_metadata( + ex, self.video_path, filetype=types.FileType.GOPRO + ) + + video_metadata = types.VideoMetadata( + filename=self.video_path, + filesize=utils.get_file_size(self.video_path), + filetype=types.FileType.GOPRO, + points=T.cast(T.List[geo.Point], gps_points), + make=gopro_info.make, + model=gopro_info.model, + ) + + return video_metadata + + +class CAMMVideoExtractor(BaseVideoExtractor): + def extract(self) -> types.VideoMetadataOrError: + with self.video_path.open("rb") as fp: + camm_info = camm_parser.extract_camm_info(fp) + + if camm_info is None: + raise exceptions.MapillaryVideoGPSNotFoundError( + "No GPS data found from the video" + ) + + if not camm_info.gps and not camm_info.mini_gps: + # Instead of raising an exception, return error metadata to tell the file type + ex: exceptions.MapillaryDescriptionError = ( + exceptions.MapillaryGPXEmptyError("Empty GPS data found") + ) + return types.describe_error_metadata( + ex, self.video_path, filetype=types.FileType.CAMM + ) + + return types.VideoMetadata( + filename=self.video_path, + filesize=utils.get_file_size(self.video_path), + filetype=types.FileType.CAMM, + points=T.cast(T.List[geo.Point], camm_info.gps or camm_info.mini_gps), + make=camm_info.make, + model=camm_info.model, + ) + + +class BlackVueVideoExtractor(BaseVideoExtractor): + def extract(self) -> types.VideoMetadataOrError: + with self.video_path.open("rb") as fp: + blackvue_info = blackvue_parser.extract_blackvue_info(fp) + + if blackvue_info is None: + raise exceptions.MapillaryVideoGPSNotFoundError( + "No GPS data found from the video" + ) + + if not blackvue_info.gps: + # Instead of raising an exception, return error metadata to tell the file type + ex: exceptions.MapillaryDescriptionError = ( + exceptions.MapillaryGPXEmptyError("Empty GPS data found") + ) + return types.describe_error_metadata( + ex, self.video_path, filetype=types.FileType.BLACKVUE + ) + + video_metadata = types.VideoMetadata( + filename=self.video_path, + filesize=utils.get_file_size(self.video_path), + filetype=types.FileType.BLACKVUE, + points=blackvue_info.gps or [], + make=blackvue_info.make, + model=blackvue_info.model, + ) + + return video_metadata + + +class NativeVideoExtractor(BaseVideoExtractor): + def __init__(self, video_path: Path, filetypes: set[types.FileType] | None = None): + super().__init__(video_path) + self.filetypes = filetypes + + def extract(self) -> types.VideoMetadataOrError: + ft = self.filetypes + extractor: BaseVideoExtractor + + if ft is None or types.FileType.VIDEO in ft or types.FileType.GOPRO in ft: + extractor = GoProVideoExtractor(self.video_path) + try: + return extractor.extract() + except exceptions.MapillaryVideoGPSNotFoundError: + pass + + if ft is None or types.FileType.VIDEO in ft or types.FileType.CAMM in ft: + extractor = CAMMVideoExtractor(self.video_path) + try: + return extractor.extract() + except exceptions.MapillaryVideoGPSNotFoundError: + pass + + if ft is None or types.FileType.VIDEO in ft or types.FileType.BLACKVUE in ft: + extractor = BlackVueVideoExtractor(self.video_path) + try: + return extractor.extract() + except exceptions.MapillaryVideoGPSNotFoundError: + pass + + raise exceptions.MapillaryVideoGPSNotFoundError( + "No GPS data found from the video" + ) From df823e97bf3956130dc87b281b5a6ea623fa5154 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 4 Apr 2025 17:03:55 -0700 Subject: [PATCH 04/10] add override and fix a bug of overriding wrong method --- mapillary_tools/geotag/geotag_images_from_exif.py | 9 +++++++++ mapillary_tools/geotag/geotag_images_from_exiftool.py | 9 +++++++++ mapillary_tools/geotag/geotag_images_from_gpx.py | 8 ++++++++ mapillary_tools/geotag/geotag_images_from_video.py | 7 +++++++ mapillary_tools/geotag/geotag_videos_from_exiftool.py | 8 ++++++++ mapillary_tools/geotag/geotag_videos_from_gpx.py | 9 ++++++++- mapillary_tools/geotag/geotag_videos_from_video.py | 7 +++++++ mapillary_tools/geotag/image_extractors/exif.py | 7 +++++++ mapillary_tools/geotag/video_extractors/exiftool.py | 7 +++++++ mapillary_tools/geotag/video_extractors/gpx.py | 7 +++++++ mapillary_tools/geotag/video_extractors/native.py | 10 ++++++++++ 11 files changed, 87 insertions(+), 1 deletion(-) diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index b62baeebc..298ba02f4 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -1,6 +1,14 @@ +from __future__ import annotations + import logging +import sys import typing as T +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from .base import GeotagImagesFromGeneric from .image_extractors.exif import ImageEXIFExtractor @@ -8,5 +16,6 @@ class GeotagImagesFromEXIF(GeotagImagesFromGeneric): + @override def _generate_image_extractors(self) -> T.Sequence[ImageEXIFExtractor]: return [ImageEXIFExtractor(path) for path in self.image_paths] diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index 28ab5cbab..b47130ae5 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -1,10 +1,16 @@ from __future__ import annotations import logging +import sys import typing as T import xml.etree.ElementTree as ET from pathlib import Path +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from .. import constants, exceptions, exiftool_read, types, utils from ..exiftool_runner import ExiftoolRunner from .base import GeotagImagesFromGeneric @@ -25,6 +31,7 @@ def __init__( self.xml_path = xml_path super().__init__(image_paths=image_paths, num_processes=num_processes) + @override def _generate_image_extractors( self, ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: @@ -54,6 +61,7 @@ def _generate_image_extractors( class GeotagImagesFromExifToolRunner(GeotagImagesFromGeneric): + @override def _generate_image_extractors( self, ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: @@ -135,6 +143,7 @@ def geotag_samples(self) -> list[types.ImageMetadataOrError]: return sample_metadata_or_errors + @override def to_description(self) -> list[types.ImageMetadataOrError]: sample_metadata_or_errors = self.geotag_samples() diff --git a/mapillary_tools/geotag/geotag_images_from_gpx.py b/mapillary_tools/geotag/geotag_images_from_gpx.py index ce81cf9c8..a267015bf 100644 --- a/mapillary_tools/geotag/geotag_images_from_gpx.py +++ b/mapillary_tools/geotag/geotag_images_from_gpx.py @@ -2,9 +2,15 @@ import dataclasses import logging +import sys import typing as T from pathlib import Path +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from .. import exceptions, geo, types from .base import GeotagImagesFromGeneric from .geotag_images_from_exif import ImageEXIFExtractor @@ -73,12 +79,14 @@ def _interpolate_image_metadata_along( time=interpolated.time, ) + @override def _generate_image_extractors(self) -> T.Sequence[ImageEXIFExtractor]: return [ ImageEXIFExtractor(path, skip_lonlat_error=True) for path in self.image_paths ] + @override def to_description(self) -> list[types.ImageMetadataOrError]: final_metadatas: list[types.ImageMetadataOrError] = [] diff --git a/mapillary_tools/geotag/geotag_images_from_video.py b/mapillary_tools/geotag/geotag_images_from_video.py index 820f1a0ae..f5e0800f7 100644 --- a/mapillary_tools/geotag/geotag_images_from_video.py +++ b/mapillary_tools/geotag/geotag_images_from_video.py @@ -1,9 +1,15 @@ from __future__ import annotations import logging +import sys import typing as T from pathlib import Path +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from .. import types, utils from .base import GeotagImagesFromGeneric from .geotag_images_from_gpx import GeotagImagesFromGPX @@ -24,6 +30,7 @@ def __init__( self.video_metadatas = video_metadatas self.offset_time = offset_time + @override def to_description(self) -> list[types.ImageMetadataOrError]: # Will return this list final_image_metadatas: list[types.ImageMetadataOrError] = [] diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool.py b/mapillary_tools/geotag/geotag_videos_from_exiftool.py index 1a405e87c..336dcd3e9 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool.py @@ -1,10 +1,16 @@ from __future__ import annotations import logging +import sys import typing as T import xml.etree.ElementTree as ET from pathlib import Path +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from .. import constants, exceptions, exiftool_read, types from ..exiftool_runner import ExiftoolRunner from .base import GeotagVideosFromGeneric @@ -23,6 +29,7 @@ def __init__( super().__init__(video_paths, num_processes=num_processes) self.xml_path = xml_path + @override def _generate_video_extractors( self, ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: @@ -52,6 +59,7 @@ def _generate_video_extractors( class GeotagVideosFromExifToolRunner(GeotagVideosFromGeneric): + @override def _generate_video_extractors( self, ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: diff --git a/mapillary_tools/geotag/geotag_videos_from_gpx.py b/mapillary_tools/geotag/geotag_videos_from_gpx.py index 80139f14e..c624055b6 100644 --- a/mapillary_tools/geotag/geotag_videos_from_gpx.py +++ b/mapillary_tools/geotag/geotag_videos_from_gpx.py @@ -1,9 +1,15 @@ from __future__ import annotations import logging +import sys import typing as T from pathlib import Path +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from . import options from .base import GeotagVideosFromGeneric from .video_extractors.gpx import GPXVideoExtractor @@ -24,7 +30,8 @@ def __init__( option = options.SourcePathOption(pattern="%f.gpx") self.option = option - def _generate_image_extractors(self) -> T.Sequence[GPXVideoExtractor]: + @override + def _generate_video_extractors(self) -> T.Sequence[GPXVideoExtractor]: return [ GPXVideoExtractor(video_path, self.option.resolve(video_path)) for video_path in self.video_paths diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index 0e568324e..60f15dd19 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -1,8 +1,14 @@ from __future__ import annotations +import sys import typing as T from pathlib import Path +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from ..types import FileType from .base import GeotagVideosFromGeneric from .video_extractors.native import NativeVideoExtractor @@ -18,6 +24,7 @@ def __init__( super().__init__(video_paths, num_processes=num_processes) self.filetypes = filetypes + @override def _generate_video_extractors(self) -> T.Sequence[NativeVideoExtractor]: return [ NativeVideoExtractor(path, filetypes=self.filetypes) diff --git a/mapillary_tools/geotag/image_extractors/exif.py b/mapillary_tools/geotag/image_extractors/exif.py index ddd3fa191..f78b0bdaf 100644 --- a/mapillary_tools/geotag/image_extractors/exif.py +++ b/mapillary_tools/geotag/image_extractors/exif.py @@ -1,9 +1,15 @@ from __future__ import annotations import contextlib +import sys import typing as T from pathlib import Path +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from ... import exceptions, exif_read, geo, types, utils from .base import BaseImageExtractor @@ -18,6 +24,7 @@ def _exif_context(self) -> T.Generator[exif_read.ExifReadABC, None, None]: with self.image_path.open("rb") as fp: yield exif_read.ExifRead(fp) + @override def extract(self) -> types.ImageMetadata: with self._exif_context() as exif: lonlat = exif.extract_lon_lat() diff --git a/mapillary_tools/geotag/video_extractors/exiftool.py b/mapillary_tools/geotag/video_extractors/exiftool.py index 7a4f49101..bb51863a5 100644 --- a/mapillary_tools/geotag/video_extractors/exiftool.py +++ b/mapillary_tools/geotag/video_extractors/exiftool.py @@ -1,9 +1,15 @@ from __future__ import annotations +import sys import typing as T from pathlib import Path from xml.etree import ElementTree as ET +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from ... import exceptions, exiftool_read_video, geo, telemetry, types, utils from ...gpmf import gpmf_gps_filter from .base import BaseVideoExtractor @@ -14,6 +20,7 @@ def __init__(self, video_path: Path, element: ET.Element): super().__init__(video_path) self.element = element + @override def extract(self) -> types.VideoMetadataOrError: exif = exiftool_read_video.ExifToolReadVideo(ET.ElementTree(self.element)) diff --git a/mapillary_tools/geotag/video_extractors/gpx.py b/mapillary_tools/geotag/video_extractors/gpx.py index bf97dd4bc..560fa4294 100644 --- a/mapillary_tools/geotag/video_extractors/gpx.py +++ b/mapillary_tools/geotag/video_extractors/gpx.py @@ -3,9 +3,15 @@ import dataclasses import datetime import logging +import sys import typing as T from pathlib import Path +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from ... import geo, telemetry, types from ..utils import parse_gpx from .base import BaseVideoExtractor @@ -20,6 +26,7 @@ def __init__(self, video_path: Path, gpx_path: Path): self.video_path = video_path self.gpx_path = gpx_path + @override def extract(self) -> types.VideoMetadataOrError: try: gpx_tracks = parse_gpx(self.gpx_path) diff --git a/mapillary_tools/geotag/video_extractors/native.py b/mapillary_tools/geotag/video_extractors/native.py index af6250b4f..b30d3160e 100644 --- a/mapillary_tools/geotag/video_extractors/native.py +++ b/mapillary_tools/geotag/video_extractors/native.py @@ -1,8 +1,14 @@ from __future__ import annotations +import sys import typing as T from pathlib import Path +if sys.version_info >= (3, 12): + from typing import override +else: + from typing_extensions import override + from ... import blackvue_parser, exceptions, geo, telemetry, types, utils from ...camm import camm_parser from ...gpmf import gpmf_gps_filter, gpmf_parser @@ -10,6 +16,7 @@ class GoProVideoExtractor(BaseVideoExtractor): + @override def extract(self) -> types.VideoMetadataOrError: with self.video_path.open("rb") as fp: gopro_info = gpmf_parser.extract_gopro_info(fp) @@ -53,6 +60,7 @@ def extract(self) -> types.VideoMetadataOrError: class CAMMVideoExtractor(BaseVideoExtractor): + @override def extract(self) -> types.VideoMetadataOrError: with self.video_path.open("rb") as fp: camm_info = camm_parser.extract_camm_info(fp) @@ -82,6 +90,7 @@ def extract(self) -> types.VideoMetadataOrError: class BlackVueVideoExtractor(BaseVideoExtractor): + @override def extract(self) -> types.VideoMetadataOrError: with self.video_path.open("rb") as fp: blackvue_info = blackvue_parser.extract_blackvue_info(fp) @@ -117,6 +126,7 @@ def __init__(self, video_path: Path, filetypes: set[types.FileType] | None = Non super().__init__(video_path) self.filetypes = filetypes + @override def extract(self) -> types.VideoMetadataOrError: ft = self.filetypes extractor: BaseVideoExtractor From e05b556fc82be28240d1da12bc1bc7f6fb94d0e5 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 4 Apr 2025 18:23:03 -0700 Subject: [PATCH 05/10] move image_paths to to_description() --- mapillary_tools/geotag/base.py | 30 ++++++------- mapillary_tools/geotag/factory.py | 42 ++++++++---------- .../geotag/geotag_images_from_exif.py | 7 ++- .../geotag/geotag_images_from_exiftool.py | 43 +++++++++---------- .../geotag/geotag_images_from_gpx.py | 20 +++++---- .../geotag/geotag_images_from_gpx_file.py | 3 -- .../geotag/geotag_images_from_nmea_file.py | 3 -- .../geotag/geotag_images_from_video.py | 20 ++++----- .../geotag/geotag_videos_from_exiftool.py | 15 +++---- .../geotag/geotag_videos_from_gpx.py | 9 ++-- .../geotag/geotag_videos_from_video.py | 10 ++--- mapillary_tools/sample_video.py | 4 +- 12 files changed, 95 insertions(+), 111 deletions(-) diff --git a/mapillary_tools/geotag/base.py b/mapillary_tools/geotag/base.py index 37dfdefb4..c606707a4 100644 --- a/mapillary_tools/geotag/base.py +++ b/mapillary_tools/geotag/base.py @@ -23,16 +23,15 @@ class GeotagImagesFromGeneric(abc.ABC, T.Generic[TImageExtractor]): Extracts metadata from a list of image files with multiprocessing. """ - def __init__( - self, image_paths: T.Sequence[Path], num_processes: int | None = None - ) -> None: - self.image_paths = image_paths + def __init__(self, num_processes: int | None = None) -> None: self.num_processes = num_processes - def to_description(self) -> list[types.ImageMetadataOrError]: - extractor_or_errors = self._generate_image_extractors() + def to_description( + self, image_paths: T.Sequence[Path] + ) -> list[types.ImageMetadataOrError]: + extractor_or_errors = self._generate_image_extractors(image_paths) - assert len(extractor_or_errors) == len(self.image_paths) + assert len(extractor_or_errors) == len(image_paths) extractors, error_metadatas = types.separate_errors(extractor_or_errors) @@ -55,7 +54,7 @@ def to_description(self) -> list[types.ImageMetadataOrError]: return results + error_metadatas def _generate_image_extractors( - self, + self, image_paths: T.Sequence[Path] ) -> T.Sequence[TImageExtractor | types.ErrorMetadata]: raise NotImplementedError @@ -86,16 +85,15 @@ class GeotagVideosFromGeneric(abc.ABC, T.Generic[TVideoExtractor]): Extracts metadata from a list of video files with multiprocessing. """ - def __init__( - self, video_paths: T.Sequence[Path], num_processes: int | None = None - ) -> None: - self.video_paths = video_paths + def __init__(self, num_processes: int | None = None) -> None: self.num_processes = num_processes - def to_description(self) -> list[types.VideoMetadataOrError]: - extractor_or_errors = self._generate_video_extractors() + def to_description( + self, video_paths: T.Sequence[Path] + ) -> list[types.VideoMetadataOrError]: + extractor_or_errors = self._generate_video_extractors(video_paths) - assert len(extractor_or_errors) == len(self.video_paths) + assert len(extractor_or_errors) == len(video_paths) extractors, error_metadatas = types.separate_errors(extractor_or_errors) @@ -118,7 +116,7 @@ def to_description(self) -> list[types.VideoMetadataOrError]: return results + error_metadatas def _generate_video_extractors( - self, + self, video_paths: T.Sequence[Path] ) -> T.Sequence[TVideoExtractor | types.ErrorMetadata]: raise NotImplementedError diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index a8536c667..ca505b59a 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -157,16 +157,16 @@ def _geotag_images( if option.source is SourceType.NATIVE: geotag = geotag_images_from_exif.GeotagImagesFromEXIF( - image_paths, num_processes=option.num_processes + num_processes=option.num_processes ) - return geotag.to_description() + return geotag.to_description(image_paths) if option.source is SourceType.EXIFTOOL_RUNTIME: geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolRunner( - image_paths, num_processes=option.num_processes + num_processes=option.num_processes ) try: - return geotag.to_description() + return geotag.to_description(image_paths) except exceptions.MapillaryExiftoolNotFoundError as ex: LOG.warning('Skip "%s" because: %s', option.source.value, ex) return [] @@ -175,38 +175,35 @@ def _geotag_images( # This is to ensure 'video_process --geotag={"source": "exiftool_xml", "source_path": "/tmp/xml_path"}' # to work geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolWithSamples( - image_paths, xml_path=_ensure_source_path(option), num_processes=option.num_processes, ) - return geotag.to_description() + return geotag.to_description(image_paths) elif option.source is SourceType.GPX: geotag = geotag_images_from_gpx_file.GeotagImagesFromGPXFile( - image_paths, source_path=_ensure_source_path(option), use_gpx_start_time=interpolation.use_gpx_start_time, offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag.to_description() + return geotag.to_description(image_paths) elif option.source is SourceType.NMEA: geotag = geotag_images_from_nmea_file.GeotagImagesFromNMEAFile( - image_paths, source_path=_ensure_source_path(option), use_gpx_start_time=interpolation.use_gpx_start_time, offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag.to_description() + return geotag.to_description(image_paths) elif option.source is SourceType.EXIF: geotag = geotag_images_from_exif.GeotagImagesFromEXIF( - image_paths, num_processes=option.num_processes + num_processes=option.num_processes ) - return geotag.to_description() + return geotag.to_description(image_paths) elif option.source in [ SourceType.GOPRO, @@ -224,17 +221,15 @@ def _geotag_images( ) video_paths_with_image_samples = list(image_samples_by_video_path.keys()) video_metadatas = geotag_videos_from_video.GeotagVideosFromVideo( - video_paths_with_image_samples, filetypes={map_geotag_source_to_filetype[option.source]}, num_processes=option.num_processes, - ).to_description() + ).to_description(video_paths_with_image_samples) geotag = geotag_images_from_video.GeotagImagesFromVideo( - image_paths, video_metadatas, offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag.to_description() + return geotag.to_description(image_paths) else: raise ValueError(f"Invalid geotag source {option.source}") @@ -252,30 +247,29 @@ def _geotag_videos( if option.source is SourceType.NATIVE: geotag = geotag_videos_from_video.GeotagVideosFromVideo( - video_paths, num_processes=option.num_processes, filetypes=option.filetypes + num_processes=option.num_processes, filetypes=option.filetypes ) - return geotag.to_description() + return geotag.to_description(video_paths) if option.source is SourceType.EXIFTOOL_RUNTIME: geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolRunner( - video_paths, num_processes=option.num_processes + num_processes=option.num_processes ) try: - return geotag.to_description() + return geotag.to_description(video_paths) except exceptions.MapillaryExiftoolNotFoundError as ex: LOG.warning('Skip "%s" because: %s', option.source.value, ex) return [] elif option.source is SourceType.EXIFTOOL_XML: geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( - video_paths, xml_path=_ensure_source_path(option), ) - return geotag.to_description() + return geotag.to_description(video_paths) elif option.source is SourceType.GPX: - geotag = geotag_videos_from_gpx.GeotagVideosFromGPX(video_paths) - return geotag.to_description() + geotag = geotag_videos_from_gpx.GeotagVideosFromGPX() + return geotag.to_description(video_paths) elif option.source is SourceType.NMEA: # TODO: geotag videos from NMEA diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index 298ba02f4..809fdc1f4 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -2,6 +2,7 @@ import logging import sys +from pathlib import Path import typing as T if sys.version_info >= (3, 12): @@ -17,5 +18,7 @@ class GeotagImagesFromEXIF(GeotagImagesFromGeneric): @override - def _generate_image_extractors(self) -> T.Sequence[ImageEXIFExtractor]: - return [ImageEXIFExtractor(path) for path in self.image_paths] + def _generate_image_extractors( + self, image_paths: T.Sequence[Path] + ) -> T.Sequence[ImageEXIFExtractor]: + return [ImageEXIFExtractor(path) for path in image_paths] diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index b47130ae5..8c39ee87f 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -24,16 +24,15 @@ class GeotagImagesFromExifToolXML(GeotagImagesFromGeneric): def __init__( self, - image_paths: T.Sequence[Path], xml_path: Path, num_processes: int | None = None, ): self.xml_path = xml_path - super().__init__(image_paths=image_paths, num_processes=num_processes) + super().__init__(num_processes=num_processes) @override def _generate_image_extractors( - self, + self, image_paths: T.Sequence[Path] ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: rdf_description_by_path = exiftool_read.index_rdf_description_by_path( [self.xml_path] @@ -41,7 +40,7 @@ def _generate_image_extractors( results: list[ImageExifToolExtractor | types.ErrorMetadata] = [] - for path in self.image_paths: + for path in image_paths: rdf_description = rdf_description_by_path.get( exiftool_read.canonical_path(path) ) @@ -63,17 +62,17 @@ def _generate_image_extractors( class GeotagImagesFromExifToolRunner(GeotagImagesFromGeneric): @override def _generate_image_extractors( - self, + self, image_paths: T.Sequence[Path] ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: runner = ExiftoolRunner(constants.EXIFTOOL_PATH) LOG.debug( "Extracting XML from %d images with exiftool command: %s", - len(self.image_paths), + len(image_paths), " ".join(runner._build_args_read_stdin()), ) try: - xml = runner.extract_xml(self.image_paths) + xml = runner.extract_xml(image_paths) except FileNotFoundError as ex: raise exceptions.MapillaryExiftoolNotFoundError(ex) from ex @@ -85,7 +84,7 @@ def _generate_image_extractors( results: list[ImageExifToolExtractor | types.ErrorMetadata] = [] - for path in self.image_paths: + for path in image_paths: rdf_description = rdf_description_by_path.get( exiftool_read.canonical_path(path) ) @@ -107,16 +106,17 @@ def _generate_image_extractors( class GeotagImagesFromExifToolWithSamples(GeotagImagesFromGeneric): def __init__( self, - image_paths: T.Sequence[Path], xml_path: Path, offset_time: float = 0.0, num_processes: int | None = None, ): - super().__init__(image_paths, num_processes=num_processes) + super().__init__(num_processes=num_processes) self.xml_path = xml_path self.offset_time = offset_time - def geotag_samples(self) -> list[types.ImageMetadataOrError]: + def geotag_samples( + self, image_paths: T.Sequence[Path] + ) -> list[types.ImageMetadataOrError]: # Find all video paths in self.xml_path rdf_description_by_path = exiftool_read.index_rdf_description_by_path( [self.xml_path] @@ -126,37 +126,34 @@ def geotag_samples(self) -> list[types.ImageMetadataOrError]: skip_subfolders=True, ) # Find all video paths that have sample images - samples_by_video = utils.find_all_image_samples(self.image_paths, video_paths) + samples_by_video = utils.find_all_image_samples(image_paths, video_paths) video_metadata_or_errors = GeotagVideosFromExifToolXML( - list(samples_by_video.keys()), self.xml_path, num_processes=self.num_processes, - ).to_description() + ).to_description(list(samples_by_video.keys())) sample_paths = sum(samples_by_video.values(), []) sample_metadata_or_errors = GeotagImagesFromVideo( - sample_paths, video_metadata_or_errors, offset_time=self.offset_time, num_processes=self.num_processes, - ).to_description() + ).to_description(sample_paths) return sample_metadata_or_errors @override - def to_description(self) -> list[types.ImageMetadataOrError]: - sample_metadata_or_errors = self.geotag_samples() + def to_description( + self, image_paths: T.Sequence[Path] + ) -> list[types.ImageMetadataOrError]: + sample_metadata_or_errors = self.geotag_samples(image_paths) sample_paths = set(metadata.filename for metadata in sample_metadata_or_errors) - non_sample_paths = [ - path for path in self.image_paths if path not in sample_paths - ] + non_sample_paths = [path for path in image_paths if path not in sample_paths] non_sample_metadata_or_errors = GeotagImagesFromExifToolXML( - non_sample_paths, self.xml_path, num_processes=self.num_processes, - ).to_description() + ).to_description(non_sample_paths) return sample_metadata_or_errors + non_sample_metadata_or_errors diff --git a/mapillary_tools/geotag/geotag_images_from_gpx.py b/mapillary_tools/geotag/geotag_images_from_gpx.py index a267015bf..56f4779ec 100644 --- a/mapillary_tools/geotag/geotag_images_from_gpx.py +++ b/mapillary_tools/geotag/geotag_images_from_gpx.py @@ -22,14 +22,13 @@ class GeotagImagesFromGPX(GeotagImagesFromGeneric): def __init__( self, - image_paths: T.Sequence[Path], points: T.Sequence[geo.Point], use_gpx_start_time: bool = False, use_image_start_time: bool = False, offset_time: float = 0.0, num_processes: int | None = None, ): - super().__init__(image_paths, num_processes=num_processes) + super().__init__(num_processes=num_processes) self.points = points self.use_gpx_start_time = use_gpx_start_time self.use_image_start_time = use_image_start_time @@ -80,17 +79,20 @@ def _interpolate_image_metadata_along( ) @override - def _generate_image_extractors(self) -> T.Sequence[ImageEXIFExtractor]: + def _generate_image_extractors( + self, image_paths: T.Sequence[Path] + ) -> T.Sequence[ImageEXIFExtractor]: return [ - ImageEXIFExtractor(path, skip_lonlat_error=True) - for path in self.image_paths + ImageEXIFExtractor(path, skip_lonlat_error=True) for path in image_paths ] @override - def to_description(self) -> list[types.ImageMetadataOrError]: + def to_description( + self, image_paths: T.Sequence[Path] + ) -> list[types.ImageMetadataOrError]: final_metadatas: list[types.ImageMetadataOrError] = [] - image_metadata_or_errors = super().to_description() + image_metadata_or_errors = super().to_description(image_paths) image_metadatas, error_metadatas = types.separate_errors( image_metadata_or_errors @@ -98,7 +100,7 @@ def to_description(self) -> list[types.ImageMetadataOrError]: final_metadatas.extend(error_metadatas) if not image_metadatas: - assert len(self.image_paths) == len(final_metadatas) + assert len(image_paths) == len(final_metadatas) return final_metadatas # Do not use point itself for comparison because point.angle or point.alt could be None @@ -153,6 +155,6 @@ def to_description(self) -> list[types.ImageMetadataOrError]: ) final_metadatas.append(error_metadata) - assert len(self.image_paths) == len(final_metadatas) + assert len(image_paths) == len(final_metadatas) return final_metadatas diff --git a/mapillary_tools/geotag/geotag_images_from_gpx_file.py b/mapillary_tools/geotag/geotag_images_from_gpx_file.py index ecc95067c..9c0532711 100644 --- a/mapillary_tools/geotag/geotag_images_from_gpx_file.py +++ b/mapillary_tools/geotag/geotag_images_from_gpx_file.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -import typing as T from pathlib import Path from . import utils @@ -14,7 +13,6 @@ class GeotagImagesFromGPXFile(GeotagImagesFromGPX): def __init__( self, - image_paths: T.Sequence[Path], source_path: Path, use_gpx_start_time: bool = False, offset_time: float = 0.0, @@ -35,7 +33,6 @@ def __init__( ) points = sum(tracks, []) super().__init__( - image_paths, points, use_gpx_start_time=use_gpx_start_time, offset_time=offset_time, diff --git a/mapillary_tools/geotag/geotag_images_from_nmea_file.py b/mapillary_tools/geotag/geotag_images_from_nmea_file.py index 0a90a8487..ffb67e5f4 100644 --- a/mapillary_tools/geotag/geotag_images_from_nmea_file.py +++ b/mapillary_tools/geotag/geotag_images_from_nmea_file.py @@ -1,7 +1,6 @@ from __future__ import annotations import datetime -import typing as T from pathlib import Path import pynmea2 @@ -13,7 +12,6 @@ class GeotagImagesFromNMEAFile(GeotagImagesFromGPX): def __init__( self, - image_paths: T.Sequence[Path], source_path: Path, use_gpx_start_time: bool = False, offset_time: float = 0.0, @@ -21,7 +19,6 @@ def __init__( ): points = get_lat_lon_time_from_nmea(source_path) super().__init__( - image_paths, points, use_gpx_start_time=use_gpx_start_time, offset_time=offset_time, diff --git a/mapillary_tools/geotag/geotag_images_from_video.py b/mapillary_tools/geotag/geotag_images_from_video.py index f5e0800f7..6d032b21b 100644 --- a/mapillary_tools/geotag/geotag_images_from_video.py +++ b/mapillary_tools/geotag/geotag_images_from_video.py @@ -21,17 +21,18 @@ class GeotagImagesFromVideo(GeotagImagesFromGeneric): def __init__( self, - image_paths: T.Sequence[Path], video_metadatas: T.Sequence[types.VideoMetadataOrError], offset_time: float = 0.0, num_processes: int | None = None, ): - super().__init__(image_paths, num_processes=num_processes) + super().__init__(num_processes=num_processes) self.video_metadatas = video_metadatas self.offset_time = offset_time @override - def to_description(self) -> list[types.ImageMetadataOrError]: + def to_description( + self, image_paths: T.Sequence[Path] + ) -> list[types.ImageMetadataOrError]: # Will return this list final_image_metadatas: list[types.ImageMetadataOrError] = [] @@ -41,9 +42,7 @@ def to_description(self) -> list[types.ImageMetadataOrError]: for video_error_metadata in video_error_metadatas: video_path = video_error_metadata.filename - sample_paths = list( - utils.filter_video_samples(self.image_paths, video_path) - ) + sample_paths = list(utils.filter_video_samples(image_paths, video_path)) LOG.debug( "Found %d sample images from video %s with error: %s", len(sample_paths), @@ -61,9 +60,7 @@ def to_description(self) -> list[types.ImageMetadataOrError]: for video_metadata in video_metadatas: video_path = video_metadata.filename - sample_paths = list( - utils.filter_video_samples(self.image_paths, video_path) - ) + sample_paths = list(utils.filter_video_samples(image_paths, video_path)) LOG.debug( "Found %d sample images from video %s", len(sample_paths), @@ -71,7 +68,6 @@ def to_description(self) -> list[types.ImageMetadataOrError]: ) geotag = GeotagImagesFromGPX( - sample_paths, video_metadata.points, use_gpx_start_time=False, use_image_start_time=True, @@ -79,7 +75,7 @@ def to_description(self) -> list[types.ImageMetadataOrError]: num_processes=self.num_processes, ) - image_metadatas = geotag.to_description() + image_metadatas = geotag.to_description(image_paths) for metadata in image_metadatas: if isinstance(metadata, types.ImageMetadata): @@ -91,6 +87,6 @@ def to_description(self) -> list[types.ImageMetadataOrError]: # NOTE: this method only geotags images that have a corresponding video, # so the number of image metadata objects returned might be less than # the number of the input image_paths - assert len(final_image_metadatas) <= len(self.image_paths) + assert len(final_image_metadatas) <= len(image_paths) return final_image_metadatas diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool.py b/mapillary_tools/geotag/geotag_videos_from_exiftool.py index 336dcd3e9..eeab488de 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool.py @@ -22,16 +22,15 @@ class GeotagVideosFromExifToolXML(GeotagVideosFromGeneric): def __init__( self, - video_paths: T.Sequence[Path], xml_path: Path, num_processes: int | None = None, ): - super().__init__(video_paths, num_processes=num_processes) + super().__init__(num_processes=num_processes) self.xml_path = xml_path @override def _generate_video_extractors( - self, + self, video_paths: T.Sequence[Path] ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: rdf_description_by_path = exiftool_read.index_rdf_description_by_path( [self.xml_path] @@ -39,7 +38,7 @@ def _generate_video_extractors( results: list[VideoExifToolExtractor | types.ErrorMetadata] = [] - for path in self.video_paths: + for path in video_paths: rdf_description = rdf_description_by_path.get( exiftool_read.canonical_path(path) ) @@ -61,18 +60,18 @@ def _generate_video_extractors( class GeotagVideosFromExifToolRunner(GeotagVideosFromGeneric): @override def _generate_video_extractors( - self, + self, video_paths: T.Sequence[Path] ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: runner = ExiftoolRunner(constants.EXIFTOOL_PATH) LOG.debug( "Extracting XML from %d videos with exiftool command: %s", - len(self.video_paths), + len(video_paths), " ".join(runner._build_args_read_stdin()), ) try: - xml = runner.extract_xml(self.video_paths) + xml = runner.extract_xml(video_paths) except FileNotFoundError as ex: raise exceptions.MapillaryExiftoolNotFoundError(ex) from ex @@ -84,7 +83,7 @@ def _generate_video_extractors( results: list[VideoExifToolExtractor | types.ErrorMetadata] = [] - for path in self.video_paths: + for path in video_paths: rdf_description = rdf_description_by_path.get( exiftool_read.canonical_path(path) ) diff --git a/mapillary_tools/geotag/geotag_videos_from_gpx.py b/mapillary_tools/geotag/geotag_videos_from_gpx.py index c624055b6..a5e8afd85 100644 --- a/mapillary_tools/geotag/geotag_videos_from_gpx.py +++ b/mapillary_tools/geotag/geotag_videos_from_gpx.py @@ -21,18 +21,19 @@ class GeotagVideosFromGPX(GeotagVideosFromGeneric): def __init__( self, - video_paths: T.Sequence[Path], option: options.SourcePathOption | None = None, num_processes: int | None = None, ): - super().__init__(video_paths, num_processes=num_processes) + super().__init__(num_processes=num_processes) if option is None: option = options.SourcePathOption(pattern="%f.gpx") self.option = option @override - def _generate_video_extractors(self) -> T.Sequence[GPXVideoExtractor]: + def _generate_video_extractors( + self, video_paths: T.Sequence[Path] + ) -> T.Sequence[GPXVideoExtractor]: return [ GPXVideoExtractor(video_path, self.option.resolve(video_path)) - for video_path in self.video_paths + for video_path in video_paths ] diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index 60f15dd19..f718656eb 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -17,16 +17,16 @@ class GeotagVideosFromVideo(GeotagVideosFromGeneric): def __init__( self, - video_paths: T.Sequence[Path], filetypes: set[FileType] | None = None, num_processes: int | None = None, ): - super().__init__(video_paths, num_processes=num_processes) + super().__init__(num_processes=num_processes) self.filetypes = filetypes @override - def _generate_video_extractors(self) -> T.Sequence[NativeVideoExtractor]: + def _generate_video_extractors( + self, video_paths: T.Sequence[Path] + ) -> T.Sequence[NativeVideoExtractor]: return [ - NativeVideoExtractor(path, filetypes=self.filetypes) - for path in self.video_paths + NativeVideoExtractor(path, filetypes=self.filetypes) for path in video_paths ] diff --git a/mapillary_tools/sample_video.py b/mapillary_tools/sample_video.py index 7a0cd5fc2..527d1f581 100644 --- a/mapillary_tools/sample_video.py +++ b/mapillary_tools/sample_video.py @@ -290,9 +290,9 @@ def _sample_single_video_by_distance( LOG.info("Extracting video metdata") - video_metadatas = geotag_videos_from_video.GeotagVideosFromVideo( + video_metadatas = geotag_videos_from_video.GeotagVideosFromVideo().to_description( [video_path] - ).to_description() + ) assert len(video_metadatas) == 1, "expect 1 video metadata" video_metadata = video_metadatas[0] if isinstance(video_metadata, types.ErrorMetadata): From f15d3cc80a948af0e23bea4ca473bbae5a87ab60 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 4 Apr 2025 20:29:28 -0700 Subject: [PATCH 06/10] add exiftool as alias for exiftool_runtime --- mapillary_tools/geotag/options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mapillary_tools/geotag/options.py b/mapillary_tools/geotag/options.py index 44b2cd652..c3c243e61 100644 --- a/mapillary_tools/geotag/options.py +++ b/mapillary_tools/geotag/options.py @@ -28,6 +28,7 @@ class SourceType(enum.Enum): SOURCE_TYPE_ALIAS: dict[str, SourceType] = { "blackvue_videos": SourceType.BLACKVUE, "gopro_videos": SourceType.GOPRO, + "exiftool": SourceType.EXIFTOOL_RUNTIME, } From edbcf55b635cc84ca751aaaad27fc376bafde924 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sat, 5 Apr 2025 11:57:44 -0700 Subject: [PATCH 07/10] rename DESCRIPTION_TAG --- mapillary_tools/exiftool_read.py | 4 ++-- mapillary_tools/geotag/factory.py | 4 ++-- mapillary_tools/geotag/geotag_images_from_exiftool.py | 4 ++-- mapillary_tools/geotag/geotag_videos_from_exiftool.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mapillary_tools/exiftool_read.py b/mapillary_tools/exiftool_read.py index 3929a0fbb..7943ff01b 100644 --- a/mapillary_tools/exiftool_read.py +++ b/mapillary_tools/exiftool_read.py @@ -53,8 +53,8 @@ LOG = logging.getLogger(__name__) +DESCRIPTION_TAG = "rdf:Description" _FIELD_TYPE = T.TypeVar("_FIELD_TYPE", int, float, str) -_DESCRIPTION_TAG = "rdf:Description" def expand_tag(ns_tag: str, namespaces: dict[str, str]) -> str: @@ -107,7 +107,7 @@ def index_rdf_description_by_path_from_xml_element( ) -> dict[str, ET.Element]: rdf_description_by_path: dict[str, ET.Element] = {} - elements = element.iterfind(_DESCRIPTION_TAG, namespaces=EXIFTOOL_NAMESPACES) + elements = element.iterfind(DESCRIPTION_TAG, namespaces=EXIFTOOL_NAMESPACES) for element in elements: path = find_rdf_description_path(element) if path is not None: diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index ca505b59a..def91c7a7 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -105,7 +105,7 @@ def _is_reprocessable(metadata: types.MetadataOrError) -> bool: def _filter_images_and_videos( - file_paths: T.Iterable[Path], + paths: T.Iterable[Path], filetypes: set[types.FileType] | None = None, ) -> tuple[list[Path], list[Path]]: image_paths = [] @@ -120,7 +120,7 @@ def _filter_images_and_videos( include_images = types.FileType.IMAGE in filetypes include_videos = bool(filetypes & ALL_VIDEO_TYPES) - for path in file_paths: + for path in paths: if utils.is_image_file(path): if include_images: image_paths.append(path) diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index 8c39ee87f..1c2e5f38d 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -46,7 +46,7 @@ def _generate_image_extractors( ) if rdf_description is None: exc = exceptions.MapillaryEXIFNotFoundError( - f"The {exiftool_read._DESCRIPTION_TAG} XML element for the image not found" + f"The {exiftool_read.DESCRIPTION_TAG} XML element for the image not found" ) results.append( types.describe_error_metadata( @@ -90,7 +90,7 @@ def _generate_image_extractors( ) if rdf_description is None: exc = exceptions.MapillaryEXIFNotFoundError( - f"The {exiftool_read._DESCRIPTION_TAG} XML element for the image not found" + f"The {exiftool_read.DESCRIPTION_TAG} XML element for the image not found" ) results.append( types.describe_error_metadata( diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool.py b/mapillary_tools/geotag/geotag_videos_from_exiftool.py index eeab488de..9a343916b 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool.py @@ -44,7 +44,7 @@ def _generate_video_extractors( ) if rdf_description is None: exc = exceptions.MapillaryEXIFNotFoundError( - f"The {exiftool_read._DESCRIPTION_TAG} XML element for the video not found" + f"The {exiftool_read.DESCRIPTION_TAG} XML element for the video not found" ) results.append( types.describe_error_metadata( @@ -89,7 +89,7 @@ def _generate_video_extractors( ) if rdf_description is None: exc = exceptions.MapillaryEXIFNotFoundError( - f"The {exiftool_read._DESCRIPTION_TAG} XML element for the video not found" + f"The {exiftool_read.DESCRIPTION_TAG} XML element for the video not found" ) results.append( types.describe_error_metadata( From f95a861a82dfdc8b34bc3983b9972eae4c9e7671 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 6 Apr 2025 10:58:03 -0700 Subject: [PATCH 08/10] geotag from exiftool --- mapillary_tools/exceptions.py | 2 +- mapillary_tools/exiftool_read.py | 25 +----- mapillary_tools/geotag/base.py | 28 ++++--- .../geotag/geotag_images_from_exif.py | 2 +- .../geotag/geotag_images_from_exiftool.py | 80 +++++++++---------- .../geotag/geotag_videos_from_exiftool.py | 75 ++++++++--------- mapillary_tools/geotag/utils.py | 30 ++++++- 7 files changed, 122 insertions(+), 120 deletions(-) diff --git a/mapillary_tools/exceptions.py b/mapillary_tools/exceptions.py index 10c8b7b48..cd730d9fc 100644 --- a/mapillary_tools/exceptions.py +++ b/mapillary_tools/exceptions.py @@ -87,7 +87,7 @@ def __init__( self.angle_diff = angle_diff -class MapillaryEXIFNotFoundError(MapillaryDescriptionError): +class MapillaryExifToolXMLNotFoundError(MapillaryDescriptionError): pass diff --git a/mapillary_tools/exiftool_read.py b/mapillary_tools/exiftool_read.py index 7943ff01b..e10da07cb 100644 --- a/mapillary_tools/exiftool_read.py +++ b/mapillary_tools/exiftool_read.py @@ -6,7 +6,7 @@ import xml.etree.ElementTree as ET from pathlib import Path -from . import exif_read, utils +from . import exif_read EXIFTOOL_NAMESPACES: dict[str, str] = { @@ -79,29 +79,6 @@ def find_rdf_description_path(element: ET.Element) -> Path | None: return Path(about) -def index_rdf_description_by_path( - xml_paths: T.Sequence[Path], -) -> dict[str, ET.Element]: - rdf_description_by_path: dict[str, ET.Element] = {} - - for xml_path in utils.find_xml_files(xml_paths): - try: - etree = ET.parse(xml_path) - except ET.ParseError as ex: - verbose = LOG.getEffectiveLevel() <= logging.DEBUG - if verbose: - LOG.warning(f"Failed to parse {xml_path}", exc_info=verbose) - else: - LOG.warning(f"Failed to parse {xml_path}: {ex}", exc_info=verbose) - continue - - rdf_description_by_path.update( - index_rdf_description_by_path_from_xml_element(etree.getroot()) - ) - - return rdf_description_by_path - - def index_rdf_description_by_path_from_xml_element( element: ET.Element, ) -> dict[str, ET.Element]: diff --git a/mapillary_tools/geotag/base.py b/mapillary_tools/geotag/base.py index c606707a4..b0ed1beb9 100644 --- a/mapillary_tools/geotag/base.py +++ b/mapillary_tools/geotag/base.py @@ -53,11 +53,6 @@ def to_description( return results + error_metadatas - def _generate_image_extractors( - self, image_paths: T.Sequence[Path] - ) -> T.Sequence[TImageExtractor | types.ErrorMetadata]: - raise NotImplementedError - # This method is passed to multiprocessing # so it has to be classmethod or staticmethod to avoid pickling the instance @classmethod @@ -70,12 +65,21 @@ def run_extraction(cls, extractor: TImageExtractor) -> types.ImageMetadataOrErro return types.describe_error_metadata( ex, image_path, filetype=types.FileType.IMAGE ) + except exceptions.MapillaryUserError as ex: + # Considered as fatal error if not MapillaryDescriptionError + raise ex except Exception as ex: + # TODO: hide details if not verbose mode LOG.exception("Unexpected error extracting metadata from %s", image_path) return types.describe_error_metadata( ex, image_path, filetype=types.FileType.IMAGE ) + def _generate_image_extractors( + self, image_paths: T.Sequence[Path] + ) -> T.Sequence[TImageExtractor | types.ErrorMetadata]: + raise NotImplementedError + TVideoExtractor = T.TypeVar("TVideoExtractor", bound=BaseVideoExtractor) @@ -115,11 +119,6 @@ def to_description( return results + error_metadatas - def _generate_video_extractors( - self, video_paths: T.Sequence[Path] - ) -> T.Sequence[TVideoExtractor | types.ErrorMetadata]: - raise NotImplementedError - # This method is passed to multiprocessing # so it has to be classmethod or staticmethod to avoid pickling the instance @classmethod @@ -132,8 +131,17 @@ def run_extraction(cls, extractor: TVideoExtractor) -> types.VideoMetadataOrErro return types.describe_error_metadata( ex, video_path, filetype=types.FileType.VIDEO ) + except exceptions.MapillaryUserError as ex: + # Considered as fatal error if not MapillaryDescriptionError + raise ex except Exception as ex: + # TODO: hide details if not verbose mode LOG.exception("Unexpected error extracting metadata from %s", video_path) return types.describe_error_metadata( ex, video_path, filetype=types.FileType.VIDEO ) + + def _generate_video_extractors( + self, video_paths: T.Sequence[Path] + ) -> T.Sequence[TVideoExtractor | types.ErrorMetadata]: + raise NotImplementedError diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index 809fdc1f4..7a5ec1698 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -2,8 +2,8 @@ import logging import sys -from pathlib import Path import typing as T +from pathlib import Path if sys.version_info >= (3, 12): from typing import override diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index 1c2e5f38d..e5c42ac76 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -17,6 +17,7 @@ from .geotag_images_from_video import GeotagImagesFromVideo from .geotag_videos_from_exiftool import GeotagVideosFromExifToolXML from .image_extractors.exiftool import ImageExifToolExtractor +from .utils import index_rdf_description_by_path LOG = logging.getLogger(__name__) @@ -30,34 +31,37 @@ def __init__( self.xml_path = xml_path super().__init__(num_processes=num_processes) - @override - def _generate_image_extractors( - self, image_paths: T.Sequence[Path] - ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: - rdf_description_by_path = exiftool_read.index_rdf_description_by_path( - [self.xml_path] - ) - + @classmethod + def build_image_extractors( + cls, + rdf_by_path: dict[str, ET.Element], + image_paths: T.Iterable[Path], + ) -> list[ImageExifToolExtractor | types.ErrorMetadata]: results: list[ImageExifToolExtractor | types.ErrorMetadata] = [] for path in image_paths: - rdf_description = rdf_description_by_path.get( - exiftool_read.canonical_path(path) - ) - if rdf_description is None: - exc = exceptions.MapillaryEXIFNotFoundError( - f"The {exiftool_read.DESCRIPTION_TAG} XML element for the image not found" + rdf = rdf_by_path.get(exiftool_read.canonical_path(path)) + if rdf is None: + ex = exceptions.MapillaryExifToolXMLNotFoundError( + "Cannot find the image in the ExifTool XML" ) results.append( types.describe_error_metadata( - exc, path, filetype=types.FileType.IMAGE + ex, path, filetype=types.FileType.IMAGE ) ) else: - results.append(ImageExifToolExtractor(path, rdf_description)) + results.append(ImageExifToolExtractor(path, rdf)) return results + @override + def _generate_image_extractors( + self, image_paths: T.Sequence[Path] + ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: + rdf_by_path = index_rdf_description_by_path([self.xml_path]) + return self.build_image_extractors(rdf_by_path, image_paths) + class GeotagImagesFromExifToolRunner(GeotagImagesFromGeneric): @override @@ -67,7 +71,7 @@ def _generate_image_extractors( runner = ExiftoolRunner(constants.EXIFTOOL_PATH) LOG.debug( - "Extracting XML from %d images with exiftool command: %s", + "Extracting XML from %d images with ExifTool command: %s", len(image_paths), " ".join(runner._build_args_read_stdin()), ) @@ -76,31 +80,23 @@ def _generate_image_extractors( except FileNotFoundError as ex: raise exceptions.MapillaryExiftoolNotFoundError(ex) from ex - rdf_description_by_path = ( - exiftool_read.index_rdf_description_by_path_from_xml_element( - ET.fromstring(xml) + try: + xml_element = ET.fromstring(xml) + except ET.ParseError as ex: + LOG.warning( + "Failed to parse ExifTool XML: %s", + str(ex), + exc_info=LOG.getEffectiveLevel() <= logging.DEBUG, ) - ) - - results: list[ImageExifToolExtractor | types.ErrorMetadata] = [] - - for path in image_paths: - rdf_description = rdf_description_by_path.get( - exiftool_read.canonical_path(path) + rdf_by_path = {} + else: + rdf_by_path = exiftool_read.index_rdf_description_by_path_from_xml_element( + xml_element ) - if rdf_description is None: - exc = exceptions.MapillaryEXIFNotFoundError( - f"The {exiftool_read.DESCRIPTION_TAG} XML element for the image not found" - ) - results.append( - types.describe_error_metadata( - exc, path, filetype=types.FileType.IMAGE - ) - ) - else: - results.append(ImageExifToolExtractor(path, rdf_description)) - return results + return GeotagImagesFromExifToolXML.build_image_extractors( + rdf_by_path, image_paths + ) class GeotagImagesFromExifToolWithSamples(GeotagImagesFromGeneric): @@ -118,11 +114,9 @@ def geotag_samples( self, image_paths: T.Sequence[Path] ) -> list[types.ImageMetadataOrError]: # Find all video paths in self.xml_path - rdf_description_by_path = exiftool_read.index_rdf_description_by_path( - [self.xml_path] - ) + rdf_by_path = index_rdf_description_by_path([self.xml_path]) video_paths = utils.find_videos( - [Path(pathstr) for pathstr in rdf_description_by_path.keys()], + [Path(pathstr) for pathstr in rdf_by_path.keys()], skip_subfolders=True, ) # Find all video paths that have sample images diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool.py b/mapillary_tools/geotag/geotag_videos_from_exiftool.py index 9a343916b..5b7de6839 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool.py @@ -14,6 +14,7 @@ from .. import constants, exceptions, exiftool_read, types from ..exiftool_runner import ExiftoolRunner from .base import GeotagVideosFromGeneric +from .utils import index_rdf_description_by_path from .video_extractors.exiftool import VideoExifToolExtractor LOG = logging.getLogger(__name__) @@ -28,34 +29,37 @@ def __init__( super().__init__(num_processes=num_processes) self.xml_path = xml_path - @override - def _generate_video_extractors( - self, video_paths: T.Sequence[Path] - ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: - rdf_description_by_path = exiftool_read.index_rdf_description_by_path( - [self.xml_path] - ) - + @classmethod + def build_image_extractors( + cls, + rdf_by_path: dict[str, ET.Element], + video_paths: T.Iterable[Path], + ) -> list[VideoExifToolExtractor | types.ErrorMetadata]: results: list[VideoExifToolExtractor | types.ErrorMetadata] = [] for path in video_paths: - rdf_description = rdf_description_by_path.get( - exiftool_read.canonical_path(path) - ) - if rdf_description is None: - exc = exceptions.MapillaryEXIFNotFoundError( - f"The {exiftool_read.DESCRIPTION_TAG} XML element for the video not found" + rdf = rdf_by_path.get(exiftool_read.canonical_path(path)) + if rdf is None: + ex = exceptions.MapillaryExifToolXMLNotFoundError( + "Cannot find the video in the ExifTool XML" ) results.append( types.describe_error_metadata( - exc, path, filetype=types.FileType.VIDEO + ex, path, filetype=types.FileType.VIDEO ) ) else: - results.append(VideoExifToolExtractor(path, rdf_description)) + results.append(VideoExifToolExtractor(path, rdf)) return results + @override + def _generate_video_extractors( + self, video_paths: T.Sequence[Path] + ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: + rdf_by_path = index_rdf_description_by_path([self.xml_path]) + return self.build_image_extractors(rdf_by_path, video_paths) + class GeotagVideosFromExifToolRunner(GeotagVideosFromGeneric): @override @@ -65,38 +69,29 @@ def _generate_video_extractors( runner = ExiftoolRunner(constants.EXIFTOOL_PATH) LOG.debug( - "Extracting XML from %d videos with exiftool command: %s", + "Extracting XML from %d videos with ExifTool command: %s", len(video_paths), " ".join(runner._build_args_read_stdin()), ) - try: xml = runner.extract_xml(video_paths) except FileNotFoundError as ex: raise exceptions.MapillaryExiftoolNotFoundError(ex) from ex - rdf_description_by_path = ( - exiftool_read.index_rdf_description_by_path_from_xml_element( - ET.fromstring(xml) + try: + xml_element = ET.fromstring(xml) + except ET.ParseError as ex: + LOG.warning( + "Failed to parse ExifTool XML: %s", + str(ex), + exc_info=LOG.getEffectiveLevel() <= logging.DEBUG, ) - ) - - results: list[VideoExifToolExtractor | types.ErrorMetadata] = [] - - for path in video_paths: - rdf_description = rdf_description_by_path.get( - exiftool_read.canonical_path(path) + rdf_by_path = {} + else: + rdf_by_path = exiftool_read.index_rdf_description_by_path_from_xml_element( + xml_element ) - if rdf_description is None: - exc = exceptions.MapillaryEXIFNotFoundError( - f"The {exiftool_read.DESCRIPTION_TAG} XML element for the video not found" - ) - results.append( - types.describe_error_metadata( - exc, path, filetype=types.FileType.VIDEO - ) - ) - else: - results.append(VideoExifToolExtractor(path, rdf_description)) - return results + return GeotagVideosFromExifToolXML.build_image_extractors( + rdf_by_path, video_paths + ) diff --git a/mapillary_tools/geotag/utils.py b/mapillary_tools/geotag/utils.py index 103667a01..2d7a4f49c 100644 --- a/mapillary_tools/geotag/utils.py +++ b/mapillary_tools/geotag/utils.py @@ -1,11 +1,14 @@ +import logging import typing as T +import xml.etree.ElementTree as ET from pathlib import Path import gpxpy -from .. import geo +from .. import exiftool_read, geo, utils Track = T.List[geo.Point] +LOG = logging.getLogger(__name__) def parse_gpx(gpx_file: Path) -> list[Track]: @@ -30,3 +33,28 @@ def parse_gpx(gpx_file: Path) -> list[Track]: ) return tracks + + +def index_rdf_description_by_path( + xml_paths: T.Sequence[Path], +) -> dict[str, ET.Element]: + rdf_description_by_path: dict[str, ET.Element] = {} + + for xml_path in utils.find_xml_files(xml_paths): + try: + etree = ET.parse(xml_path) + except ET.ParseError as ex: + verbose = LOG.getEffectiveLevel() <= logging.DEBUG + if verbose: + LOG.warning("Failed to parse %s", xml_path, exc_info=True) + else: + LOG.warning("Failed to parse %s: %s", xml_path, ex) + continue + + rdf_description_by_path.update( + exiftool_read.index_rdf_description_by_path_from_xml_element( + etree.getroot() + ) + ) + + return rdf_description_by_path From 543f9abe2fb49153a38ef236feef0b48569daece Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 10 Apr 2025 22:15:15 -0700 Subject: [PATCH 09/10] add the missing modules --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index df3aa5a59..c67fd8830 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,8 @@ def readme(): "mapillary_tools.camm", "mapillary_tools.commands", "mapillary_tools.geotag", + "mapillary_tools.geotag.image_extractors", + "mapillary_tools.geotag.video_extractors", "mapillary_tools.gpmf", "mapillary_tools.mp4", ], From a1e2899183520b169849b294892cfcd28f85ec99 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 10 Apr 2025 22:19:09 -0700 Subject: [PATCH 10/10] fix types --- mapillary_tools/geotag/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mapillary_tools/geotag/utils.py b/mapillary_tools/geotag/utils.py index 2d7a4f49c..ef311e437 100644 --- a/mapillary_tools/geotag/utils.py +++ b/mapillary_tools/geotag/utils.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import typing as T import xml.etree.ElementTree as ET