Skip to content

Commit a8713d8

Browse files
authored
improve: geotag (#743)
* git mv mapillary_tools/geotag/geotag_videos_from_exiftool_video.py mapillary_tools/geotag/geotag_videos_from_exiftool.py
* update
* move around
* add override and fix a bug of overriding wrong method
* move image_paths to to_description()
* add exiftool as alias for exiftool_runtime
* rename DESCRIPTION_TAG
* geotag from exiftool
* add the missing modules
* fix types
1 parent b25e160 commit a8713d8

27 files changed

+882
-773
lines changed

mapillary_tools/exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def __init__(
8787
self.angle_diff = angle_diff
8888

8989

90-
class MapillaryEXIFNotFoundError(MapillaryDescriptionError):
90+
class MapillaryExifToolXMLNotFoundError(MapillaryDescriptionError):
9191
pass
9292

9393

mapillary_tools/exiftool_read.py

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import xml.etree.ElementTree as ET
77
from pathlib import Path
88

9-
from . import exif_read, utils
9+
from . import exif_read
1010

1111

1212
EXIFTOOL_NAMESPACES: dict[str, str] = {
@@ -53,8 +53,8 @@
5353

5454

5555
LOG = logging.getLogger(__name__)
56+
DESCRIPTION_TAG = "rdf:Description"
5657
_FIELD_TYPE = T.TypeVar("_FIELD_TYPE", int, float, str)
57-
_DESCRIPTION_TAG = "rdf:Description"
5858

5959

6060
def expand_tag(ns_tag: str, namespaces: dict[str, str]) -> str:
@@ -79,35 +79,12 @@ def find_rdf_description_path(element: ET.Element) -> Path | None:
7979
return Path(about)
8080

8181

82-
def index_rdf_description_by_path(
83-
xml_paths: T.Sequence[Path],
84-
) -> dict[str, ET.Element]:
85-
rdf_description_by_path: dict[str, ET.Element] = {}
86-
87-
for xml_path in utils.find_xml_files(xml_paths):
88-
try:
89-
etree = ET.parse(xml_path)
90-
except ET.ParseError as ex:
91-
verbose = LOG.getEffectiveLevel() <= logging.DEBUG
92-
if verbose:
93-
LOG.warning(f"Failed to parse {xml_path}", exc_info=verbose)
94-
else:
95-
LOG.warning(f"Failed to parse {xml_path}: {ex}", exc_info=verbose)
96-
continue
97-
98-
rdf_description_by_path.update(
99-
index_rdf_description_by_path_from_xml_element(etree.getroot())
100-
)
101-
102-
return rdf_description_by_path
103-
104-
10582
def index_rdf_description_by_path_from_xml_element(
10683
element: ET.Element,
10784
) -> dict[str, ET.Element]:
10885
rdf_description_by_path: dict[str, ET.Element] = {}
10986

110-
elements = element.iterfind(_DESCRIPTION_TAG, namespaces=EXIFTOOL_NAMESPACES)
87+
elements = element.iterfind(DESCRIPTION_TAG, namespaces=EXIFTOOL_NAMESPACES)
11188
for element in elements:
11289
path = find_rdf_description_path(element)
11390
if path is not None:

mapillary_tools/geotag/__init__.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

mapillary_tools/geotag/geotag_from_generic.py renamed to mapillary_tools/geotag/base.py

Lines changed: 32 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -8,41 +8,30 @@
88
from tqdm import tqdm
99

1010
from .. import exceptions, types, utils
11+
from .image_extractors.base import BaseImageExtractor
12+
from .video_extractors.base import BaseVideoExtractor
1113

1214

1315
LOG = logging.getLogger(__name__)
1416

1517

16-
class GenericImageExtractor(abc.ABC):
17-
"""
18-
Extracts metadata from an image file.
19-
"""
20-
21-
def __init__(self, image_path: Path):
22-
self.image_path = image_path
23-
24-
def extract(self) -> types.ImageMetadataOrError:
25-
raise NotImplementedError
26-
27-
28-
TImageExtractor = T.TypeVar("TImageExtractor", bound=GenericImageExtractor)
18+
TImageExtractor = T.TypeVar("TImageExtractor", bound=BaseImageExtractor)
2919

3020

3121
class GeotagImagesFromGeneric(abc.ABC, T.Generic[TImageExtractor]):
3222
"""
3323
Extracts metadata from a list of image files with multiprocessing.
3424
"""
3525

36-
def __init__(
37-
self, image_paths: T.Sequence[Path], num_processes: int | None = None
38-
) -> None:
39-
self.image_paths = image_paths
26+
def __init__(self, num_processes: int | None = None) -> None:
4027
self.num_processes = num_processes
4128

42-
def to_description(self) -> list[types.ImageMetadataOrError]:
43-
extractor_or_errors = self._generate_image_extractors()
29+
def to_description(
30+
self, image_paths: T.Sequence[Path]
31+
) -> list[types.ImageMetadataOrError]:
32+
extractor_or_errors = self._generate_image_extractors(image_paths)
4433

45-
assert len(extractor_or_errors) == len(self.image_paths)
34+
assert len(extractor_or_errors) == len(image_paths)
4635

4736
extractors, error_metadatas = types.separate_errors(extractor_or_errors)
4837

@@ -64,11 +53,6 @@ def to_description(self) -> list[types.ImageMetadataOrError]:
6453

6554
return results + error_metadatas
6655

67-
def _generate_image_extractors(
68-
self,
69-
) -> T.Sequence[TImageExtractor | types.ErrorMetadata]:
70-
raise NotImplementedError
71-
7256
# This method is passed to multiprocessing
7357
# so it has to be classmethod or staticmethod to avoid pickling the instance
7458
@classmethod
@@ -81,43 +65,39 @@ def run_extraction(cls, extractor: TImageExtractor) -> types.ImageMetadataOrErro
8165
return types.describe_error_metadata(
8266
ex, image_path, filetype=types.FileType.IMAGE
8367
)
68+
except exceptions.MapillaryUserError as ex:
69+
# Considered as fatal error if not MapillaryDescriptionError
70+
raise ex
8471
except Exception as ex:
72+
# TODO: hide details if not verbose mode
8573
LOG.exception("Unexpected error extracting metadata from %s", image_path)
8674
return types.describe_error_metadata(
8775
ex, image_path, filetype=types.FileType.IMAGE
8876
)
8977

90-
91-
class GenericVideoExtractor(abc.ABC):
92-
"""
93-
Extracts metadata from a video file.
94-
"""
95-
96-
def __init__(self, video_path: Path):
97-
self.video_path = video_path
98-
99-
def extract(self) -> types.VideoMetadataOrError:
78+
def _generate_image_extractors(
79+
self, image_paths: T.Sequence[Path]
80+
) -> T.Sequence[TImageExtractor | types.ErrorMetadata]:
10081
raise NotImplementedError
10182

10283

103-
TVideoExtractor = T.TypeVar("TVideoExtractor", bound=GenericVideoExtractor)
84+
TVideoExtractor = T.TypeVar("TVideoExtractor", bound=BaseVideoExtractor)
10485

10586

10687
class GeotagVideosFromGeneric(abc.ABC, T.Generic[TVideoExtractor]):
10788
"""
10889
Extracts metadata from a list of video files with multiprocessing.
10990
"""
11091

111-
def __init__(
112-
self, video_paths: T.Sequence[Path], num_processes: int | None = None
113-
) -> None:
114-
self.video_paths = video_paths
92+
def __init__(self, num_processes: int | None = None) -> None:
11593
self.num_processes = num_processes
11694

117-
def to_description(self) -> list[types.VideoMetadataOrError]:
118-
extractor_or_errors = self._generate_video_extractors()
95+
def to_description(
96+
self, video_paths: T.Sequence[Path]
97+
) -> list[types.VideoMetadataOrError]:
98+
extractor_or_errors = self._generate_video_extractors(video_paths)
11999

120-
assert len(extractor_or_errors) == len(self.video_paths)
100+
assert len(extractor_or_errors) == len(video_paths)
121101

122102
extractors, error_metadatas = types.separate_errors(extractor_or_errors)
123103

@@ -139,11 +119,6 @@ def to_description(self) -> list[types.VideoMetadataOrError]:
139119

140120
return results + error_metadatas
141121

142-
def _generate_video_extractors(
143-
self,
144-
) -> T.Sequence[TVideoExtractor | types.ErrorMetadata]:
145-
raise NotImplementedError
146-
147122
# This method is passed to multiprocessing
148123
# so it has to be classmethod or staticmethod to avoid pickling the instance
149124
@classmethod
@@ -156,8 +131,17 @@ def run_extraction(cls, extractor: TVideoExtractor) -> types.VideoMetadataOrErro
156131
return types.describe_error_metadata(
157132
ex, video_path, filetype=types.FileType.VIDEO
158133
)
134+
except exceptions.MapillaryUserError as ex:
135+
# Considered as fatal error if not MapillaryDescriptionError
136+
raise ex
159137
except Exception as ex:
138+
# TODO: hide details if not verbose mode
160139
LOG.exception("Unexpected error extracting metadata from %s", video_path)
161140
return types.describe_error_metadata(
162141
ex, video_path, filetype=types.FileType.VIDEO
163142
)
143+
144+
def _generate_video_extractors(
145+
self, video_paths: T.Sequence[Path]
146+
) -> T.Sequence[TVideoExtractor | types.ErrorMetadata]:
147+
raise NotImplementedError

mapillary_tools/geotag/factory.py

Lines changed: 27 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@
88
from .. import exceptions, types, utils
99
from ..types import FileType
1010
from . import (
11-
geotag_from_generic,
11+
base,
1212
geotag_images_from_exif,
1313
geotag_images_from_exiftool,
14-
geotag_images_from_exiftool_both_image_and_video,
1514
geotag_images_from_gpx_file,
1615
geotag_images_from_nmea_file,
1716
geotag_images_from_video,
18-
geotag_videos_from_exiftool_video,
17+
geotag_videos_from_exiftool,
1918
geotag_videos_from_gpx,
2019
geotag_videos_from_video,
2120
)
@@ -106,7 +105,7 @@ def _is_reprocessable(metadata: types.MetadataOrError) -> bool:
106105

107106

108107
def _filter_images_and_videos(
109-
file_paths: T.Iterable[Path],
108+
paths: T.Iterable[Path],
110109
filetypes: set[types.FileType] | None = None,
111110
) -> tuple[list[Path], list[Path]]:
112111
image_paths = []
@@ -121,7 +120,7 @@ def _filter_images_and_videos(
121120
include_images = types.FileType.IMAGE in filetypes
122121
include_videos = bool(filetypes & ALL_VIDEO_TYPES)
123122

124-
for path in file_paths:
123+
for path in paths:
125124
if utils.is_image_file(path):
126125
if include_images:
127126
image_paths.append(path)
@@ -154,60 +153,57 @@ def _geotag_images(
154153
else:
155154
interpolation = option.interpolation
156155

157-
geotag: geotag_from_generic.GeotagImagesFromGeneric
156+
geotag: base.GeotagImagesFromGeneric
158157

159158
if option.source is SourceType.NATIVE:
160159
geotag = geotag_images_from_exif.GeotagImagesFromEXIF(
161-
image_paths, num_processes=option.num_processes
160+
num_processes=option.num_processes
162161
)
163-
return geotag.to_description()
162+
return geotag.to_description(image_paths)
164163

165164
if option.source is SourceType.EXIFTOOL_RUNTIME:
166165
geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolRunner(
167-
image_paths, num_processes=option.num_processes
166+
num_processes=option.num_processes
168167
)
169168
try:
170-
return geotag.to_description()
169+
return geotag.to_description(image_paths)
171170
except exceptions.MapillaryExiftoolNotFoundError as ex:
172171
LOG.warning('Skip "%s" because: %s', option.source.value, ex)
173172
return []
174173

175174
elif option.source is SourceType.EXIFTOOL_XML:
176175
# This is to ensure 'video_process --geotag={"source": "exiftool_xml", "source_path": "/tmp/xml_path"}'
177176
# to work
178-
geotag = geotag_images_from_exiftool_both_image_and_video.GeotagImagesFromExifToolBothImageAndVideo(
179-
image_paths,
177+
geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolWithSamples(
180178
xml_path=_ensure_source_path(option),
181179
num_processes=option.num_processes,
182180
)
183-
return geotag.to_description()
181+
return geotag.to_description(image_paths)
184182

185183
elif option.source is SourceType.GPX:
186184
geotag = geotag_images_from_gpx_file.GeotagImagesFromGPXFile(
187-
image_paths,
188185
source_path=_ensure_source_path(option),
189186
use_gpx_start_time=interpolation.use_gpx_start_time,
190187
offset_time=interpolation.offset_time,
191188
num_processes=option.num_processes,
192189
)
193-
return geotag.to_description()
190+
return geotag.to_description(image_paths)
194191

195192
elif option.source is SourceType.NMEA:
196193
geotag = geotag_images_from_nmea_file.GeotagImagesFromNMEAFile(
197-
image_paths,
198194
source_path=_ensure_source_path(option),
199195
use_gpx_start_time=interpolation.use_gpx_start_time,
200196
offset_time=interpolation.offset_time,
201197
num_processes=option.num_processes,
202198
)
203199

204-
return geotag.to_description()
200+
return geotag.to_description(image_paths)
205201

206202
elif option.source is SourceType.EXIF:
207203
geotag = geotag_images_from_exif.GeotagImagesFromEXIF(
208-
image_paths, num_processes=option.num_processes
204+
num_processes=option.num_processes
209205
)
210-
return geotag.to_description()
206+
return geotag.to_description(image_paths)
211207

212208
elif option.source in [
213209
SourceType.GOPRO,
@@ -225,17 +221,15 @@ def _geotag_images(
225221
)
226222
video_paths_with_image_samples = list(image_samples_by_video_path.keys())
227223
video_metadatas = geotag_videos_from_video.GeotagVideosFromVideo(
228-
video_paths_with_image_samples,
229224
filetypes={map_geotag_source_to_filetype[option.source]},
230225
num_processes=option.num_processes,
231-
).to_description()
226+
).to_description(video_paths_with_image_samples)
232227
geotag = geotag_images_from_video.GeotagImagesFromVideo(
233-
image_paths,
234228
video_metadatas,
235229
offset_time=interpolation.offset_time,
236230
num_processes=option.num_processes,
237231
)
238-
return geotag.to_description()
232+
return geotag.to_description(image_paths)
239233

240234
else:
241235
raise ValueError(f"Invalid geotag source {option.source}")
@@ -249,34 +243,33 @@ def _geotag_videos(
249243
if not video_paths:
250244
return []
251245

252-
geotag: geotag_from_generic.GeotagVideosFromGeneric
246+
geotag: base.GeotagVideosFromGeneric
253247

254248
if option.source is SourceType.NATIVE:
255249
geotag = geotag_videos_from_video.GeotagVideosFromVideo(
256-
video_paths, num_processes=option.num_processes, filetypes=option.filetypes
250+
num_processes=option.num_processes, filetypes=option.filetypes
257251
)
258-
return geotag.to_description()
252+
return geotag.to_description(video_paths)
259253

260254
if option.source is SourceType.EXIFTOOL_RUNTIME:
261-
geotag = geotag_videos_from_exiftool_video.GeotagVideosFromExifToolRunner(
262-
video_paths, num_processes=option.num_processes
255+
geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolRunner(
256+
num_processes=option.num_processes
263257
)
264258
try:
265-
return geotag.to_description()
259+
return geotag.to_description(video_paths)
266260
except exceptions.MapillaryExiftoolNotFoundError as ex:
267261
LOG.warning('Skip "%s" because: %s', option.source.value, ex)
268262
return []
269263

270264
elif option.source is SourceType.EXIFTOOL_XML:
271-
geotag = geotag_videos_from_exiftool_video.GeotagVideosFromExifToolVideo(
272-
video_paths,
265+
geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolXML(
273266
xml_path=_ensure_source_path(option),
274267
)
275-
return geotag.to_description()
268+
return geotag.to_description(video_paths)
276269

277270
elif option.source is SourceType.GPX:
278-
geotag = geotag_videos_from_gpx.GeotagVideosFromGPX(video_paths)
279-
return geotag.to_description()
271+
geotag = geotag_videos_from_gpx.GeotagVideosFromGPX()
272+
return geotag.to_description(video_paths)
280273

281274
elif option.source is SourceType.NMEA:
282275
# TODO: geotag videos from NMEA

0 commit comments

Comments
 (0)