Skip to content

Commit 2c14e95

Browse files
authored
improve logging (#769)
* better error message in sequence processing * improve process summary * capture invalid video * add markers * add humanize * logging for upload * log checksum * update tests
1 parent aa18889 commit 2c14e95

File tree

12 files changed

+132
-68
lines changed

12 files changed

+132
-68
lines changed

mapillary_tools/constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ def _parse_scaled_integers(
120120
CUTOFF_TIME = float(os.getenv(_ENV_PREFIX + "CUTOFF_TIME", 60))
121121
DUPLICATE_DISTANCE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_DISTANCE", 0.1))
122122
DUPLICATE_ANGLE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_ANGLE", 5))
123-
MAX_AVG_SPEED = float(
124-
os.getenv(_ENV_PREFIX + "MAX_AVG_SPEED", 400_000 / 3600)
123+
MAX_CAPTURE_SPEED_KMH = float(
124+
os.getenv(_ENV_PREFIX + "MAX_CAPTURE_SPEED_KMH", 400)
125125
) # 400 KM/h
126126
# WARNING: Changing the following envvars might result in failed uploads
127127
# Max number of images per sequence

mapillary_tools/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ class MapillaryVideoGPSNotFoundError(MapillaryDescriptionError):
5151
pass
5252

5353

54+
class MapillaryInvalidVideoError(MapillaryDescriptionError):
55+
pass
56+
57+
5458
class MapillaryGPXEmptyError(MapillaryDescriptionError):
5559
pass
5660

mapillary_tools/geotag/factory.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,14 @@ def process(
6767
reprocessable_paths = set(paths)
6868

6969
for idx, option in enumerate(options):
70-
LOG.debug("Processing %d files with %s", len(reprocessable_paths), option)
70+
if LOG.getEffectiveLevel() <= logging.DEBUG:
71+
LOG.info(
72+
f"==> Processing {len(reprocessable_paths)} files with source {option}..."
73+
)
74+
else:
75+
LOG.info(
76+
f"==> Processing {len(reprocessable_paths)} files with source {option.source.value}..."
77+
)
7178

7279
image_videos, video_paths = _filter_images_and_videos(
7380
reprocessable_paths, option.filetypes

mapillary_tools/geotag/video_extractors/native.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from ... import blackvue_parser, exceptions, geo, telemetry, types, utils
1313
from ...camm import camm_parser
1414
from ...gpmf import gpmf_gps_filter, gpmf_parser
15+
from ...mp4 import construct_mp4_parser, simple_mp4_parser
1516
from .base import BaseVideoExtractor
1617

1718

@@ -113,20 +114,44 @@ def extract(self) -> types.VideoMetadata:
113114
extractor = GoProVideoExtractor(self.video_path)
114115
try:
115116
return extractor.extract()
117+
except simple_mp4_parser.BoxNotFoundError as ex:
118+
raise exceptions.MapillaryInvalidVideoError(
119+
f"Invalid video: {ex}"
120+
) from ex
121+
except construct_mp4_parser.BoxNotFoundError as ex:
122+
raise exceptions.MapillaryInvalidVideoError(
123+
f"Invalid video: {ex}"
124+
) from ex
116125
except exceptions.MapillaryVideoGPSNotFoundError:
117126
pass
118127

119128
if ft is None or types.FileType.VIDEO in ft or types.FileType.CAMM in ft:
120129
extractor = CAMMVideoExtractor(self.video_path)
121130
try:
122131
return extractor.extract()
132+
except simple_mp4_parser.BoxNotFoundError as ex:
133+
raise exceptions.MapillaryInvalidVideoError(
134+
f"Invalid video: {ex}"
135+
) from ex
136+
except construct_mp4_parser.BoxNotFoundError as ex:
137+
raise exceptions.MapillaryInvalidVideoError(
138+
f"Invalid video: {ex}"
139+
) from ex
123140
except exceptions.MapillaryVideoGPSNotFoundError:
124141
pass
125142

126143
if ft is None or types.FileType.VIDEO in ft or types.FileType.BLACKVUE in ft:
127144
extractor = BlackVueVideoExtractor(self.video_path)
128145
try:
129146
return extractor.extract()
147+
except simple_mp4_parser.BoxNotFoundError as ex:
148+
raise exceptions.MapillaryInvalidVideoError(
149+
f"Invalid video: {ex}"
150+
) from ex
151+
except construct_mp4_parser.BoxNotFoundError as ex:
152+
raise exceptions.MapillaryInvalidVideoError(
153+
f"Invalid video: {ex}"
154+
) from ex
130155
except exceptions.MapillaryVideoGPSNotFoundError:
131156
pass
132157

mapillary_tools/mp4/construct_mp4_parser.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,10 @@ class BoxDict(T.TypedDict, total=True):
370370
SwitchMapType = T.Dict[BoxType, T.Union[C.Construct, "SwitchMapType"]]
371371

372372

373+
class BoxNotFoundError(Exception):
374+
pass
375+
376+
373377
class Box64ConstructBuilder:
374378
"""
375379
Build a box struct that **parses** MP4 boxes with both 32-bit and 64-bit sizes.
@@ -591,7 +595,7 @@ def find_box_at_pathx(
591595
) -> BoxDict:
592596
found = find_box_at_path(box, path)
593597
if found is None:
594-
raise ValueError(f"box at path {path} not found")
598+
raise BoxNotFoundError(f"box at path {path} not found")
595599
return found
596600

597601

mapillary_tools/process_geotag_properties.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from __future__ import annotations
22

3-
import collections
43
import datetime
54
import logging
65
import typing as T
76
from pathlib import Path
87

8+
import humanize
99
from tqdm import tqdm
1010

1111
from . import constants, exceptions, exif_write, types, utils
@@ -217,17 +217,19 @@ def _write_metadatas(
217217
LOG.info("Check the description file for details: %s", desc_path)
218218

219219

220-
def _is_error_skipped(error_type: str, skipped_process_errors: set[T.Type[Exception]]):
221-
skipped_process_error_names = set(err.__name__ for err in skipped_process_errors)
222-
skip_all = Exception in skipped_process_errors
223-
return skip_all or error_type in skipped_process_error_names
220+
def _is_error_skipped(
221+
error_type: type[Exception], skipped_process_errors: set[type[Exception]]
222+
):
223+
return (Exception in skipped_process_errors) or (
224+
error_type in skipped_process_errors
225+
)
224226

225227

226228
def _show_stats(
227229
metadatas: T.Sequence[types.MetadataOrError],
228230
skipped_process_errors: set[T.Type[Exception]],
229231
) -> None:
230-
LOG.info("========== Process summary ==========")
232+
LOG.info("==> Process summary")
231233

232234
metadatas_by_filetype: dict[types.FileType, list[types.MetadataOrError]] = {}
233235
for metadata in metadatas:
@@ -244,9 +246,7 @@ def _show_stats(
244246
metadata
245247
for metadata in metadatas
246248
if isinstance(metadata, types.ErrorMetadata)
247-
and not _is_error_skipped(
248-
metadata.error.__class__.__name__, skipped_process_errors
249-
)
249+
and not _is_error_skipped(type(metadata.error), skipped_process_errors)
250250
]
251251
if critical_error_metadatas:
252252
raise exceptions.MapillaryProcessError(
@@ -262,38 +262,35 @@ def _show_stats_per_filetype(
262262
good_metadatas: list[types.Metadata]
263263
good_metadatas, error_metadatas = types.separate_errors(metadatas)
264264

265-
filesize_to_upload = sum(
266-
[0 if m.filesize is None else m.filesize for m in good_metadatas]
267-
)
268-
269-
LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value)
265+
LOG.info(f"{len(metadatas)} {filetype.value} read in total")
270266
if good_metadatas:
267+
total_filesize = sum(
268+
[0 if m.filesize is None else m.filesize for m in good_metadatas]
269+
)
271270
LOG.info(
272-
"\t %8d %s(s) (%s MB) are ready to be uploaded",
273-
len(good_metadatas),
274-
filetype.value,
275-
round(filesize_to_upload / 1024 / 1024, 1),
271+
f"\t{len(good_metadatas)} ({humanize.naturalsize(total_filesize)}) ready"
276272
)
277273

278-
error_counter = collections.Counter(
279-
metadata.error.__class__.__name__ for metadata in error_metadatas
280-
)
274+
errors_by_type: dict[type[Exception], list[types.ErrorMetadata]] = {}
275+
for metadata in error_metadatas:
276+
errors_by_type.setdefault(type(metadata.error), []).append(metadata)
281277

282-
for error_type, count in error_counter.items():
278+
for error_type, errors in errors_by_type.items():
279+
total_filesize = sum([utils.get_file_size_quietly(m.filename) for m in errors])
283280
if _is_error_skipped(error_type, skipped_process_errors):
284281
LOG.warning(
285-
"\t %8d %s(s) skipped due to %s", count, filetype.value, error_type
282+
f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
286283
)
287284
else:
288285
LOG.error(
289-
"\t %8d %s(s) failed due to %s", count, filetype.value, error_type
286+
f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
290287
)
291288

292289

293290
def _validate_metadatas(
294291
metadatas: T.Collection[types.MetadataOrError], num_processes: int | None
295292
) -> list[types.MetadataOrError]:
296-
LOG.debug("Validating %d metadatas", len(metadatas))
293+
LOG.info(f"==> Validating {len(metadatas)} metadatas...")
297294

298295
# validating metadatas is slow, hence multiprocessing
299296

mapillary_tools/process_sequence_properties.py

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import os
88
import typing as T
99

10+
import humanize
11+
1012
from . import constants, exceptions, geo, types, utils
1113
from .serializer.description import DescriptionJSONSerializer
1214

@@ -215,7 +217,7 @@ def _is_video_stationary(
215217
def _check_video_limits(
216218
video_metadatas: T.Iterable[types.VideoMetadata],
217219
max_sequence_filesize_in_bytes: int | None,
218-
max_avg_speed: float,
220+
max_capture_speed_kmh: float,
219221
max_radius_for_stationary_check: float,
220222
) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
221223
output_video_metadatas: list[types.VideoMetadata] = []
@@ -238,7 +240,7 @@ def _check_video_limits(
238240
)
239241
if video_filesize > max_sequence_filesize_in_bytes:
240242
raise exceptions.MapillaryFileTooLargeError(
241-
f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
243+
f"Video file size {humanize.naturalsize(video_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
242244
)
243245

244246
contains_null_island = any(
@@ -249,15 +251,19 @@ def _check_video_limits(
249251
"GPS coordinates in Null Island (0, 0)"
250252
)
251253

254+
avg_speed_kmh = (
255+
geo.avg_speed(video_metadata.points) * 3.6
256+
) # Convert m/s to km/h
252257
too_fast = (
253258
len(video_metadata.points) >= 2
254-
and geo.avg_speed(video_metadata.points) > max_avg_speed
259+
and avg_speed_kmh > max_capture_speed_kmh
255260
)
256261
if too_fast:
257262
raise exceptions.MapillaryCaptureSpeedTooFastError(
258-
f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
263+
f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
259264
)
260265
except exceptions.MapillaryDescriptionError as ex:
266+
LOG.error(f"{_video_name(video_metadata)}: {ex}")
261267
error_metadatas.append(
262268
types.describe_error_metadata(
263269
exc=ex,
@@ -268,18 +274,17 @@ def _check_video_limits(
268274
else:
269275
output_video_metadatas.append(video_metadata)
270276

271-
if error_metadatas:
272-
LOG.info(
273-
f"Video validation: {len(output_video_metadatas)} valid, {len(error_metadatas)} errors"
274-
)
275-
276277
return output_video_metadatas, error_metadatas
277278

278279

280+
def _video_name(video_metadata: types.VideoMetadata) -> str:
281+
return video_metadata.filename.name
282+
283+
279284
def _check_sequences_by_limits(
280285
input_sequences: T.Sequence[PointSequence],
281286
max_sequence_filesize_in_bytes: int | None,
282-
max_avg_speed: float,
287+
max_capture_speed_kmh: float,
283288
) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
284289
output_sequences: list[PointSequence] = []
285290
output_errors: list[types.ErrorMetadata] = []
@@ -295,7 +300,7 @@ def _check_sequences_by_limits(
295300
)
296301
if sequence_filesize > max_sequence_filesize_in_bytes:
297302
raise exceptions.MapillaryFileTooLargeError(
298-
f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
303+
f"Sequence file size {humanize.naturalsize(sequence_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
299304
)
300305

301306
contains_null_island = any(
@@ -306,12 +311,14 @@ def _check_sequences_by_limits(
306311
"GPS coordinates in Null Island (0, 0)"
307312
)
308313

309-
too_fast = len(sequence) >= 2 and geo.avg_speed(sequence) > max_avg_speed
314+
avg_speed_kmh = geo.avg_speed(sequence) * 3.6 # Convert m/s to km/h
315+
too_fast = len(sequence) >= 2 and avg_speed_kmh > max_capture_speed_kmh
310316
if too_fast:
311317
raise exceptions.MapillaryCaptureSpeedTooFastError(
312-
f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
318+
f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
313319
)
314320
except exceptions.MapillaryDescriptionError as ex:
321+
LOG.error(f"{_sequence_name(sequence)}: {ex}")
315322
for image in sequence:
316323
output_errors.append(
317324
types.describe_error_metadata(
@@ -326,14 +333,16 @@ def _check_sequences_by_limits(
326333
len(s) for s in input_sequences
327334
)
328335

329-
if output_errors:
330-
LOG.info(
331-
f"Sequence validation: {len(output_sequences)} valid, {len(output_errors)} errors"
332-
)
333-
334336
return output_sequences, output_errors
335337

336338

339+
def _sequence_name(sequence: T.Sequence[types.ImageMetadata]) -> str:
340+
if not sequence:
341+
return "N/A"
342+
image = sequence[0]
343+
return f"{image.filename.parent.name}/{image.filename.name}"
344+
345+
337346
def _group_by_folder_and_camera(
338347
image_metadatas: list[types.ImageMetadata],
339348
) -> list[list[types.ImageMetadata]]:
@@ -594,8 +603,10 @@ def process_sequence_properties(
594603
interpolate_directions: bool = False,
595604
duplicate_distance: float = constants.DUPLICATE_DISTANCE,
596605
duplicate_angle: float = constants.DUPLICATE_ANGLE,
597-
max_avg_speed: float = constants.MAX_AVG_SPEED,
606+
max_capture_speed_kmh: float = constants.MAX_CAPTURE_SPEED_KMH,
598607
) -> list[types.MetadataOrError]:
608+
LOG.info("==> Processing sequences...")
609+
599610
max_sequence_filesize_in_bytes = constants.MAX_SEQUENCE_FILESIZE
600611
max_sequence_pixels = constants.MAX_SEQUENCE_PIXELS
601612

@@ -611,14 +622,14 @@ def process_sequence_properties(
611622
elif isinstance(metadata, types.VideoMetadata):
612623
video_metadatas.append(metadata)
613624
else:
614-
raise RuntimeError(f"invalid metadata type: {metadata}")
625+
raise ValueError(f"invalid metadata type: {metadata}")
615626

616627
if video_metadatas:
617628
# Check limits for videos
618629
video_metadatas, video_error_metadatas = _check_video_limits(
619630
video_metadatas,
620631
max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
621-
max_avg_speed=max_avg_speed,
632+
max_capture_speed_kmh=max_capture_speed_kmh,
622633
max_radius_for_stationary_check=10.0,
623634
)
624635
error_metadatas.extend(video_error_metadatas)
@@ -668,7 +679,7 @@ def process_sequence_properties(
668679
sequences, errors = _check_sequences_by_limits(
669680
sequences,
670681
max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
671-
max_avg_speed=max_avg_speed,
682+
max_capture_speed_kmh=max_capture_speed_kmh,
672683
)
673684
error_metadatas.extend(errors)
674685

0 commit comments

Comments
 (0)