diff --git a/mapillary_tools/constants.py b/mapillary_tools/constants.py index 64be1202a..8f88fb781 100644 --- a/mapillary_tools/constants.py +++ b/mapillary_tools/constants.py @@ -7,10 +7,15 @@ ANSI_BOLD = "\033[1m" ANSI_RESET_ALL = "\033[0m" +# In meters CUTOFF_DISTANCE = float(os.getenv(_ENV_PREFIX + "CUTOFF_DISTANCE", 600)) +# In seconds CUTOFF_TIME = float(os.getenv(_ENV_PREFIX + "CUTOFF_TIME", 60)) DUPLICATE_DISTANCE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_DISTANCE", 0.1)) DUPLICATE_ANGLE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_ANGLE", 5)) +MAX_AVG_SPEED = float( + os.getenv(_ENV_PREFIX + "MAX_AVG_SPEED", 400_000 / 3600) +) # 400 KM/h # in seconds VIDEO_SAMPLE_INTERVAL = float(os.getenv(_ENV_PREFIX + "VIDEO_SAMPLE_INTERVAL", -1)) # in meters diff --git a/mapillary_tools/exceptions.py b/mapillary_tools/exceptions.py index 16c3f6e6a..45d1491b9 100644 --- a/mapillary_tools/exceptions.py +++ b/mapillary_tools/exceptions.py @@ -118,3 +118,15 @@ class MapillaryUploadUnauthorizedError(MapillaryUserError): class MapillaryMetadataValidationError(MapillaryUserError, MapillaryDescriptionError): exit_code = 15 + + +class MapillaryFileTooLargeError(MapillaryDescriptionError): + pass + + +class MapillaryCaptureSpeedTooFastError(MapillaryDescriptionError): + pass + + +class MapillaryNullIslandError(MapillaryDescriptionError): + pass diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index ff039a41e..7f77a7d5e 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -4,20 +4,19 @@ import os import typing as T -from . import constants, geo, types -from .exceptions import MapillaryBadParameterError, MapillaryDuplicationError +from . import constants, exceptions, geo, types, utils LOG = logging.getLogger(__name__) -Point = T.TypeVar("Point", bound=geo.Point) -PointSequence = T.List[Point] +PointLike = T.TypeVar("PointLike", bound=geo.Point) +PointSequence = T.List[PointLike] -def cut_sequence_by_time_distance( +def cut_sequence_by_time_or_distance( sequence: PointSequence, - cutoff_distance: float, - cutoff_time: float, + cutoff_distance: T.Optional[float] = None, + cutoff_time: T.Optional[float] = None, ) -> T.List[PointSequence]: sequences: T.List[PointSequence] = [] @@ -26,18 +25,41 @@ def cut_sequence_by_time_distance( for prev, cur in geo.pairwise(sequence): # invariant: prev is processed + + # Cut by distance distance = geo.gps_distance( (prev.lat, prev.lon), (cur.lat, cur.lon), ) - if cutoff_distance <= distance: - sequences.append([cur]) - continue + if cutoff_distance is not None: + if cutoff_distance <= distance: + LOG.debug( + "Cut the sequence because the distance gap between two images (%s meters) exceeds the cutoff distance (%s meters): %s: %s -> %s", + round(distance, 2), + round(cutoff_distance, 2), + prev.filename.parent, + prev.filename.name, + cur.filename.name, + ) + sequences.append([cur]) + continue + + # Cut by time time_diff = cur.time - prev.time assert 0 <= time_diff, "sequence must be sorted by capture times" - if cutoff_time <= time_diff: - sequences.append([cur]) - continue + if cutoff_time is not None: + if cutoff_time <= time_diff: + LOG.debug( + "Cut the sequence because the time gap between two images (%s seconds) exceeds the cutoff time (%s seconds): %s: %s -> %s", + round(time_diff, 2), + round(cutoff_time, 2), + prev.filename.parent, + prev.filename.name, + cur.filename.name, + ) + sequences.append([cur]) + continue + sequences[-1].append(cur) # 
invariant: cur is processed @@ -76,7 +98,7 @@ def duplication_check( msg = f"Duplicate of its previous image in terms of distance <= {max_duplicate_distance} and angle <= {max_duplicate_angle}" dups.append( types.describe_error_metadata( - MapillaryDuplicationError( + exceptions.MapillaryDuplicationError( msg, types.as_desc(cur), distance=distance, @@ -159,22 +181,14 @@ def cut_sequence( return sequences -def _group_sort_images_by_folder( +def _group_by( image_metadatas: T.List[types.ImageMetadata], -) -> T.List[T.List[types.ImageMetadata]]: - # group images by parent directory - sequences_by_parent: T.Dict[str, T.List[types.ImageMetadata]] = {} - for image_metadata in image_metadatas: - filename = image_metadata.filename.resolve() - sequences_by_parent.setdefault(str(filename.parent), []).append(image_metadata) - - sequences = list(sequences_by_parent.values()) - for sequence in sequences: - sequence.sort( - key=lambda metadata: metadata.sort_key(), - ) - - return sequences + group_key_func=T.Callable[[types.ImageMetadata], T.Hashable], +) -> T.Dict[T.Hashable, T.List[types.ImageMetadata]]: + grouped: T.Dict[T.Hashable, T.List[types.ImageMetadata]] = {} + for metadata in image_metadatas: + grouped.setdefault(group_key_func(metadata), []).append(metadata) + return grouped def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None: @@ -218,54 +232,181 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None: def _parse_filesize_in_bytes(filesize_str: str) -> int: filesize_str = filesize_str.strip().upper() - if filesize_str.endswith("B"): - return int(filesize_str[:-1]) - elif filesize_str.endswith("K"): - return int(filesize_str[:-1]) * 1024 - elif filesize_str.endswith("M"): - return int(filesize_str[:-1]) * 1024 * 1024 - elif filesize_str.endswith("G"): - return int(filesize_str[:-1]) * 1024 * 1024 * 1024 - else: - return int(filesize_str) + try: + if filesize_str.endswith("B"): + return int(filesize_str[:-1]) + elif filesize_str.endswith("K"): + return int(filesize_str[:-1]) * 1024 + elif filesize_str.endswith("M"): + return int(filesize_str[:-1]) * 1024 * 1024 + elif filesize_str.endswith("G"): + return int(filesize_str[:-1]) * 1024 * 1024 * 1024 + else: + return int(filesize_str) + except ValueError: + raise exceptions.MapillaryBadParameterError( + f"Expect valid file size that ends with B, K, M, or G, but got {filesize_str}" + ) def _parse_pixels(pixels_str: str) -> int: pixels_str = pixels_str.strip().upper() - if pixels_str.endswith("K"): - return int(pixels_str[:-1]) * 1000 - elif pixels_str.endswith("M"): - return int(pixels_str[:-1]) * 1000 * 1000 - elif pixels_str.endswith("G"): - return int(pixels_str[:-1]) * 1000 * 1000 * 1000 + try: + if pixels_str.endswith("K"): + return int(pixels_str[:-1]) * 1000 + elif pixels_str.endswith("M"): + return int(pixels_str[:-1]) * 1000 * 1000 + elif pixels_str.endswith("G"): + return int(pixels_str[:-1]) * 1000 * 1000 * 1000 + else: + return int(pixels_str) + except ValueError: + raise exceptions.MapillaryBadParameterError( + f"Expect valid number of pixels that ends with K, M, or G, but got {pixels_str}" + ) + + +def _avg_speed(sequence: T.Sequence[PointLike]) -> float: + total_distance = 0.0 + for cur, nxt in geo.pairwise(sequence): + total_distance += geo.gps_distance( + (cur.lat, cur.lon), + (nxt.lat, nxt.lon), + ) + + if sequence: + time_diff = sequence[-1].time - sequence[0].time else: - return int(pixels_str) + time_diff = 0.0 + + if time_diff == 0.0: + return float("inf") + + return total_distance / 
time_diff + + +def _check_video_limits( + video_metadatas: T.Sequence[types.VideoMetadata], + max_sequence_filesize_in_bytes: int, + max_avg_speed: float, +) -> T.Tuple[T.List[types.VideoMetadata], T.List[types.ErrorMetadata]]: + error_metadatas: T.List[types.ErrorMetadata] = [] + output_video_metadatas: T.List[types.VideoMetadata] = [] + + for video_metadata in video_metadatas: + if video_metadata.filesize is None: + filesize = utils.get_file_size(video_metadata.filename) + else: + filesize = video_metadata.filesize + + if filesize > max_sequence_filesize_in_bytes: + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryFileTooLargeError( + f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)", + ), + filename=video_metadata.filename, + filetype=video_metadata.filetype, + ) + ) + elif any(p.lat == 0 and p.lon == 0 for p in video_metadata.points): + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryNullIslandError( + "Found GPS coordinates in Null Island (0, 0)", + ), + filename=video_metadata.filename, + filetype=video_metadata.filetype, + ) + ) + elif ( + len(video_metadata.points) >= 2 + and _avg_speed(video_metadata.points) > max_avg_speed + ): + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryCaptureSpeedTooFastError( + f"Capture speed is too fast (exceeds {round(max_avg_speed, 3)} m/s)", + ), + filename=video_metadata.filename, + filetype=video_metadata.filetype, + ) + ) + else: + output_video_metadatas.append(video_metadata) + + return output_video_metadatas, error_metadatas + + +def _check_sequence_limits( + sequences: T.Sequence[PointSequence], + max_sequence_filesize_in_bytes: int, + max_avg_speed: float, +) -> T.Tuple[T.List[PointSequence], T.List[types.ErrorMetadata]]: + error_metadatas: T.List[types.ErrorMetadata] = [] + output_sequences: T.List[PointSequence] = [] + + for sequence in sequences: + filesize = 0 + for image in sequence: + if image.filesize is None: + filesize += utils.get_file_size(image.filename) + else: + filesize += image.filesize + + if filesize > max_sequence_filesize_in_bytes: + for image in sequence: + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryFileTooLargeError( + f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)", + ), + filename=image.filename, + filetype=types.FileType.IMAGE, + ) + ) + elif any(image.lat == 0 and image.lon == 0 for image in sequence): + for image in sequence: + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryNullIslandError( + "Found GPS coordinates in Null Island (0, 0)", + ), + filename=image.filename, + filetype=types.FileType.IMAGE, + ) + ) + elif len(sequence) >= 2 and _avg_speed(sequence) > max_avg_speed: + for image in sequence: + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryCaptureSpeedTooFastError( + f"Capture speed is too fast (exceeds {round(max_avg_speed, 3)} m/s)", + ), + filename=image.filename, + filetype=types.FileType.IMAGE, + ) + ) + else: + output_sequences.append(sequence) + + return output_sequences, error_metadatas def process_sequence_properties( metadatas: T.Sequence[types.MetadataOrError], - cutoff_distance=constants.CUTOFF_DISTANCE, - cutoff_time=constants.CUTOFF_TIME, - interpolate_directions=False, - duplicate_distance=constants.DUPLICATE_DISTANCE, - duplicate_angle=constants.DUPLICATE_ANGLE, + 
cutoff_distance: float = constants.CUTOFF_DISTANCE, + cutoff_time: float = constants.CUTOFF_TIME, + interpolate_directions: bool = False, + duplicate_distance: float = constants.DUPLICATE_DISTANCE, + duplicate_angle: float = constants.DUPLICATE_ANGLE, + max_avg_speed: float = constants.MAX_AVG_SPEED, ) -> T.List[types.MetadataOrError]: - try: - max_sequence_filesize_in_bytes = _parse_filesize_in_bytes( - constants.MAX_SEQUENCE_FILESIZE - ) - except ValueError: - raise MapillaryBadParameterError( - f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_FILESIZE to be a valid filesize that ends with B, K, M, or G, but got {constants.MAX_SEQUENCE_FILESIZE}" - ) - - try: - max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS) - except ValueError: - raise MapillaryBadParameterError( - f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_PIXELS to be a valid number of pixels that ends with K, M, or G, but got {constants.MAX_SEQUENCE_PIXELS}" - ) + max_sequence_filesize_in_bytes = _parse_filesize_in_bytes( + constants.MAX_SEQUENCE_FILESIZE + ) + max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS) error_metadatas: T.List[types.ErrorMetadata] = [] image_metadatas: T.List[types.ImageMetadata] = [] @@ -281,58 +422,134 @@ def process_sequence_properties( else: raise RuntimeError(f"invalid metadata type: {metadata}") - sequences_by_folder = _group_sort_images_by_folder(image_metadatas) - # make sure they are sorted - for sequence in sequences_by_folder: - for cur, nxt in geo.pairwise(sequence): - assert cur.time <= nxt.time, "sequence must be sorted" - - for s in sequences_by_folder: - _interpolate_subsecs_for_sorting(s) - - # cut sequences - sequences_after_cut: T.List[PointSequence] = [] - for sequence in sequences_by_folder: - cut = cut_sequence_by_time_distance(sequence, cutoff_distance, cutoff_time) - sequences_after_cut.extend(cut) - assert len(image_metadatas) == sum(len(s) for s in sequences_after_cut) - - # reuse imaeg_metadatas to store processed image metadatas - image_metadatas = [] + # Check limits for videos + video_metadatas, video_error_metadatas = _check_video_limits( + video_metadatas, + max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes, + max_avg_speed=max_avg_speed, + ) + error_metadatas.extend(video_error_metadatas) + + input_sequences: T.List[PointSequence] + output_sequences: T.List[PointSequence] + + # Group by folder and camera + grouped = _group_by( + image_metadatas, + lambda metadata: ( + str(metadata.filename.parent), + metadata.MAPDeviceMake, + metadata.MAPDeviceModel, + metadata.width, + metadata.height, + ), + ) + for key in grouped: + LOG.debug("Group sequences by %s: %s images", key, len(grouped[key])) + output_sequences = list(grouped.values()) + LOG.info( + "Found %s sequences from different folders and cameras", len(output_sequences) + ) - sequence_idx = 0 + # Make sure each sequence is sorted (in-place update) + input_sequences = output_sequences + for sequence in input_sequences: + sequence.sort( + key=lambda metadata: metadata.sort_key(), + ) + output_sequences = input_sequences + + # Interpolate subseconds for same timestamps (in-place update) + input_sequences = output_sequences + for sequence in input_sequences: + _interpolate_subsecs_for_sorting(sequence) + output_sequences = input_sequences + + # Cut sequences by cutoff time + # NOTE: Do not cut by distance here because it affects the speed limit check + input_sequences = output_sequences + output_sequences = [] + for sequence in input_sequences: + 
output_sequences.extend( + cut_sequence_by_time_or_distance(sequence, cutoff_time=cutoff_time) + ) + LOG.info( + "Found %s sequences after cut by cutoff time %d seconds", + len(output_sequences), + cutoff_time, + ) - for sequence in sequences_after_cut: - # duplication check - dedups, dups = duplication_check( + # Duplication check + input_sequences = output_sequences + output_sequences = [] + for sequence in input_sequences: + output_sequence, errors = duplication_check( sequence, max_duplicate_distance=duplicate_distance, max_duplicate_angle=duplicate_angle, ) - assert len(sequence) == len(dedups) + len(dups) - error_metadatas.extend(dups) + assert len(sequence) == len(output_sequence) + len(errors) + output_sequences.append(output_sequence) + error_metadatas.extend(errors) - # interpolate angles + # Interpolate angles (in-place update) + input_sequences = output_sequences + for sequence in input_sequences: if interpolate_directions: - for p in dedups: - p.angle = None - geo.interpolate_directions_if_none(dedups) - - # cut sequence per MAX_SEQUENCE_LENGTH images - cut = cut_sequence( - dedups, - constants.MAX_SEQUENCE_LENGTH, - max_sequence_filesize_in_bytes, - max_sequence_pixels, + for image in sequence: + image.angle = None + geo.interpolate_directions_if_none(sequence) + output_sequences = input_sequences + + # Cut sequences by max number of images, max filesize, and max pixels + input_sequences = output_sequences + output_sequences = [] + for sequence in input_sequences: + output_sequences.extend( + cut_sequence( + sequence, + constants.MAX_SEQUENCE_LENGTH, + max_sequence_filesize_in_bytes, + max_sequence_pixels, + ) ) + LOG.info("Found %s sequences after cut by sequence limits", len(output_sequences)) - # assign sequence UUIDs - for c in cut: - for p in c: - # using incremental id as shorter "uuid", so we can save some space for the desc file - p.MAPSequenceUUID = str(sequence_idx) - image_metadatas.append(p) - sequence_idx += 1 + # Check limits for sequences + input_sequences = output_sequences + output_sequences, errors = _check_sequence_limits( + input_sequences, max_sequence_filesize_in_bytes, max_avg_speed + ) + error_metadatas.extend(errors) + LOG.info("Found %s sequences after sequence limit checks", len(output_sequences)) + + # Cut sequences by cutoff distance + # NOTE: The speed limit check probably rejects most of anomalies + input_sequences = output_sequences + output_sequences = [] + for sequence in input_sequences: + output_sequences.extend( + cut_sequence_by_time_or_distance(sequence, cutoff_distance=cutoff_distance) + ) + LOG.info( + "Found %s sequences after cut by cutoff distance %d meters", + len(output_sequences), + cutoff_distance, + ) + + # Assign sequence UUIDs (in-place update) + sequence_idx = 0 + input_sequences = output_sequences + for sequence in input_sequences: + for image in sequence: + # using incremental id as shorter "uuid", so we can save some space for the desc file + image.MAPSequenceUUID = str(sequence_idx) + sequence_idx += 1 + output_sequences = input_sequences + + image_metadatas = [] + for sequence in input_sequences: + image_metadatas.extend(sequence) results = error_metadatas + image_metadatas + video_metadatas diff --git a/tests/data/gpx/sf_30km_h.gpx b/tests/data/gpx/sf_30km_h.gpx new file mode 100644 index 000000000..b56b45faf --- /dev/null +++ b/tests/data/gpx/sf_30km_h.gpx @@ -0,0 +1,638 @@ + + + + New file 1 + + gpx.studio + + + + + New file 1 + + + 94.75 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 93 + + 
[remaining content of the 638-line tests/data/gpx/sf_30km_h.gpx omitted: a gpx.studio GPX track export through San Francisco at roughly 30 km/h; only elevation values and OSM way tags survive in this extract — the trackpoint coordinates and timestamps do not]
diff --git a/tests/integration/test_gopro.py b/tests/integration/test_gopro.py
index 0624dbacd..5ebe90698 100644
--- a/tests/integration/test_gopro.py
+++ b/tests/integration/test_gopro.py
@@ -18,7 +18,12 @@ IMPORT_PATH = "tests/data/gopro_data"
-
+TEST_ENVS = {
+    "MAPILLARY_TOOLS_GOPRO_GPS_FIXES": "0,2,3",
+    "MAPILLARY_TOOLS_GOPRO_MAX_DOP100": "100000",
+    "MAPILLARY_TOOLS_GOPRO_GPS_PRECISION": "10000000",
+    "MAPILLARY_TOOLS_MAX_AVG_SPEED": "200000",  # in m/s; effectively disables the speed check
+}
 EXPECTED_DESCS: T.List[T.Any] = [
     {
         "MAPAltitude": 9540.24,
@@ -111,22 +116,8 @@ def setup_data(tmpdir: py.path.local):
     tmpdir.remove(ignore_errors=True)
 
 
-@pytest.fixture
-def setup_envvars():
-    # this sample hero8.mp4 doesn't have any good GPS points,
-    # so we do not filter out bad GPS points
-    os.environ["MAPILLARY_TOOLS_GOPRO_GPS_FIXES"] = "0,2,3"
-    os.environ["MAPILLARY_TOOLS_GOPRO_MAX_DOP100"] = "100000"
-    os.environ["MAPILLARY_TOOLS_GOPRO_GPS_PRECISION"] = "10000000"
-    yield
-    del os.environ["MAPILLARY_TOOLS_GOPRO_GPS_FIXES"]
-    del os.environ["MAPILLARY_TOOLS_GOPRO_MAX_DOP100"]
-    del os.environ["MAPILLARY_TOOLS_GOPRO_GPS_PRECISION"]
-
-
 @pytest.mark.usefixtures("setup_config")
@pytest.mark.usefixtures("setup_upload") -@pytest.mark.usefixtures("setup_envvars") def test_process_gopro_hero8( setup_data: py.path.local, use_exiftool: bool = False, @@ -137,7 +128,9 @@ def test_process_gopro_hero8( args = f"{EXECUTABLE} video_process --video_sample_interval=2 --video_sample_distance=-1 --geotag_source=gopro_videos {str(video_path)}" if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) + env = os.environ.copy() + env.update(TEST_ENVS) + x = subprocess.run(args, shell=True, env=env) assert x.returncode == 0, x.stderr sample_dir = setup_data.join("mapillary_sampled_video_frames") desc_path = sample_dir.join("mapillary_image_description.json") @@ -150,14 +143,12 @@ def test_process_gopro_hero8( @pytest.mark.usefixtures("setup_config") @pytest.mark.usefixtures("setup_upload") -@pytest.mark.usefixtures("setup_envvars") def test_process_gopro_hero8_with_exiftool(setup_data: py.path.local): return test_process_gopro_hero8(setup_data, use_exiftool=True) @pytest.mark.usefixtures("setup_config") @pytest.mark.usefixtures("setup_upload") -@pytest.mark.usefixtures("setup_envvars") def test_process_gopro_hero8_with_exiftool_multiple_videos_with_the_same_name( setup_data: py.path.local, ): diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index 2a00d5ba8..d829f1de7 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -14,8 +14,6 @@ run_exiftool_and_generate_geotag_args, setup_config, setup_data, - setup_upload, - USERNAME, validate_and_extract_zip, verify_descs, ) @@ -72,12 +70,6 @@ } -def test_basic(): - for option in ["--version", "--help"]: - x = subprocess.run(f"{EXECUTABLE} {option}", shell=True) - assert x.returncode == 0, x.stderr - - def _local_to_utc(ct: str): return ( datetime.datetime.fromisoformat(ct) @@ -86,6 +78,12 @@ def _local_to_utc(ct: str): ) +def test_basic(): + for option in ["--version", "--help"]: + x = subprocess.run(f"{EXECUTABLE} {option}", shell=True) + assert x.returncode == 0, x.stderr + + def test_process_images_with_defaults( setup_data: py.path.local, use_exiftool: bool = False, @@ -371,15 +369,22 @@ def filter_out_errors(descs): return [desc for desc in descs if "error" not in desc] -def test_geotagging_from_gpx(setup_data: py.path.local): +def test_geotagging_images_from_gpx(setup_data: py.path.local): gpx_file = setup_data.join("test.gpx") with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) + images = setup_data.join("images") + x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --geotag_source_path {gpx_file} --skip_process_errors", + f"""{EXECUTABLE} process {PROCESS_FLAGS} \ + --file_types=image \ + --geotag_source=gpx \ + --geotag_source_path={gpx_file} \ + --skip_process_errors \ + {images} +""", shell=True, ) - assert x.returncode == 0, x.stderr verify_descs( [ { @@ -405,14 +410,15 @@ def test_geotagging_from_gpx(setup_data: py.path.local): }, }, ], - Path(setup_data, "mapillary_image_description.json"), + Path(images, "mapillary_image_description.json"), ) -def test_geotagging_from_gpx_with_offset(setup_data: py.path.local): +def test_geotagging_images_from_gpx_with_offset(setup_data: py.path.local): gpx_file = setup_data.join("test.gpx") with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) + x = subprocess.run( f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --geotag_source_path {gpx_file} 
--interpolation_offset_time=-20 --skip_process_errors", shell=True, @@ -449,7 +455,7 @@ def test_geotagging_from_gpx_with_offset(setup_data: py.path.local): ) -def test_geotagging_from_gpx_use_gpx_start_time(setup_data: py.path.local): +def test_geotagging_images_from_gpx_use_gpx_start_time(setup_data: py.path.local): gpx_file = setup_data.join("test.gpx") with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) @@ -489,7 +495,9 @@ def test_geotagging_from_gpx_use_gpx_start_time(setup_data: py.path.local): ) -def test_geotagging_from_gpx_use_gpx_start_time_with_offset(setup_data: py.path.local): +def test_geotagging_images_from_gpx_use_gpx_start_time_with_offset( + setup_data: py.path.local, +): gpx_file = setup_data.join("test.gpx") with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) @@ -643,19 +651,28 @@ def test_video_process(setup_data: py.path.local): pytest.skip("skip because ffmpeg not installed") video_dir = setup_data.join("videos") - gpx_file = video_dir.join("test.gpx") + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" + video_start_time = "2025_03_14_07_00_00_000" desc_path = video_dir.join("my_samples").join("mapillary_image_description.json") - with gpx_file.open("w") as fp: - fp.write(GPX_CONTENT) x = subprocess.run( - f"{EXECUTABLE} --verbose video_process --video_sample_interval=2 --video_sample_distance=-1 {PROCESS_FLAGS} --skip_process_errors --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} {video_dir} {video_dir.join('my_samples')}", + f"""{EXECUTABLE} --verbose video_process \ + {PROCESS_FLAGS} \ + --video_sample_interval=2 \ + --video_sample_distance=-1 \ + --skip_process_errors \ + --video_start_time {video_start_time} \ + --geotag_source gpx \ + --geotag_source_path {gpx_file} {video_dir} {video_dir.join("my_samples")} +""", shell=True, ) assert x.returncode == 0, x.stderr with open(desc_path) as fp: descs = json.load(fp) - assert 1 == len(find_desc_errors(descs)) - assert 2 == len(filter_out_errors(descs)) + assert 0 == len(find_desc_errors(descs)) + assert 3 == len(filter_out_errors(descs)) def test_video_process_sample_with_multiple_distances(setup_data: py.path.local): @@ -767,54 +784,33 @@ def test_video_process_sample_with_distance(setup_data: py.path.local): ) -@pytest.mark.usefixtures("setup_config") -def test_video_process_and_upload( - setup_upload: py.path.local, setup_data: py.path.local -): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - video_dir = setup_data.join("videos") - gpx_file = video_dir.join("test.gpx") - with gpx_file.open("w") as fp: - fp.write(GPX_CONTENT) - x = subprocess.run( - f"{EXECUTABLE} video_process_and_upload {PROCESS_FLAGS} --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} --dry_run --user_name={USERNAME} {video_dir} {video_dir.join('my_samples')}", - shell=True, - ) - assert x.returncode != 0, x.stderr - assert 0 == len(setup_upload.listdir()) - - x = subprocess.run( - f"{EXECUTABLE} video_process_and_upload {PROCESS_FLAGS} --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} --skip_process_errors --dry_run --user_name={USERNAME} {video_dir} {video_dir.join('my_samples')}", - shell=True, - ) - assert x.returncode == 0, x.stderr - assert 2 == 
len(setup_upload.listdir()) - for z in setup_upload.listdir(): - validate_and_extract_zip(str(z)) - - def test_video_process_multiple_videos(setup_data: py.path.local): if not IS_FFMPEG_INSTALLED: pytest.skip("skip because ffmpeg not installed") - gpx_file = setup_data.join("test.gpx") desc_path = setup_data.join("my_samples").join("mapillary_image_description.json") sub_folder = setup_data.join("video_sub_folder").mkdir() video_path = setup_data.join("videos").join("sample-5s.mp4") video_path.copy(sub_folder) - with gpx_file.open("w") as fp: - fp.write(GPX_CONTENT) + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" x = subprocess.run( - f"{EXECUTABLE} video_process {PROCESS_FLAGS} --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} {video_path} {setup_data.join('my_samples')}", + f"""{EXECUTABLE} video_process {PROCESS_FLAGS} \ + --video_sample_interval=2 \ + --video_sample_distance=-1 \ + --video_start_time={gpx_start_time} \ + --geotag_source=gpx \ + --geotag_source_path={gpx_file} \ + {video_path} {setup_data.join("my_samples")} +""", shell=True, ) - assert x.returncode != 0, x.stderr + assert x.returncode == 0, x.stderr with open(desc_path) as fp: descs = json.load(fp) for d in descs: assert Path(d["filename"]).is_file(), d["filename"] assert "sample-5s.mp4" in d["filename"] - assert 1 == len(find_desc_errors(descs)) - assert 2 == len(filter_out_errors(descs)) + assert 0 == len(find_desc_errors(descs)) + assert 3 == len(filter_out_errors(descs)) diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py index ccf7d6633..edbea8685 100644 --- a/tests/integration/test_process_and_upload.py +++ b/tests/integration/test_process_and_upload.py @@ -129,7 +129,7 @@ } -def _validate_output(upload_dir: py.path.local, expected): +def _validate_uploads(upload_dir: py.path.local, expected): descs = [] for file in upload_dir.listdir(): if str(file).endswith(".mp4"): @@ -138,13 +138,13 @@ def _validate_output(upload_dir: py.path.local, expected): descs.extend(validate_and_extract_zip(str(file))) else: raise Exception(f"invalid file {file}") + + excludes = ["filename", "filesize", "md5sum", "MAPMetaTags", "MAPSequenceUUID"] + actual = {} for desc in descs: actual[os.path.basename(desc["filename"])] = { - k: v - for k, v in desc.items() - if k - not in ["filename", "filesize", "md5sum", "MAPMetaTags", "MAPSequenceUUID"] + k: v for k, v in desc.items() if k not in excludes } assert expected == actual @@ -166,11 +166,11 @@ def test_process_and_upload(setup_data: py.path.local, setup_upload: py.path.loc ) assert x.returncode == 0, x.stderr if IS_FFMPEG_INSTALLED: - _validate_output( + _validate_uploads( setup_upload, {**EXPECTED_DESCS["gopro"], **EXPECTED_DESCS["image"]} ) else: - _validate_output(setup_upload, {**EXPECTED_DESCS["image"]}) + _validate_uploads(setup_upload, {**EXPECTED_DESCS["image"]}) @pytest.mark.usefixtures("setup_config") @@ -179,8 +179,111 @@ def test_process_and_upload_images_only( setup_upload: py.path.local, ): x = subprocess.run( - f"{EXECUTABLE} --verbose process_and_upload --filetypes=image {UPLOAD_FLAGS} {PROCESS_FLAGS} {setup_data}/images {setup_data}/images {setup_data}/images/DSC00001.JPG --desc_path=-", + f"""{EXECUTABLE} --verbose process_and_upload \ + {UPLOAD_FLAGS} {PROCESS_FLAGS} \ + --filetypes=image \ + --desc_path=- \ + 
{setup_data}/images {setup_data}/images {setup_data}/images/DSC00001.JPG +""", + shell=True, + ) + assert x.returncode == 0, x.stderr + _validate_uploads(setup_upload, EXPECTED_DESCS["image"]) + + +@pytest.mark.usefixtures("setup_config") +def test_video_process_and_upload( + setup_upload: py.path.local, setup_data: py.path.local +): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + video_dir = setup_data.join("videos") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + x = subprocess.run( + f"""{EXECUTABLE} video_process_and_upload \ + {PROCESS_FLAGS} {UPLOAD_FLAGS} \ + --video_sample_interval=2 \ + --video_sample_distance=-1 \ + --video_start_time {gpx_start_time} \ + --geotag_source gpx \ + --geotag_source_path {gpx_file} \ + --desc_path - \ + {video_dir} {video_dir.join("my_samples")} +""", + shell=True, + ) + assert x.returncode == 0, x.stderr + assert 1 == len(setup_upload.listdir()) + expected = { + "sample-5s_NA_000001.jpg": { + "MAPAltitude": 94.75, + "MAPCaptureTime": "2025_03_14_07_00_00_000", + "MAPCompassHeading": { + "MagneticHeading": 0.484, + "TrueHeading": 0.484, + }, + "MAPLatitude": 37.793585, + "MAPLongitude": -122.461396, + "MAPOrientation": 1, + "filetype": "image", + }, + "sample-5s_NA_000002.jpg": { + "MAPAltitude": 93.347, + "MAPCaptureTime": "2025_03_14_07_00_02_000", + "MAPCompassHeading": { + "MagneticHeading": 0.484, + "TrueHeading": 0.484, + }, + "MAPLatitude": 37.7937349, + "MAPLongitude": -122.4613944, + "MAPOrientation": 1, + "filetype": "image", + }, + "sample-5s_NA_000003.jpg": { + "MAPAltitude": 92.492, + "MAPCaptureTime": "2025_03_14_07_00_04_000", + "MAPCompassHeading": { + "MagneticHeading": 343.286, + "TrueHeading": 343.286, + }, + "MAPLatitude": 37.7938825, + "MAPLongitude": -122.4614226, + "MAPOrientation": 1, + "filetype": "image", + }, + } + _validate_uploads(setup_upload, expected) + + +@pytest.mark.usefixtures("setup_config") +def xtest_video_process_and_upload_after_gpx( + setup_upload: py.path.local, setup_data: py.path.local +): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + video_dir = setup_data.join("videos") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" + video_start_time = "2025_03_14_07_01_34_624" + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + x = subprocess.run( + f"""{EXECUTABLE} video_process_and_upload \ + {PROCESS_FLAGS} {UPLOAD_FLAGS} \ + --video_sample_interval=2 \ + --video_sample_distance=-1 \ + --video_start_time {video_start_time} \ + --geotag_source gpx \ + --geotag_source_path {gpx_file} \ + --skip_process_errors \ + --desc_path - \ + {video_dir} {video_dir.join("my_samples")} +""", shell=True, ) assert x.returncode == 0, x.stderr - _validate_output(setup_upload, EXPECTED_DESCS["image"]) + assert 0 == len(setup_upload.listdir()) + _validate_uploads(setup_upload, {}) diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index 401857b98..f1370ff5d 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ -3,7 +3,6 @@ from pathlib import Path import py.path - import pytest from mapillary_tools import ( @@ -22,8 +21,7 @@ def _make_image_metadata( lat: float, time: float, angle: T.Optional[float] = None, - width: int = 0, - height: int = 0, + **kwargs, ) -> types.ImageMetadata: filename = filename.resolve() if not 
filename.exists(): @@ -37,9 +35,8 @@ def _make_image_metadata( lat=lat, time=time, alt=None, + **kwargs, angle=angle, - width=width, - height=height, ) @@ -117,6 +114,66 @@ def test_find_sequences_by_folder(tmpdir: py.path.local): ] == [d.filename for d in actual_sequences[2]] +def test_find_sequences_by_camera(tmpdir: py.path.local): + curdir = tmpdir.mkdir("hello1").mkdir("world2") + sequence: T.List[types.MetadataOrError] = [ + # s1 + _make_image_metadata( + Path(curdir) / Path("hello.jpg"), + 1.00002, + 1.00002, + 2, + 11, + MAPDeviceMake="foo", + MAPDeviceModel="bar", + width=1, + height=1, + ), + _make_image_metadata( + Path(curdir) / Path("foo.jpg"), + 1.00001, + 1.00001, + 3, + 22, + MAPDeviceMake="foo", + MAPDeviceModel="bar", + width=1, + height=1, + ), + # s2 + _make_image_metadata( + Path(curdir) / Path("a.jpg"), + 1.00002, + 1.00002, + 1, + 33, + MAPDeviceMake="foo", + MAPDeviceModel="bar2", + width=1, + height=1, + ), + # s3 + _make_image_metadata( + Path(curdir) / Path("b.jpg"), + 1.00001, + 1.00001, + 1, + 33, + MAPDeviceMake="foo", + MAPDeviceModel="bar2", + width=1, + height=2, + ), + ] + metadatas = psp.process_sequence_properties( + sequence, + ) + uuids = set( + d.MAPSequenceUUID for d in metadatas if isinstance(d, types.ImageMetadata) + ) + assert len(uuids) == 3 + + def test_sequences_sorted(tmpdir: py.path.local): curdir = tmpdir.mkdir("hello1").mkdir("world2") sequence: T.List[types.ImageMetadata] = [ @@ -249,11 +306,21 @@ def test_interpolation(tmpdir: py.path.local): curdir = tmpdir.mkdir("hello222").mkdir("world333") sequence: T.List[types.Metadata] = [ # s1 - _make_image_metadata(Path(curdir) / Path("./a.jpg"), 1, 1, 3, angle=344), - _make_image_metadata(Path(curdir) / Path("./b.jpg"), 0, 1, 4, angle=22), - _make_image_metadata(Path(curdir) / Path("./c.jpg"), 0, 0, 5, angle=-123), - _make_image_metadata(Path(curdir) / Path("./d.jpg"), 0, 0, 1, angle=2), - _make_image_metadata(Path(curdir) / Path("./e.jpg"), 1, 0, 2, angle=123), + _make_image_metadata( + Path(curdir) / Path("./a.jpg"), 0.00002, 0.00001, 3, angle=344 + ), + _make_image_metadata( + Path(curdir) / Path("./b.jpg"), 0.00001, 0.00001, 4, angle=22 + ), + _make_image_metadata( + Path(curdir) / Path("./c.jpg"), 0.00001, 0.00000, 5, angle=-123 + ), + _make_image_metadata( + Path(curdir) / Path("./d.jpg"), 0.00001, 0.00000, 1, angle=2 + ), + _make_image_metadata( + Path(curdir) / Path("./e.jpg"), 0.00002, 0.00000, 2, angle=123 + ), types.VideoMetadata( Path("test_video.mp4"), None, @@ -261,6 +328,7 @@ def test_interpolation(tmpdir: py.path.local): points=[], make="hello", model="world", + filesize=123, ), ] metadatas = psp.process_sequence_properties( @@ -286,11 +354,11 @@ def test_subsec_interpolation(tmpdir: py.path.local): curdir = tmpdir.mkdir("hello222").mkdir("world333") sequence: T.List[types.Metadata] = [ # s1 - _make_image_metadata(Path(curdir) / Path("./a.jpg"), 1, 1, 0.0, 1), - _make_image_metadata(Path(curdir) / Path("./b.jpg"), 0, 1, 1.0, 11), - _make_image_metadata(Path(curdir) / Path("./c.jpg"), 0, 0, 1.0, 22), - _make_image_metadata(Path(curdir) / Path("./d.jpg"), 0, 0, 1.0, 33), - _make_image_metadata(Path(curdir) / Path("./e.jpg"), 1, 0, 2.0, 44), + _make_image_metadata(Path(curdir) / Path("./a.jpg"), 0.00001, 0.00001, 0.0, 1), + _make_image_metadata(Path(curdir) / Path("./b.jpg"), 0.00000, 0.00001, 1.0, 11), + _make_image_metadata(Path(curdir) / Path("./c.jpg"), 0.00001, 0.00001, 1.0, 22), + _make_image_metadata(Path(curdir) / Path("./d.jpg"), 0.00001, 0.00001, 1.0, 33), + 
_make_image_metadata(Path(curdir) / Path("./e.jpg"), 0.00001, 0.00000, 2.0, 44), ] metadatas = psp.process_sequence_properties( sequence, @@ -314,7 +382,7 @@ def test_interpolation_single(tmpdir: py.path.local): curdir = tmpdir.mkdir("hello77").mkdir("world88") sequence = [ # s1 - _make_image_metadata(Path(curdir) / Path("./a.jpg"), 0, 0, 1, angle=123), + _make_image_metadata(Path(curdir) / Path("./a.jpg"), 0.2, 0.3, 1, angle=123), ] metadatas = psp.process_sequence_properties( sequence, @@ -433,16 +501,22 @@ def test_cut_by_pixels(tmpdir: py.path.local): ), _make_image_metadata( Path(curdir) / Path("./b.jpg"), - 9, - 9, - 2, + 2.00001, + 2.00001, + 20, angle=344, width=2, height=2, ), # s1 _make_image_metadata( - Path(curdir) / Path("./c.jpg"), 1, 1, 3, angle=344, width=int(6e9), height=2 + Path(curdir) / Path("./c.jpg"), + 2.00002, + 2.00002, + 30, + angle=344, + width=int(6e9), + height=2, ), ] metadatas = psp.process_sequence_properties( @@ -450,7 +524,7 @@ def test_cut_by_pixels(tmpdir: py.path.local): cutoff_distance=1000000000, cutoff_time=100, interpolate_directions=True, - duplicate_distance=100, + duplicate_distance=1, duplicate_angle=5, ) assert ( @@ -463,3 +537,61 @@ def test_cut_by_pixels(tmpdir: py.path.local): ) == 2 ) + + +def test_video_error(tmpdir: py.path.local): + curdir = tmpdir.mkdir("hello222").mkdir("videos") + sequence: T.List[types.Metadata] = [ + types.VideoMetadata( + Path(curdir) / Path("test_video_null_island.mp4"), + None, + types.FileType.VIDEO, + points=[ + geo.Point(1, -0.00001, -0.00001, 1, angle=None), + geo.Point(1, 0, 0, 1, angle=None), + geo.Point(1, 0.00001, 0.00001, 1, angle=None), + ], + make="hello", + model="world", + filesize=123, + ), + types.VideoMetadata( + Path(curdir) / Path("test_video_too_fast.mp4"), + None, + types.FileType.VIDEO, + points=[ + geo.Point(1, 1, 1, 1, angle=None), + geo.Point(1.1, 1.00001, 1.00001, 1, angle=None), + geo.Point(10, 1, 3, 1, angle=None), + ], + make="hello", + model="world", + filesize=123, + ), + types.VideoMetadata( + Path(curdir) / Path("test_video_file_too_large.mp4"), + None, + types.FileType.VIDEO, + points=[geo.Point(1, 1, 1, 1, angle=None)], + make="hello", + model="world", + filesize=1024 * 1024 * 1024 * 200, + ), + types.VideoMetadata( + Path(curdir) / Path("test_good.mp4"), + None, + types.FileType.VIDEO, + points=[geo.Point(1, 1, 1, 1, angle=None)], + make="hello", + model="world", + filesize=123, + ), + ] + metadatas = psp.process_sequence_properties( + sequence, + cutoff_distance=1000000000, + cutoff_time=100, + interpolate_directions=True, + duplicate_distance=100, + duplicate_angle=5, + )
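
The rejection paths exercised by `test_video_error` all hinge on the average-speed computation this diff adds as `_avg_speed`: total great-circle path length divided by the elapsed time between the first and last point, compared against `constants.MAX_AVG_SPEED` (400 km/h by default, stored in m/s). The sketch below is a self-contained illustration of that logic, not the module itself; `Point` and the haversine `gps_distance` here merely stand in for `mapillary_tools.geo`, whose actual distance formula may differ.

```python
import dataclasses
import math
import typing as T

# Mirrors the default of constants.MAX_AVG_SPEED: 400 km/h expressed in m/s.
# (The real value can be overridden via the MAPILLARY_TOOLS_MAX_AVG_SPEED envvar.)
MAX_AVG_SPEED = 400_000 / 3600  # ~111.1 m/s


@dataclasses.dataclass
class Point:
    time: float  # seconds since the start of the track
    lat: float
    lon: float


def gps_distance(a: T.Tuple[float, float], b: T.Tuple[float, float]) -> float:
    """Great-circle distance in meters (haversine), standing in for geo.gps_distance."""
    lat1, lon1, lat2, lon2 = map(math.radians, (*a, *b))
    h = (
        math.sin((lat2 - lat1) / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2
    )
    return 2 * 6_371_000 * math.asin(math.sqrt(h))


def avg_speed(points: T.Sequence[Point]) -> float:
    """Average speed in m/s over the whole track: path length / elapsed time.

    A zero elapsed time yields infinity, so tracks with a single timestamp fail
    the speed check conservatively instead of dividing by zero.
    """
    total_distance = sum(
        gps_distance((cur.lat, cur.lon), (nxt.lat, nxt.lon))
        for cur, nxt in zip(points, points[1:])
    )
    time_diff = (points[-1].time - points[0].time) if points else 0.0
    return float("inf") if time_diff == 0.0 else total_distance / time_diff


if __name__ == "__main__":
    # Two points ~157 m apart captured one second apart -> ~157 m/s (~565 km/h),
    # which exceeds the 400 km/h default and would be rejected as "too fast".
    track = [Point(0.0, 1.0, 1.0), Point(1.0, 1.001, 1.001)]
    print(avg_speed(track) > MAX_AVG_SPEED)  # True
```

As in the diff, tracks with fewer than two points are exempted from the speed check by the callers: both `_check_video_limits` and `_check_sequence_limits` only evaluate `_avg_speed` when `len(points) >= 2`.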