diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index 474a303fe..4e46f2198 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -6,7 +6,7 @@ from tqdm import tqdm -from .. import exceptions, exif_write, geo, types +from .. import exceptions, exif_write, geo, types, utils from ..exif_read import ExifRead, ExifReadABC from .geotag_from_generic import GeotagImagesFromGeneric @@ -64,6 +64,7 @@ def build_image_metadata( image_metadata = types.ImageMetadata( filename=image_path, md5sum=None, + filesize=utils.get_file_size(image_path), time=geo.as_unix_time(capture_time), lat=lat, lon=lon, diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py index 3ed8465ed..8f67aef89 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py @@ -4,6 +4,8 @@ from multiprocessing import Pool from pathlib import Path +from mapillary_tools import utils + from tqdm import tqdm from .. import exceptions, exiftool_read, geo, types @@ -66,6 +68,7 @@ def geotag_video(element: ET.Element) -> types.VideoMetadataOrError: video_metadata = types.VideoMetadata( video_path, md5sum=None, + filesize=utils.get_file_size(video_path), filetype=types.FileType.VIDEO, points=points, make=exif.extract_make(), diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index b5ac06316..7374ed965 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -4,6 +4,8 @@ from multiprocessing import Pool from pathlib import Path +from mapillary_tools import utils + from tqdm import tqdm from .. import exceptions, geo, types @@ -82,6 +84,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, + filesize=utils.get_file_size(video_path), filetype=types.FileType.CAMM, points=points, make=make, @@ -105,6 +108,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, + filesize=utils.get_file_size(video_path), filetype=types.FileType.GOPRO, points=T.cast(T.List[geo.Point], points_with_fix), make=make, @@ -128,6 +132,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, + filesize=utils.get_file_size(video_path), filetype=types.FileType.BLACKVUE, points=points, make=make, diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index 59a4cf345..054a8cbd3 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -413,19 +413,22 @@ def _show_stats_per_filetype( skipped_process_errors: T.Set[T.Type[Exception]], ): good_metadatas: T.List[T.Union[types.VideoMetadata, types.ImageMetadata]] = [] + filesize_to_upload = 0 error_metadatas: T.List[types.ErrorMetadata] = [] for metadata in metadatas: if isinstance(metadata, types.ErrorMetadata): error_metadatas.append(metadata) else: good_metadatas.append(metadata) + filesize_to_upload += metadata.filesize or 0 LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value) if good_metadatas: LOG.info( - "\t %8d %s(s) are ready to be uploaded", + "\t %8d %s(s) (%s MB) are ready to be uploaded", len(good_metadatas), filetype.value, + round(filesize_to_upload / 1024 / 1024, 1), ) error_counter = collections.Counter( diff --git a/mapillary_tools/types.py b/mapillary_tools/types.py index a7616a216..738ee556a 100644 --- a/mapillary_tools/types.py +++ b/mapillary_tools/types.py @@ -57,6 +57,7 @@ class ImageMetadata(geo.Point): MAPMetaTags: T.Optional[T.Dict] = None # deprecated since v0.10.0; keep here for compatibility MAPFilename: T.Optional[str] = None + filesize: T.Optional[int] = None def update_md5sum(self, image_data: T.Optional[T.BinaryIO] = None) -> None: if self.md5sum is None: @@ -82,6 +83,7 @@ class VideoMetadata: points: T.Sequence[geo.Point] make: T.Optional[str] = None model: T.Optional[str] = None + filesize: T.Optional[int] = None def update_md5sum(self) -> None: if self.md5sum is None: @@ -144,6 +146,7 @@ class ImageDescription(_SequenceOnly, _Image, MetaProperties, total=True): # if None or absent, it will be calculated md5sum: T.Optional[str] filetype: Literal["image"] + filesize: T.Optional[int] class _VideoDescriptionRequired(TypedDict, total=True): @@ -157,6 +160,7 @@ class _VideoDescriptionRequired(TypedDict, total=True): class VideoDescription(_VideoDescriptionRequired, total=False): MAPDeviceMake: str MAPDeviceModel: str + filesize: T.Optional[int] class _ErrorDescription(TypedDict, total=False): @@ -369,6 +373,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the image content. If not provided, the uploader will compute it", }, + "filesize": { + "type": ["number", "null"], + "description": "File size", + }, "filetype": { "type": "string", "enum": [FileType.IMAGE.value], @@ -395,6 +403,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the video content. If not provided, the uploader will compute it", }, + "filesize": { + "type": ["number", "null"], + "description": "File size", + }, "filetype": { "type": "string", "enum": [ @@ -485,6 +497,7 @@ def _as_video_desc(metadata: VideoMetadata) -> VideoDescription: "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, "filetype": metadata.filetype.value, + "filesize": metadata.filesize, "MAPGPSTrack": [_encode_point(p) for p in metadata.points], } if metadata.make: @@ -498,6 +511,7 @@ def _as_image_desc(metadata: ImageMetadata) -> ImageDescription: desc: ImageDescription = { "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, + "filesize": metadata.filesize, "filetype": FileType.IMAGE.value, "MAPLatitude": round(metadata.lat, _COORDINATES_PRECISION), "MAPLongitude": round(metadata.lon, _COORDINATES_PRECISION), @@ -543,6 +557,7 @@ def _from_image_desc(desc) -> ImageMetadata: if k not in [ "filename", "md5sum", + "filesize", "filetype", "MAPLatitude", "MAPLongitude", @@ -555,6 +570,7 @@ def _from_image_desc(desc) -> ImageMetadata: return ImageMetadata( filename=Path(desc["filename"]), md5sum=desc.get("md5sum"), + filesize=desc.get("filesize"), lat=desc["MAPLatitude"], lon=desc["MAPLongitude"], alt=desc.get("MAPAltitude"), @@ -586,6 +602,7 @@ def _from_video_desc(desc: VideoDescription) -> VideoMetadata: return VideoMetadata( filename=Path(desc["filename"]), md5sum=desc["md5sum"], + filesize=desc["filesize"], filetype=FileType(desc["filetype"]), points=[_decode_point(entry) for entry in desc["MAPGPSTrack"]], make=desc.get("MAPDeviceMake"), diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py index 58b53bfc4..5d166563c 100644 --- a/mapillary_tools/utils.py +++ b/mapillary_tools/utils.py @@ -190,3 +190,7 @@ def find_xml_files(import_paths: T.Sequence[Path]) -> T.List[Path]: if path.suffix.lower() in [".xml"]: xml_paths.append(path) return list(deduplicate_paths(xml_paths)) + + +def get_file_size(path: Path) -> int: + return os.path.getsize(path) diff --git a/mapillary_tools/video_data_extraction/extract_video_data.py b/mapillary_tools/video_data_extraction/extract_video_data.py index 311286505..442a4bfcd 100644 --- a/mapillary_tools/video_data_extraction/extract_video_data.py +++ b/mapillary_tools/video_data_extraction/extract_video_data.py @@ -92,6 +92,7 @@ def process_file(self, file: Path) -> VideoMetadataOrError: filename=file, filetype=FileType.VIDEO, md5sum=None, + filesize=utils.get_file_size(file), points=points, make=make, model=model, diff --git a/schema/image_description_schema.json b/schema/image_description_schema.json index d26d54002..2415e3ffa 100644 --- a/schema/image_description_schema.json +++ b/schema/image_description_schema.json @@ -57,6 +57,13 @@ ], "description": "MD5 checksum of the video content. If not provided, the uploader will compute it" }, + "filesize": { + "type": [ + "number", + "null" + ], + "description": "File size" + }, "filetype": { "type": "string", "enum": [ @@ -154,6 +161,13 @@ ], "description": "MD5 checksum of the image content. If not provided, the uploader will compute it" }, + "filesize": { + "type": [ + "number", + "null" + ], + "description": "File size" + }, "filetype": { "type": "string", "enum": [ diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 52b8d9c36..5d19b10dd 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -156,13 +156,14 @@ def run_exiftool_and_generate_geotag_args( def validate_and_extract_image(image_path: str): with open(image_path, "rb") as fp: tags = exifread.process_file(fp) - desc_tag = tags.get("Image ImageDescription") - assert desc_tag is not None, (tags, image_path) - desc = json.loads(str(desc_tag.values)) - desc["filename"] = image_path - desc["filetype"] = "image" - jsonschema.validate(desc, image_description_schema) - return desc + + desc_tag = tags.get("Image ImageDescription") + assert desc_tag is not None, (tags, image_path) + desc = json.loads(str(desc_tag.values)) + desc["filename"] = image_path + desc["filetype"] = "image" + jsonschema.validate(desc, image_description_schema) + return desc def validate_and_extract_zip(zip_path: str) -> T.List[T.Dict]: diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py index 5d74b3cf7..ccf7d6633 100644 --- a/tests/integration/test_process_and_upload.py +++ b/tests/integration/test_process_and_upload.py @@ -143,7 +143,8 @@ def _validate_output(upload_dir: py.path.local, expected): actual[os.path.basename(desc["filename"])] = { k: v for k, v in desc.items() - if k not in ["filename", "md5sum", "MAPMetaTags", "MAPSequenceUUID"] + if k + not in ["filename", "filesize", "md5sum", "MAPMetaTags", "MAPSequenceUUID"] } assert expected == actual diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index 053ee2fbe..79e35db70 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ -374,6 +374,7 @@ def test_process_finalize(setup_data): { "filename": str(test_exif), "filetype": "image", + "filesize": None, "MAPLatitude": 1, "MAPLongitude": 1, "MAPCaptureTime": "1970_01_01_00_00_02_000",