Skip to content

Commit de080e3

Browse files
authored
Merge pull request #703 from mapillary/feat-file-sizes
Add file sizes
2 parents f790b1a + c15a016 commit de080e3

File tree

11 files changed

+61
-10
lines changed

11 files changed

+61
-10
lines changed

mapillary_tools/geotag/geotag_images_from_exif.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from tqdm import tqdm
88

9-
from .. import exceptions, exif_write, geo, types
9+
from .. import exceptions, exif_write, geo, types, utils
1010
from ..exif_read import ExifRead, ExifReadABC
1111
from .geotag_from_generic import GeotagImagesFromGeneric
1212

@@ -64,6 +64,7 @@ def build_image_metadata(
6464
image_metadata = types.ImageMetadata(
6565
filename=image_path,
6666
md5sum=None,
67+
filesize=utils.get_file_size(image_path),
6768
time=geo.as_unix_time(capture_time),
6869
lat=lat,
6970
lon=lon,

mapillary_tools/geotag/geotag_videos_from_exiftool_video.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from multiprocessing import Pool
55
from pathlib import Path
66

7+
from mapillary_tools import utils
8+
79
from tqdm import tqdm
810

911
from .. import exceptions, exiftool_read, geo, types
@@ -66,6 +68,7 @@ def geotag_video(element: ET.Element) -> types.VideoMetadataOrError:
6668
video_metadata = types.VideoMetadata(
6769
video_path,
6870
md5sum=None,
71+
filesize=utils.get_file_size(video_path),
6972
filetype=types.FileType.VIDEO,
7073
points=points,
7174
make=exif.extract_make(),

mapillary_tools/geotag/geotag_videos_from_video.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from multiprocessing import Pool
55
from pathlib import Path
66

7+
from mapillary_tools import utils
8+
79
from tqdm import tqdm
810

911
from .. import exceptions, geo, types
@@ -82,6 +84,7 @@ def _extract_video_metadata(
8284
return types.VideoMetadata(
8385
filename=video_path,
8486
md5sum=None,
87+
filesize=utils.get_file_size(video_path),
8588
filetype=types.FileType.CAMM,
8689
points=points,
8790
make=make,
@@ -105,6 +108,7 @@ def _extract_video_metadata(
105108
return types.VideoMetadata(
106109
filename=video_path,
107110
md5sum=None,
111+
filesize=utils.get_file_size(video_path),
108112
filetype=types.FileType.GOPRO,
109113
points=T.cast(T.List[geo.Point], points_with_fix),
110114
make=make,
@@ -128,6 +132,7 @@ def _extract_video_metadata(
128132
return types.VideoMetadata(
129133
filename=video_path,
130134
md5sum=None,
135+
filesize=utils.get_file_size(video_path),
131136
filetype=types.FileType.BLACKVUE,
132137
points=points,
133138
make=make,

mapillary_tools/process_geotag_properties.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,19 +413,22 @@ def _show_stats_per_filetype(
413413
skipped_process_errors: T.Set[T.Type[Exception]],
414414
):
415415
good_metadatas: T.List[T.Union[types.VideoMetadata, types.ImageMetadata]] = []
416+
filesize_to_upload = 0
416417
error_metadatas: T.List[types.ErrorMetadata] = []
417418
for metadata in metadatas:
418419
if isinstance(metadata, types.ErrorMetadata):
419420
error_metadatas.append(metadata)
420421
else:
421422
good_metadatas.append(metadata)
423+
filesize_to_upload += metadata.filesize or 0
422424

423425
LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value)
424426
if good_metadatas:
425427
LOG.info(
426-
"\t %8d %s(s) are ready to be uploaded",
428+
"\t %8d %s(s) (%s MB) are ready to be uploaded",
427429
len(good_metadatas),
428430
filetype.value,
431+
round(filesize_to_upload / 1024 / 1024, 1),
429432
)
430433

431434
error_counter = collections.Counter(

mapillary_tools/types.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class ImageMetadata(geo.Point):
5757
MAPMetaTags: T.Optional[T.Dict] = None
5858
# deprecated since v0.10.0; keep here for compatibility
5959
MAPFilename: T.Optional[str] = None
60+
filesize: T.Optional[int] = None
6061

6162
def update_md5sum(self, image_data: T.Optional[T.BinaryIO] = None) -> None:
6263
if self.md5sum is None:
@@ -82,6 +83,7 @@ class VideoMetadata:
8283
points: T.Sequence[geo.Point]
8384
make: T.Optional[str] = None
8485
model: T.Optional[str] = None
86+
filesize: T.Optional[int] = None
8587

8688
def update_md5sum(self) -> None:
8789
if self.md5sum is None:
@@ -144,6 +146,7 @@ class ImageDescription(_SequenceOnly, _Image, MetaProperties, total=True):
144146
# if None or absent, it will be calculated
145147
md5sum: T.Optional[str]
146148
filetype: Literal["image"]
149+
filesize: T.Optional[int]
147150

148151

149152
class _VideoDescriptionRequired(TypedDict, total=True):
@@ -157,6 +160,7 @@ class _VideoDescriptionRequired(TypedDict, total=True):
157160
class VideoDescription(_VideoDescriptionRequired, total=False):
158161
MAPDeviceMake: str
159162
MAPDeviceModel: str
163+
filesize: T.Optional[int]
160164

161165

162166
class _ErrorDescription(TypedDict, total=False):
@@ -369,6 +373,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict:
369373
"type": ["string", "null"],
370374
"description": "MD5 checksum of the image content. If not provided, the uploader will compute it",
371375
},
376+
"filesize": {
377+
"type": ["number", "null"],
378+
"description": "File size",
379+
},
372380
"filetype": {
373381
"type": "string",
374382
"enum": [FileType.IMAGE.value],
@@ -395,6 +403,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict:
395403
"type": ["string", "null"],
396404
"description": "MD5 checksum of the video content. If not provided, the uploader will compute it",
397405
},
406+
"filesize": {
407+
"type": ["number", "null"],
408+
"description": "File size",
409+
},
398410
"filetype": {
399411
"type": "string",
400412
"enum": [
@@ -485,6 +497,7 @@ def _as_video_desc(metadata: VideoMetadata) -> VideoDescription:
485497
"filename": str(metadata.filename.resolve()),
486498
"md5sum": metadata.md5sum,
487499
"filetype": metadata.filetype.value,
500+
"filesize": metadata.filesize,
488501
"MAPGPSTrack": [_encode_point(p) for p in metadata.points],
489502
}
490503
if metadata.make:
@@ -498,6 +511,7 @@ def _as_image_desc(metadata: ImageMetadata) -> ImageDescription:
498511
desc: ImageDescription = {
499512
"filename": str(metadata.filename.resolve()),
500513
"md5sum": metadata.md5sum,
514+
"filesize": metadata.filesize,
501515
"filetype": FileType.IMAGE.value,
502516
"MAPLatitude": round(metadata.lat, _COORDINATES_PRECISION),
503517
"MAPLongitude": round(metadata.lon, _COORDINATES_PRECISION),
@@ -543,6 +557,7 @@ def _from_image_desc(desc) -> ImageMetadata:
543557
if k not in [
544558
"filename",
545559
"md5sum",
560+
"filesize",
546561
"filetype",
547562
"MAPLatitude",
548563
"MAPLongitude",
@@ -555,6 +570,7 @@ def _from_image_desc(desc) -> ImageMetadata:
555570
return ImageMetadata(
556571
filename=Path(desc["filename"]),
557572
md5sum=desc.get("md5sum"),
573+
filesize=desc.get("filesize"),
558574
lat=desc["MAPLatitude"],
559575
lon=desc["MAPLongitude"],
560576
alt=desc.get("MAPAltitude"),
@@ -586,6 +602,7 @@ def _from_video_desc(desc: VideoDescription) -> VideoMetadata:
586602
return VideoMetadata(
587603
filename=Path(desc["filename"]),
588604
md5sum=desc["md5sum"],
605+
filesize=desc["filesize"],
589606
filetype=FileType(desc["filetype"]),
590607
points=[_decode_point(entry) for entry in desc["MAPGPSTrack"]],
591608
make=desc.get("MAPDeviceMake"),

mapillary_tools/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,7 @@ def find_xml_files(import_paths: T.Sequence[Path]) -> T.List[Path]:
190190
if path.suffix.lower() in [".xml"]:
191191
xml_paths.append(path)
192192
return list(deduplicate_paths(xml_paths))
193+
194+
195+
def get_file_size(path: Path) -> int:
196+
return os.path.getsize(path)

mapillary_tools/video_data_extraction/extract_video_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ def process_file(self, file: Path) -> VideoMetadataOrError:
9292
filename=file,
9393
filetype=FileType.VIDEO,
9494
md5sum=None,
95+
filesize=utils.get_file_size(file),
9596
points=points,
9697
make=make,
9798
model=model,

schema/image_description_schema.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@
5757
],
5858
"description": "MD5 checksum of the video content. If not provided, the uploader will compute it"
5959
},
60+
"filesize": {
61+
"type": [
62+
"number",
63+
"null"
64+
],
65+
"description": "File size"
66+
},
6067
"filetype": {
6168
"type": "string",
6269
"enum": [
@@ -154,6 +161,13 @@
154161
],
155162
"description": "MD5 checksum of the image content. If not provided, the uploader will compute it"
156163
},
164+
"filesize": {
165+
"type": [
166+
"number",
167+
"null"
168+
],
169+
"description": "File size"
170+
},
157171
"filetype": {
158172
"type": "string",
159173
"enum": [

tests/integration/fixtures.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,14 @@ def run_exiftool_and_generate_geotag_args(
156156
def validate_and_extract_image(image_path: str):
157157
with open(image_path, "rb") as fp:
158158
tags = exifread.process_file(fp)
159-
desc_tag = tags.get("Image ImageDescription")
160-
assert desc_tag is not None, (tags, image_path)
161-
desc = json.loads(str(desc_tag.values))
162-
desc["filename"] = image_path
163-
desc["filetype"] = "image"
164-
jsonschema.validate(desc, image_description_schema)
165-
return desc
159+
160+
desc_tag = tags.get("Image ImageDescription")
161+
assert desc_tag is not None, (tags, image_path)
162+
desc = json.loads(str(desc_tag.values))
163+
desc["filename"] = image_path
164+
desc["filetype"] = "image"
165+
jsonschema.validate(desc, image_description_schema)
166+
return desc
166167

167168

168169
def validate_and_extract_zip(zip_path: str) -> T.List[T.Dict]:

tests/integration/test_process_and_upload.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ def _validate_output(upload_dir: py.path.local, expected):
143143
actual[os.path.basename(desc["filename"])] = {
144144
k: v
145145
for k, v in desc.items()
146-
if k not in ["filename", "md5sum", "MAPMetaTags", "MAPSequenceUUID"]
146+
if k
147+
not in ["filename", "filesize", "md5sum", "MAPMetaTags", "MAPSequenceUUID"]
147148
}
148149

149150
assert expected == actual

0 commit comments

Comments
 (0)