diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py
index 474a303fe..4e46f2198 100644
--- a/mapillary_tools/geotag/geotag_images_from_exif.py
+++ b/mapillary_tools/geotag/geotag_images_from_exif.py
@@ -6,7 +6,7 @@
 
 from tqdm import tqdm
 
-from .. import exceptions, exif_write, geo, types
+from .. import exceptions, exif_write, geo, types, utils
 from ..exif_read import ExifRead, ExifReadABC
 from .geotag_from_generic import GeotagImagesFromGeneric
 
@@ -64,6 +64,7 @@ def build_image_metadata(
         image_metadata = types.ImageMetadata(
             filename=image_path,
             md5sum=None,
+            filesize=utils.get_file_size(image_path),
             time=geo.as_unix_time(capture_time),
             lat=lat,
             lon=lon,
diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py
index 3ed8465ed..8f67aef89 100644
--- a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py
+++ b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py
@@ -4,6 +4,8 @@
 from multiprocessing import Pool
 from pathlib import Path
 
+from mapillary_tools import utils
+
 from tqdm import tqdm
 
 from .. import exceptions, exiftool_read, geo, types
@@ -66,6 +68,7 @@ def geotag_video(element: ET.Element) -> types.VideoMetadataOrError:
             video_metadata = types.VideoMetadata(
                 video_path,
                 md5sum=None,
+                filesize=utils.get_file_size(video_path),
                 filetype=types.FileType.VIDEO,
                 points=points,
                 make=exif.extract_make(),
diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py
index b5ac06316..7374ed965 100644
--- a/mapillary_tools/geotag/geotag_videos_from_video.py
+++ b/mapillary_tools/geotag/geotag_videos_from_video.py
@@ -4,6 +4,8 @@
 from multiprocessing import Pool
 from pathlib import Path
 
+from mapillary_tools import utils
+
 from tqdm import tqdm
 
 from .. import exceptions, geo, types
@@ -82,6 +84,7 @@ def _extract_video_metadata(
                     return types.VideoMetadata(
                         filename=video_path,
                         md5sum=None,
+                        filesize=utils.get_file_size(video_path),
                         filetype=types.FileType.CAMM,
                         points=points,
                         make=make,
@@ -105,6 +108,7 @@ def _extract_video_metadata(
                     return types.VideoMetadata(
                         filename=video_path,
                         md5sum=None,
+                        filesize=utils.get_file_size(video_path),
                         filetype=types.FileType.GOPRO,
                         points=T.cast(T.List[geo.Point], points_with_fix),
                         make=make,
@@ -128,6 +132,7 @@ def _extract_video_metadata(
                     return types.VideoMetadata(
                         filename=video_path,
                         md5sum=None,
+                        filesize=utils.get_file_size(video_path),
                         filetype=types.FileType.BLACKVUE,
                         points=points,
                         make=make,
diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py
index 59a4cf345..054a8cbd3 100644
--- a/mapillary_tools/process_geotag_properties.py
+++ b/mapillary_tools/process_geotag_properties.py
@@ -413,19 +413,22 @@ def _show_stats_per_filetype(
     skipped_process_errors: T.Set[T.Type[Exception]],
 ):
     good_metadatas: T.List[T.Union[types.VideoMetadata, types.ImageMetadata]] = []
+    filesize_to_upload = 0
     error_metadatas: T.List[types.ErrorMetadata] = []
     for metadata in metadatas:
         if isinstance(metadata, types.ErrorMetadata):
             error_metadatas.append(metadata)
         else:
             good_metadatas.append(metadata)
+            filesize_to_upload += metadata.filesize or 0
 
     LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value)
     if good_metadatas:
         LOG.info(
-            "\t %8d %s(s) are ready to be uploaded",
+            "\t %8d %s(s) (%s MB) are ready to be uploaded",
             len(good_metadatas),
             filetype.value,
+            round(filesize_to_upload / 1024 / 1024, 1),
         )
 
     error_counter = collections.Counter(
diff --git a/mapillary_tools/types.py b/mapillary_tools/types.py
index a7616a216..738ee556a 100644
--- a/mapillary_tools/types.py
+++ b/mapillary_tools/types.py
@@ -57,6 +57,7 @@ class ImageMetadata(geo.Point):
     MAPMetaTags: T.Optional[T.Dict] = None
     # deprecated since v0.10.0; keep here for compatibility
     MAPFilename: T.Optional[str] = None
+    filesize: T.Optional[int] = None
 
     def update_md5sum(self, image_data: T.Optional[T.BinaryIO] = None) -> None:
         if self.md5sum is None:
@@ -82,6 +83,7 @@ class VideoMetadata:
     points: T.Sequence[geo.Point]
     make: T.Optional[str] = None
     model: T.Optional[str] = None
+    filesize: T.Optional[int] = None
 
     def update_md5sum(self) -> None:
         if self.md5sum is None:
@@ -144,6 +146,7 @@ class ImageDescription(_SequenceOnly, _Image, MetaProperties, total=True):
     # if None or absent, it will be calculated
     md5sum: T.Optional[str]
     filetype: Literal["image"]
+    filesize: T.Optional[int]
 
 
 class _VideoDescriptionRequired(TypedDict, total=True):
@@ -157,6 +160,7 @@ class _VideoDescriptionRequired(TypedDict, total=True):
 class VideoDescription(_VideoDescriptionRequired, total=False):
     MAPDeviceMake: str
     MAPDeviceModel: str
+    filesize: T.Optional[int]
 
 
 class _ErrorDescription(TypedDict, total=False):
@@ -369,6 +373,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict:
                 "type": ["string", "null"],
                 "description": "MD5 checksum of the image content. If not provided, the uploader will compute it",
             },
+            "filesize": {
+                "type": ["number", "null"],
+                "description": "File size",
+            },
             "filetype": {
                 "type": "string",
                 "enum": [FileType.IMAGE.value],
@@ -395,6 +403,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict:
                 "type": ["string", "null"],
                 "description": "MD5 checksum of the video content. If not provided, the uploader will compute it",
             },
+            "filesize": {
+                "type": ["number", "null"],
+                "description": "File size",
+            },
             "filetype": {
                 "type": "string",
                 "enum": [
@@ -485,6 +497,7 @@ def _as_video_desc(metadata: VideoMetadata) -> VideoDescription:
         "filename": str(metadata.filename.resolve()),
         "md5sum": metadata.md5sum,
         "filetype": metadata.filetype.value,
+        "filesize": metadata.filesize,
         "MAPGPSTrack": [_encode_point(p) for p in metadata.points],
     }
     if metadata.make:
@@ -498,6 +511,7 @@ def _as_image_desc(metadata: ImageMetadata) -> ImageDescription:
     desc: ImageDescription = {
         "filename": str(metadata.filename.resolve()),
         "md5sum": metadata.md5sum,
+        "filesize": metadata.filesize,
         "filetype": FileType.IMAGE.value,
         "MAPLatitude": round(metadata.lat, _COORDINATES_PRECISION),
         "MAPLongitude": round(metadata.lon, _COORDINATES_PRECISION),
@@ -543,6 +557,7 @@ def _from_image_desc(desc) -> ImageMetadata:
         if k not in [
             "filename",
             "md5sum",
+            "filesize",
             "filetype",
             "MAPLatitude",
             "MAPLongitude",
@@ -555,6 +570,7 @@ def _from_image_desc(desc) -> ImageMetadata:
     return ImageMetadata(
         filename=Path(desc["filename"]),
         md5sum=desc.get("md5sum"),
+        filesize=desc.get("filesize"),
         lat=desc["MAPLatitude"],
         lon=desc["MAPLongitude"],
         alt=desc.get("MAPAltitude"),
@@ -586,6 +602,7 @@ def _from_video_desc(desc: VideoDescription) -> VideoMetadata:
     return VideoMetadata(
         filename=Path(desc["filename"]),
         md5sum=desc["md5sum"],
+        filesize=desc.get("filesize"),  # optional key; may be absent
         filetype=FileType(desc["filetype"]),
         points=[_decode_point(entry) for entry in desc["MAPGPSTrack"]],
         make=desc.get("MAPDeviceMake"),
diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py
index 58b53bfc4..5d166563c 100644
--- a/mapillary_tools/utils.py
+++ b/mapillary_tools/utils.py
@@ -190,3 +190,7 @@ def find_xml_files(import_paths: T.Sequence[Path]) -> T.List[Path]:
             if path.suffix.lower() in [".xml"]:
                 xml_paths.append(path)
     return list(deduplicate_paths(xml_paths))
+
+
+def get_file_size(path: Path) -> int:
+    return os.path.getsize(path)  # size in bytes; OSError if path is inaccessible
diff --git a/mapillary_tools/video_data_extraction/extract_video_data.py b/mapillary_tools/video_data_extraction/extract_video_data.py
index 311286505..442a4bfcd 100644
--- a/mapillary_tools/video_data_extraction/extract_video_data.py
+++ b/mapillary_tools/video_data_extraction/extract_video_data.py
@@ -92,6 +92,7 @@ def process_file(self, file: Path) -> VideoMetadataOrError:
                 filename=file,
                 filetype=FileType.VIDEO,
                 md5sum=None,
+                filesize=utils.get_file_size(file),
                 points=points,
                 make=make,
                 model=model,
diff --git a/schema/image_description_schema.json b/schema/image_description_schema.json
index d26d54002..2415e3ffa 100644
--- a/schema/image_description_schema.json
+++ b/schema/image_description_schema.json
@@ -57,6 +57,13 @@
                     ],
                     "description": "MD5 checksum of the video content. If not provided, the uploader will compute it"
                 },
+                "filesize": {
+                    "type": [
+                        "number",
+                        "null"
+                    ],
+                    "description": "File size"
+                },
                 "filetype": {
                     "type": "string",
                     "enum": [
@@ -154,6 +161,13 @@
                     ],
                     "description": "MD5 checksum of the image content. If not provided, the uploader will compute it"
                 },
+                "filesize": {
+                    "type": [
+                        "number",
+                        "null"
+                    ],
+                    "description": "File size"
+                },
                 "filetype": {
                     "type": "string",
                     "enum": [
diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py
index 52b8d9c36..5d19b10dd 100644
--- a/tests/integration/fixtures.py
+++ b/tests/integration/fixtures.py
@@ -156,13 +156,14 @@ def run_exiftool_and_generate_geotag_args(
 def validate_and_extract_image(image_path: str):
     with open(image_path, "rb") as fp:
         tags = exifread.process_file(fp)
-        desc_tag = tags.get("Image ImageDescription")
-        assert desc_tag is not None, (tags, image_path)
-        desc = json.loads(str(desc_tag.values))
-        desc["filename"] = image_path
-        desc["filetype"] = "image"
-        jsonschema.validate(desc, image_description_schema)
-        return desc
+
+    desc_tag = tags.get("Image ImageDescription")  # fp is closed by now
+    assert desc_tag is not None, (tags, image_path)
+    desc = json.loads(str(desc_tag.values))
+    desc["filename"] = image_path
+    desc["filetype"] = "image"
+    jsonschema.validate(desc, image_description_schema)
+    return desc
 
 
 def validate_and_extract_zip(zip_path: str) -> T.List[T.Dict]:
diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py
index 5d74b3cf7..ccf7d6633 100644
--- a/tests/integration/test_process_and_upload.py
+++ b/tests/integration/test_process_and_upload.py
@@ -143,7 +143,8 @@ def _validate_output(upload_dir: py.path.local, expected):
         actual[os.path.basename(desc["filename"])] = {
             k: v
             for k, v in desc.items()
-            if k not in ["filename", "md5sum", "MAPMetaTags", "MAPSequenceUUID"]
+            if k
+            not in ["filename", "filesize", "md5sum", "MAPMetaTags", "MAPSequenceUUID"]
         }
 
     assert expected == actual
diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py
index 053ee2fbe..79e35db70 100644
--- a/tests/unit/test_sequence_processing.py
+++ b/tests/unit/test_sequence_processing.py
@@ -374,6 +374,7 @@ def test_process_finalize(setup_data):
         {
             "filename": str(test_exif),
             "filetype": "image",
+            "filesize": None,
             "MAPLatitude": 1,
             "MAPLongitude": 1,
             "MAPCaptureTime": "1970_01_01_00_00_02_000",