diff --git a/mapillary_tools/api_v4.py b/mapillary_tools/api_v4.py
index 317aef850..d2258a110 100644
--- a/mapillary_tools/api_v4.py
+++ b/mapillary_tools/api_v4.py
@@ -162,18 +162,20 @@ def request_post(
     url: str,
     data: T.Any | None = None,
     json: dict | None = None,
+    disable_debug=False,
     **kwargs,
 ) -> requests.Response:
     global USE_SYSTEM_CERTS
 
-    _log_debug_request(
-        "POST",
-        url,
-        json=json,
-        params=kwargs.get("params"),
-        headers=kwargs.get("headers"),
-        timeout=kwargs.get("timeout"),
-    )
+    if not disable_debug:
+        _log_debug_request(
+            "POST",
+            url,
+            json=json,
+            params=kwargs.get("params"),
+            headers=kwargs.get("headers"),
+            timeout=kwargs.get("timeout"),
+        )
 
     if USE_SYSTEM_CERTS:
         with requests.Session() as session:
@@ -193,25 +195,25 @@ def request_post(
             )
             return request_post(url, data=data, json=json, **kwargs)
 
-    _log_debug_response(resp)
+    if not disable_debug:
+        _log_debug_response(resp)
 
     return resp
 
 
 def request_get(
-    url: str,
-    params: dict | None = None,
-    **kwargs,
+    url: str, params: dict | None = None, disable_debug=False, **kwargs
 ) -> requests.Response:
     global USE_SYSTEM_CERTS
 
-    _log_debug_request(
-        "GET",
-        url,
-        params=kwargs.get("params"),
-        headers=kwargs.get("headers"),
-        timeout=kwargs.get("timeout"),
-    )
+    if not disable_debug:
+        _log_debug_request(
+            "GET",
+            url,
+            params=kwargs.get("params"),
+            headers=kwargs.get("headers"),
+            timeout=kwargs.get("timeout"),
+        )
 
     if USE_SYSTEM_CERTS:
         with requests.Session() as session:
@@ -230,7 +232,8 @@ def request_get(
             )
             resp = request_get(url, params=params, **kwargs)
 
-    _log_debug_response(resp)
+    if not disable_debug:
+        _log_debug_response(resp)
 
     return resp
 
@@ -302,8 +305,7 @@ def fetch_organization(
 
 
 def fetch_user_or_me(
-    user_access_token: str,
-    user_id: int | str | None = None,
+    user_access_token: str, user_id: int | str | None = None
 ) -> requests.Response:
     if user_id is None:
         url = f"{MAPILLARY_GRAPH_API_ENDPOINT}/me"
@@ -341,6 +343,7 @@ def log_event(action_type: ActionType, properties: dict) -> requests.Response:
             "Authorization": f"OAuth {MAPILLARY_CLIENT_TOKEN}",
         },
         timeout=REQUESTS_TIMEOUT,
+        disable_debug=True,
     )
     resp.raise_for_status()
     return resp
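Note: `log_event` reports usage telemetry, and echoing those requests through `_log_debug_request`/`_log_debug_response` would clutter verbose output with the tools' own reporting traffic. A minimal sketch of the intended call shape — the endpoint path below is illustrative, not taken from this patch:

```python
# Hypothetical telemetry post; disable_debug is the new parameter added above.
resp = request_post(
    f"{MAPILLARY_GRAPH_API_ENDPOINT}/logging",  # illustrative endpoint
    json={"action_type": "upload_end", "properties": {}},
    timeout=REQUESTS_TIMEOUT,
    disable_debug=True,  # skip _log_debug_request/_log_debug_response
)
```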
diff --git a/mapillary_tools/authenticate.py b/mapillary_tools/authenticate.py
index 8fb22ff87..35bafc3e7 100644
--- a/mapillary_tools/authenticate.py
+++ b/mapillary_tools/authenticate.py
@@ -87,8 +87,7 @@ def authenticate(
 
 
 def fetch_user_items(
-    user_name: str | None = None,
-    organization_key: str | None = None,
+    user_name: str | None = None, organization_key: str | None = None
 ) -> config.UserItem:
     """
     Read user information from the config file,
diff --git a/mapillary_tools/commands/upload.py b/mapillary_tools/commands/upload.py
index 418ce0451..636753452 100644
--- a/mapillary_tools/commands/upload.py
+++ b/mapillary_tools/commands/upload.py
@@ -8,13 +8,13 @@
 
 class Command:
     name = "upload"
-    help = "upload images to Mapillary"
+    help = "Upload processed data to Mapillary"
 
     @staticmethod
     def add_common_upload_options(group):
         group.add_argument(
             "--user_name",
-            help="The Mapillary user account to upload to. If you only have one account authorized, it will upload to that account by default.",
+            help="The Mapillary user account to upload to.",
             required=False,
         )
         group.add_argument(
@@ -23,9 +23,31 @@ def add_common_upload_options(group):
             default=None,
             required=False,
         )
+        group.add_argument(
+            "--reupload",
+            help="Re-upload data that has already been uploaded.",
+            action="store_true",
+            default=False,
+            required=False,
+        )
         group.add_argument(
             "--dry_run",
-            help='Instead of uploading to the Mapillary server, simulate uploading to the local directory "mapillary_public_uploads" for debugging purposes.',
+            "--dryrun",
+            help="[DEVELOPMENT] Simulate upload by sending data to a local directory instead of Mapillary servers. Uses a temporary directory by default unless one is specified via the MAPILLARY_UPLOAD_ENDPOINT environment variable.",
+            action="store_true",
+            default=False,
+            required=False,
+        )
+        group.add_argument(
+            "--nofinish",
+            help="[DEVELOPMENT] Upload data without finalizing. The data will NOT be stored permanently or appear on the Mapillary website.",
+            action="store_true",
+            default=False,
+            required=False,
+        )
+        group.add_argument(
+            "--noresume",
+            help="[DEVELOPMENT] Start the upload from the beginning, ignoring any previously interrupted upload sessions.",
             action="store_true",
             default=False,
             required=False,
@@ -35,7 +57,7 @@ def add_basic_arguments(self, parser):
         group = parser.add_argument_group(bold_text("UPLOAD OPTIONS"))
         group.add_argument(
             "--desc_path",
-            help=f'Path to the description file generated by the process command. The hyphen "-" indicates STDIN. [default: {{IMPORT_PATH}}/{constants.IMAGE_DESCRIPTION_FILENAME}]',
+            help=f'Path to the description file with processed image and video metadata (from the process command). Use "-" for STDIN. [default: {{IMPORT_PATH}}/{constants.IMAGE_DESCRIPTION_FILENAME}]',
             default=None,
             required=False,
         )
diff --git a/mapillary_tools/exif_write.py b/mapillary_tools/exif_write.py
index c3737d8b9..33c1b6bde 100644
--- a/mapillary_tools/exif_write.py
+++ b/mapillary_tools/exif_write.py
@@ -42,7 +42,9 @@ def decimal_to_dms(
 
     def add_image_description(self, data: dict) -> None:
         """Add a dict to image description."""
-        self._ef["0th"][piexif.ImageIFD.ImageDescription] = json.dumps(data)
+        self._ef["0th"][piexif.ImageIFD.ImageDescription] = json.dumps(
+            data, sort_keys=True, separators=(",", ":")
+        )
 
     def add_orientation(self, orientation: int) -> None:
         """Add image orientation to image."""
diff --git a/mapillary_tools/history.py b/mapillary_tools/history.py
index 62b028ef0..9ec188956 100644
--- a/mapillary_tools/history.py
+++ b/mapillary_tools/history.py
@@ -36,10 +36,21 @@ def history_desc_path(md5sum: str) -> Path:
     )
 
 
-def is_uploaded(md5sum: str) -> bool:
+def read_history_record(md5sum: str) -> None | T.Dict[str, T.Any]:
     if not constants.MAPILLARY_UPLOAD_HISTORY_PATH:
-        return False
-    return history_desc_path(md5sum).is_file()
+        return None
+
+    path = history_desc_path(md5sum)
+
+    if not path.is_file():
+        return None
+
+    with path.open("r") as fp:
+        try:
+            return json.load(fp)
+        except json.JSONDecodeError as ex:
+            LOG.error(f"Failed to read upload history {path}: {ex}")
+            return None
 
 
 def write_history(
@@ -53,10 +64,7 @@ def write_history(
     path = history_desc_path(md5sum)
     LOG.debug("Writing upload history: %s", path)
     path.resolve().parent.mkdir(parents=True, exist_ok=True)
-    history: dict[str, T.Any] = {
-        "params": params,
-        "summary": summary,
-    }
+    history: dict[str, T.Any] = {"params": params, "summary": summary}
     if metadatas is not None:
history["descs"] = [ DescriptionJSONSerializer.as_desc(metadata) for metadata in metadatas diff --git a/mapillary_tools/mp4/construct_mp4_parser.py b/mapillary_tools/mp4/construct_mp4_parser.py index 6cd0f3d46..0c9f5b337 100644 --- a/mapillary_tools/mp4/construct_mp4_parser.py +++ b/mapillary_tools/mp4/construct_mp4_parser.py @@ -567,7 +567,9 @@ def _new_cmap_without_boxes( # pyre-ignore[9]: pyre does not support recursive type SwitchMapType MP4_WITHOUT_STBL_CMAP: SwitchMapType = { # pyre-ignore[6]: pyre does not support recursive type SwitchMapType - b"moov": _new_cmap_without_boxes(CMAP[b"moov"], [b"stbl"]), + b"moov": _new_cmap_without_boxes( + CMAP[b"moov"], T.cast(T.Sequence[BoxType], [b"stbl"]) + ), } # for parsing mp4 only diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index e7942078e..3182a11f2 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -227,6 +227,8 @@ def _show_stats( metadatas: T.Sequence[types.MetadataOrError], skipped_process_errors: set[T.Type[Exception]], ) -> None: + LOG.info("========== Process summary ==========") + metadatas_by_filetype: dict[types.FileType, list[types.MetadataOrError]] = {} for metadata in metadatas: if isinstance(metadata, types.ImageMetadata): diff --git a/mapillary_tools/serializer/description.py b/mapillary_tools/serializer/description.py index 198e7c108..4c0c56d72 100644 --- a/mapillary_tools/serializer/description.py +++ b/mapillary_tools/serializer/description.py @@ -86,22 +86,20 @@ class VideoDescription(_SharedDescription, total=False): MAPDeviceModel: str -class _ErrorDescription(TypedDict, total=False): - # type and message are required +class _ErrorObject(TypedDict, total=False): type: Required[str] - message: str - # vars is optional + message: Required[str] vars: dict -class ImageDescriptionError(TypedDict, total=False): +class ErrorDescription(TypedDict, total=False): filename: Required[str] - error: Required[_ErrorDescription] + error: Required[_ErrorObject] filetype: str Description = T.Union[ImageDescription, VideoDescription] -DescriptionOrError = T.Union[ImageDescription, VideoDescription, ImageDescriptionError] +DescriptionOrError = T.Union[ImageDescription, VideoDescription, ErrorDescription] ImageDescriptionEXIFSchema = { @@ -307,7 +305,7 @@ class DescriptionJSONSerializer(BaseSerializer): @classmethod def serialize(cls, metadatas: T.Sequence[MetadataOrError]) -> bytes: descs = [cls.as_desc(m) for m in metadatas] - return json.dumps(descs).encode("utf-8") + return json.dumps(descs, sort_keys=True, separators=(",", ":")).encode("utf-8") @override @classmethod @@ -327,7 +325,7 @@ def as_desc(cls, metadata: ImageMetadata) -> ImageDescription: ... @T.overload @classmethod - def as_desc(cls, metadata: ErrorMetadata) -> ImageDescriptionError: ... + def as_desc(cls, metadata: ErrorMetadata) -> ErrorDescription: ... @T.overload @classmethod @@ -336,7 +334,7 @@ def as_desc(cls, metadata: VideoMetadata) -> VideoDescription: ... 
     @classmethod
     def as_desc(cls, metadata):
         if isinstance(metadata, ErrorMetadata):
-            return _describe_error_desc(
+            return cls._as_error_desc(
                 metadata.error, metadata.filename, metadata.filetype
             )
 
@@ -347,6 +345,35 @@ def as_desc(cls, metadata):
         assert isinstance(metadata, ImageMetadata)
         return cls._as_image_desc(metadata)
 
+    @classmethod
+    def _as_error_desc(
+        cls, exc: Exception, filename: Path, filetype: FileType | None
+    ) -> ErrorDescription:
+        err: _ErrorObject = {
+            "type": exc.__class__.__name__,
+            "message": str(exc),
+        }
+
+        exc_vars = vars(exc)
+
+        if exc_vars:
+            # handle unserializable exceptions
+            try:
+                vars_json = json.dumps(exc_vars, sort_keys=True, separators=(",", ":"))
+            except Exception:
+                vars_json = ""
+            if vars_json:
+                err["vars"] = json.loads(vars_json)
+
+        desc: ErrorDescription = {
+            "error": err,
+            "filename": str(filename.resolve()),
+        }
+        if filetype is not None:
+            desc["filetype"] = filetype.value
+
+        return desc
+
     @classmethod
     def _as_video_desc(cls, metadata: VideoMetadata) -> VideoDescription:
         desc: VideoDescription = {
@@ -354,7 +381,7 @@ def _as_video_desc(cls, metadata: VideoMetadata) -> VideoDescription:
             "md5sum": metadata.md5sum,
             "filetype": metadata.filetype.value,
             "filesize": metadata.filesize,
-            "MAPGPSTrack": [cls._encode_point(p) for p in metadata.points],
+            "MAPGPSTrack": [PointEncoder.encode(p) for p in metadata.points],
         }
         if metadata.make:
             desc["MAPDeviceMake"] = metadata.make
@@ -442,7 +469,23 @@ def _from_image_desc(cls, desc) -> ImageMetadata:
         )
 
     @classmethod
-    def _encode_point(cls, p: geo.Point) -> T.Sequence[float | int | None]:
+    def _from_video_desc(cls, desc: VideoDescription) -> VideoMetadata:
+        validate_video_desc(desc)
+
+        return VideoMetadata(
+            filename=Path(desc["filename"]),
+            md5sum=desc.get("md5sum"),
+            filesize=desc.get("filesize"),
+            filetype=FileType(desc["filetype"]),
+            points=[PointEncoder.decode(entry) for entry in desc["MAPGPSTrack"]],
+            make=desc.get("MAPDeviceMake"),
+            model=desc.get("MAPDeviceModel"),
+        )
+
+
+class PointEncoder:
+    @classmethod
+    def encode(cls, p: geo.Point) -> T.Sequence[float | int | None]:
         entry = [
             int(p.time * 1000),
             round(p.lon, _COORDINATES_PRECISION),
@@ -453,24 +496,10 @@ def _encode_point(cls, p: geo.Point) -> T.Sequence[float | int | None]:
         return entry
 
     @classmethod
-    def _decode_point(cls, entry: T.Sequence[T.Any]) -> geo.Point:
+    def decode(cls, entry: T.Sequence[T.Any]) -> geo.Point:
         time_ms, lon, lat, alt, angle = entry
         return geo.Point(time=time_ms / 1000, lon=lon, lat=lat, alt=alt, angle=angle)
 
-    @classmethod
-    def _from_video_desc(cls, desc: VideoDescription) -> VideoMetadata:
-        validate_video_desc(desc)
-
-        return VideoMetadata(
-            filename=Path(desc["filename"]),
-            md5sum=desc.get("md5sum"),
-            filesize=desc.get("filesize"),
-            filetype=FileType(desc["filetype"]),
-            points=[cls._decode_point(entry) for entry in desc["MAPGPSTrack"]],
-            make=desc.get("MAPDeviceMake"),
-            model=desc.get("MAPDeviceModel"),
-        )
-
 
 def build_capture_time(time: datetime.datetime | int | float) -> str:
     if isinstance(time, (float, int)):
@@ -554,34 +583,5 @@ def desc_file_to_exif(desc: ImageDescription) -> ImageDescription:
     return T.cast(ImageDescription, removed)
 
 
-def _describe_error_desc(
-    exc: Exception, filename: Path, filetype: FileType | None
-) -> ImageDescriptionError:
-    err: _ErrorDescription = {
-        "type": exc.__class__.__name__,
-        "message": str(exc),
-    }
-
-    exc_vars = vars(exc)
-
-    if exc_vars:
-        # handle unserializable exceptions
-        try:
-            vars_json = json.dumps(exc_vars)
-        except Exception:
-            vars_json = ""
-        if vars_json:
-            err["vars"] = json.loads(vars_json)
-
-    desc: ImageDescriptionError = {
-        "error": err,
-        "filename": str(filename.resolve()),
-    }
-    if filetype is not None:
-        desc["filetype"] = filetype.value
-
-    return desc
-
-
 if __name__ == "__main__":
     print(json.dumps(ImageVideoDescriptionFileSchema, indent=4))
diff --git a/mapillary_tools/serializer/gpx.py b/mapillary_tools/serializer/gpx.py
index d44e69235..2dbcaa846 100644
--- a/mapillary_tools/serializer/gpx.py
+++ b/mapillary_tools/serializer/gpx.py
@@ -129,4 +129,4 @@ def _build_gpx_description(
     desc = T.cast(T.Dict, DescriptionJSONSerializer.as_desc(metadata))
     for prop in excluded_properties:
         desc.pop(prop, None)
-    return json.dumps(desc)
+    return json.dumps(desc, sort_keys=True, separators=(",", ":"))
diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py
index 99ba0ae49..be8878820 100644
--- a/mapillary_tools/upload.py
+++ b/mapillary_tools/upload.py
@@ -18,18 +18,13 @@
     config,
     constants,
     exceptions,
-    geo,
     history,
     ipc,
-    telemetry,
     types,
     uploader,
    utils,
     VERSION,
 )
-from .camm import camm_builder, camm_parser
-from .gpmf import gpmf_parser
-from .mp4 import simple_mp4_builder
 from .serializer.description import DescriptionJSONSerializer
 from .types import FileType
 
@@ -47,8 +42,11 @@ def upload(
     user_items: config.UserItem,
     desc_path: str | None = None,
     _metadatas_from_process: T.Sequence[types.MetadataOrError] | None = None,
-    dry_run=False,
-    skip_subfolders=False,
+    reupload: bool = False,
+    dry_run: bool = False,
+    nofinish: bool = False,
+    noresume: bool = False,
+    skip_subfolders: bool = False,
 ) -> None:
     import_paths = _normalize_import_paths(import_path)
 
@@ -60,15 +58,8 @@ def upload(
 
     emitter = uploader.EventEmitter()
 
-    # When dry_run mode is on, we disable history by default.
-    # But we need dry_run for tests, so we added MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
-    # and when it is on, we enable history regardless of dry_run
-    enable_history = constants.MAPILLARY_UPLOAD_HISTORY_PATH and (
-        not dry_run or constants.MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
-    )
-
-    # Put it first one to check duplications first
-    if enable_history:
+    # Check for duplicates first
+    if not _is_history_disabled(dry_run):
         upload_run_params: JSONDict = {
             # Null if multiple paths provided
             "import_path": str(import_path) if isinstance(import_path, Path) else None,
@@ -77,7 +68,9 @@ def upload(
             "version": VERSION,
             "run_at": time.time(),
         }
-        _setup_history(emitter, upload_run_params, metadatas)
+        _setup_history(
+            emitter, upload_run_params, metadatas, reupload=reupload, nofinish=nofinish
+        )
 
     # Set up tdqm
     _setup_tdqm(emitter)
@@ -88,7 +81,13 @@ def upload(
     # Send the progress via IPC, and log the progress in debug mode
     _setup_ipc(emitter)
 
-    mly_uploader = uploader.Uploader(user_items, emitter=emitter, dry_run=dry_run)
+    mly_uploader = uploader.Uploader(
+        user_items,
+        emitter=emitter,
+        dry_run=dry_run,
+        nofinish=nofinish,
+        noresume=noresume,
+    )
 
     results = _gen_upload_everything(
         mly_uploader, metadatas, import_paths, skip_subfolders
@@ -139,38 +138,77 @@ def zip_images(import_path: Path, zip_dir: Path, desc_path: str | None = None):
         metadata for metadata in metadatas if isinstance(metadata, types.ImageMetadata)
     ]
 
-    uploader.ZipImageSequence.zip_images(image_metadatas, zip_dir)
+    uploader.ZipUploader.zip_images(image_metadatas, zip_dir)
+
+
+def _is_history_disabled(dry_run: bool) -> bool:
+    # There is no way to read/write history if the path is not set
+    if not constants.MAPILLARY_UPLOAD_HISTORY_PATH:
+        return True
+
+    if dry_run:
+        # When dry_run mode is on, we disable history by default.
+        # However, we need dry_run for tests, so we added MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN;
+        # when it is on, we enable history regardless of dry_run
+        if constants.MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN:
+            return False
+        else:
+            return True
+
+    return False
 
 
 def _setup_history(
     emitter: uploader.EventEmitter,
     upload_run_params: JSONDict,
     metadatas: list[types.Metadata],
+    reupload: bool,
+    nofinish: bool,
 ) -> None:
     @emitter.on("upload_start")
     def check_duplication(payload: uploader.Progress):
         md5sum = payload.get("sequence_md5sum")
         assert md5sum is not None, f"md5sum has to be set for {payload}"
 
-        if history.is_uploaded(md5sum):
+        record = history.read_history_record(md5sum)
+
+        if record is not None:
             sequence_uuid = payload.get("sequence_uuid")
+            history_desc_path = history.history_desc_path(md5sum)
+            uploaded_at = record.get("summary", {}).get("upload_end_time", None)
+
             if sequence_uuid is None:
                 basename = os.path.basename(payload.get("import_path", ""))
-                LOG.info(
-                    "File %s has been uploaded already. Check the upload history at %s",
-                    basename,
-                    history.history_desc_path(md5sum),
-                )
+                name = f"file {basename}"
+
+            else:
+                name = f"sequence {sequence_uuid}"
+
+            if reupload:
+                if uploaded_at is not None:
+                    LOG.info(
+                        f"Reuploading {name} (previously uploaded at {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+                    )
+                else:
+                    LOG.info(
+                        f"Reuploading {name} (already uploaded, see {history_desc_path})"
+                    )
             else:
-                LOG.info(
-                    "Sequence %s has been uploaded already. Check the upload history at %s",
-                    sequence_uuid,
-                    history.history_desc_path(md5sum),
-                )
-            raise UploadedAlreadyError()
+                if uploaded_at is not None:
+                    LOG.info(
+                        f"Skipping {name} (already uploaded at {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+                    )
+                else:
+                    LOG.info(
+                        f"Skipping {name} (already uploaded, see {history_desc_path})"
+                    )
+                raise UploadedAlreadyError()
 
     @emitter.on("upload_finished")
     def write_history(payload: uploader.Progress):
+        if nofinish:
+            return
+
         sequence_uuid = payload.get("sequence_uuid")
         md5sum = payload.get("sequence_md5sum")
         assert md5sum is not None, f"md5sum has to be set for {payload}"
@@ -188,10 +226,7 @@ def write_history(payload: uploader.Progress):
 
         try:
             history.write_history(
-                md5sum,
-                upload_run_params,
-                T.cast(JSONDict, payload),
-                sequence,
+                md5sum, upload_run_params, T.cast(JSONDict, payload), sequence
             )
         except OSError:
             LOG.warning("Error writing upload history %s", md5sum, exc_info=True)
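For reference, the record returned by `history.read_history_record` is the JSON written by `write_history` above; a sketch of its shape, with illustrative values:

```python
# Illustrative upload-history record, per write_history/check_duplication above.
record = {
    "params": {"import_path": "captures", "version": "0.0.0", "run_at": 1700000000.0},
    "summary": {"upload_end_time": 1700000123.4},  # read back as uploaded_at
    "descs": [],  # serialized metadatas, present when provided
}
uploaded_at = record.get("summary", {}).get("upload_end_time", None)
```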
Bye.") def _api_logging_finished(summary: dict): @@ -452,109 +499,18 @@ def _gen_upload_everything( (m for m in metadatas if isinstance(m, types.ImageMetadata)), utils.find_images(import_paths, skip_subfolders=skip_subfolders), ) - for image_result in uploader.ZipImageSequence.upload_images( - mly_uploader, - image_metadatas, - ): - yield image_result + yield from uploader.ImageUploader.upload_images(mly_uploader, image_metadatas) # Upload videos video_metadatas = _find_metadata_with_filename_existed_in( (m for m in metadatas if isinstance(m, types.VideoMetadata)), utils.find_videos(import_paths, skip_subfolders=skip_subfolders), ) - for video_result in _gen_upload_videos(mly_uploader, video_metadatas): - yield video_result + yield from uploader.VideoUploader.upload_videos(mly_uploader, video_metadatas) # Upload zip files zip_paths = utils.find_zipfiles(import_paths, skip_subfolders=skip_subfolders) - for zip_result in _gen_upload_zipfiles(mly_uploader, zip_paths): - yield zip_result - - -def _gen_upload_videos( - mly_uploader: uploader.Uploader, video_metadatas: T.Sequence[types.VideoMetadata] -) -> T.Generator[tuple[types.VideoMetadata, uploader.UploadResult], None, None]: - for idx, video_metadata in enumerate(video_metadatas): - try: - video_metadata.update_md5sum() - except Exception as ex: - yield video_metadata, uploader.UploadResult(error=ex) - continue - - assert isinstance(video_metadata.md5sum, str), "md5sum should be updated" - - # Convert video metadata to CAMMInfo - camm_info = _prepare_camm_info(video_metadata) - - # Create the CAMM sample generator - camm_sample_generator = camm_builder.camm_sample_generator2(camm_info) - - progress: uploader.SequenceProgress = { - "total_sequence_count": len(video_metadatas), - "sequence_idx": idx, - "file_type": video_metadata.filetype.value, - "import_path": str(video_metadata.filename), - "sequence_md5sum": video_metadata.md5sum, - } - - try: - with video_metadata.filename.open("rb") as src_fp: - # Build the mp4 stream with the CAMM samples - camm_fp = simple_mp4_builder.transform_mp4( - src_fp, camm_sample_generator - ) - - # Upload the mp4 stream - file_handle = mly_uploader.upload_stream( - T.cast(T.IO[bytes], camm_fp), - progress=T.cast(T.Dict[str, T.Any], progress), - ) - cluster_id = mly_uploader.finish_upload( - file_handle, - api_v4.ClusterFileType.CAMM, - progress=T.cast(T.Dict[str, T.Any], progress), - ) - except Exception as ex: - yield video_metadata, uploader.UploadResult(error=ex) - else: - yield video_metadata, uploader.UploadResult(result=cluster_id) - - -def _prepare_camm_info(video_metadata: types.VideoMetadata) -> camm_parser.CAMMInfo: - camm_info = camm_parser.CAMMInfo( - make=video_metadata.make or "", model=video_metadata.model or "" - ) - - for point in video_metadata.points: - if isinstance(point, telemetry.CAMMGPSPoint): - if camm_info.gps is None: - camm_info.gps = [] - camm_info.gps.append(point) - - elif isinstance(point, telemetry.GPSPoint): - # There is no proper CAMM entry for GoPro GPS - if camm_info.mini_gps is None: - camm_info.mini_gps = [] - camm_info.mini_gps.append(point) - - elif isinstance(point, geo.Point): - if camm_info.mini_gps is None: - camm_info.mini_gps = [] - camm_info.mini_gps.append(point) - else: - raise ValueError(f"Unknown point type: {point}") - - if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU: - if video_metadata.filetype is FileType.GOPRO: - with video_metadata.filename.open("rb") as fp: - gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True) - if gopro_info is not 
None: - camm_info.accl = gopro_info.accl or [] - camm_info.gyro = gopro_info.gyro or [] - camm_info.magn = gopro_info.magn or [] - - return camm_info + yield from uploader.ZipUploader.upload_zipfiles(mly_uploader, zip_paths) def _normalize_import_paths(import_path: Path | T.Sequence[Path]) -> list[Path]: @@ -615,27 +571,6 @@ def _continue_or_fail(ex: Exception) -> Exception: raise ex -def _gen_upload_zipfiles( - mly_uploader: uploader.Uploader, zip_paths: T.Sequence[Path] -) -> T.Generator[tuple[Path, uploader.UploadResult], None, None]: - for idx, zip_path in enumerate(zip_paths): - progress: uploader.SequenceProgress = { - "total_sequence_count": len(zip_paths), - "sequence_idx": idx, - "import_path": str(zip_path), - "file_type": types.FileType.ZIP.value, - "sequence_md5sum": "", # Placeholder, will be set in upload_zipfile - } - try: - cluster_id = uploader.ZipImageSequence.upload_zipfile( - mly_uploader, zip_path, progress=T.cast(T.Dict[str, T.Any], progress) - ) - except Exception as ex: - yield zip_path, uploader.UploadResult(error=ex) - else: - yield zip_path, uploader.UploadResult(result=cluster_id) - - def _load_descs( _metadatas_from_process: T.Sequence[types.MetadataOrError] | None, import_paths: T.Sequence[Path], @@ -700,9 +635,9 @@ def _find_desc_path(import_paths: T.Sequence[Path]) -> str: if 1 < len(import_paths): raise exceptions.MapillaryBadParameterError( - "The description path must be specified (with --desc_path) when uploading multiple paths", + "The description path must be specified (with --desc_path) when uploading multiple paths" ) else: raise exceptions.MapillaryBadParameterError( - "The description path must be specified (with --desc_path) when uploading a single file", + "The description path must be specified (with --desc_path) when uploading a single file" ) diff --git a/mapillary_tools/uploader.py b/mapillary_tools/uploader.py index 388fcd3fa..fe7a58971 100644 --- a/mapillary_tools/uploader.py +++ b/mapillary_tools/uploader.py @@ -25,7 +25,20 @@ import requests -from . import api_v4, config, constants, exif_write, types, upload_api_v4, utils +from . 
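A minimal sketch of how the new keyword flags thread through the `upload()` entry point above (the path and `user_items` are placeholders):

```python
from pathlib import Path

# Hypothetical invocation; user_items would come from authenticate.fetch_user_items().
upload(
    Path("captures"),
    user_items,
    reupload=False,  # do not force re-upload of sequences found in the history
    dry_run=True,    # send data to a local directory instead of Mapillary servers
    nofinish=False,  # True: upload but never finalize (nothing appears on the site)
    noresume=False,  # True: fresh session keys, ignore interrupted sessions
)
```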
diff --git a/mapillary_tools/uploader.py b/mapillary_tools/uploader.py
index 388fcd3fa..fe7a58971 100644
--- a/mapillary_tools/uploader.py
+++ b/mapillary_tools/uploader.py
@@ -25,7 +25,20 @@
 
 import requests
 
-from . import api_v4, config, constants, exif_write, types, upload_api_v4, utils
+from . import (
+    api_v4,
+    config,
+    constants,
+    exif_write,
+    geo,
+    telemetry,
+    types,
+    upload_api_v4,
+    utils,
+)
+from .camm import camm_builder, camm_parser
+from .gpmf import gpmf_parser
+from .mp4 import simple_mp4_builder
 from .serializer.description import (
     desc_file_to_exif,
     DescriptionJSONSerializer,
@@ -154,7 +167,122 @@ class UploadResult:
     error: Exception | None = None
 
 
-class ZipImageSequence:
+class VideoUploader:
+    @classmethod
+    def upload_videos(
+        cls, mly_uploader: Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+    ) -> T.Generator[tuple[types.VideoMetadata, UploadResult], None, None]:
+        for idx, video_metadata in enumerate(video_metadatas):
+            try:
+                video_metadata.update_md5sum()
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+                continue
+
+            assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+            progress: SequenceProgress = {
+                "total_sequence_count": len(video_metadatas),
+                "sequence_idx": idx,
+                "file_type": video_metadata.filetype.value,
+                "import_path": str(video_metadata.filename),
+                "sequence_md5sum": video_metadata.md5sum,
+            }
+
+            try:
+                with cls.build_camm_stream(video_metadata) as camm_fp:
+                    # Upload the mp4 stream
+                    file_handle = mly_uploader.upload_stream(
+                        T.cast(T.IO[bytes], camm_fp),
+                        progress=T.cast(T.Dict[str, T.Any], progress),
+                    )
+
+                cluster_id = mly_uploader.finish_upload(
+                    file_handle,
+                    api_v4.ClusterFileType.CAMM,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+            else:
+                yield video_metadata, UploadResult(result=cluster_id)
+
+    @classmethod
+    @contextmanager
+    def build_camm_stream(cls, video_metadata: types.VideoMetadata):
+        # Convert video metadata to CAMMInfo
+        camm_info = cls.prepare_camm_info(video_metadata)
+
+        # Create the CAMM sample generator
+        camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+        with video_metadata.filename.open("rb") as src_fp:
+            # Build the mp4 stream with the CAMM samples
+            yield simple_mp4_builder.transform_mp4(src_fp, camm_sample_generator)
+
+    @classmethod
+    def prepare_camm_info(
+        cls, video_metadata: types.VideoMetadata
+    ) -> camm_parser.CAMMInfo:
+        camm_info = camm_parser.CAMMInfo(
+            make=video_metadata.make or "", model=video_metadata.model or ""
+        )
+
+        for point in video_metadata.points:
+            if isinstance(point, telemetry.CAMMGPSPoint):
+                if camm_info.gps is None:
+                    camm_info.gps = []
+                camm_info.gps.append(point)
+
+            elif isinstance(point, telemetry.GPSPoint):
+                # There is no proper CAMM entry for GoPro GPS
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+
+            elif isinstance(point, geo.Point):
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+            else:
+                raise ValueError(f"Unknown point type: {point}")
+
+        if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+            if video_metadata.filetype is types.FileType.GOPRO:
+                with video_metadata.filename.open("rb") as fp:
+                    gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+                if gopro_info is not None:
+                    camm_info.accl = gopro_info.accl or []
+                    camm_info.gyro = gopro_info.gyro or []
+                    camm_info.magn = gopro_info.magn or []
+
+        return camm_info
+
+
+class ZipUploader:
+    @classmethod
+    def upload_zipfiles(
+        cls, mly_uploader: Uploader, zip_paths: T.Sequence[Path]
+    ) -> T.Generator[tuple[Path, UploadResult], None, None]:
+        for idx, zip_path in enumerate(zip_paths):
+            progress: SequenceProgress = {
+                "total_sequence_count": len(zip_paths),
+                "sequence_idx": idx,
+                "import_path": str(zip_path),
+                "file_type": types.FileType.ZIP.value,
+                "sequence_md5sum": "",  # Placeholder, will be set in upload_zipfile
+            }
+            try:
+                cluster_id = cls._upload_zipfile(
+                    mly_uploader,
+                    zip_path,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield zip_path, UploadResult(error=ex)
+            else:
+                yield zip_path, UploadResult(result=cluster_id)
+
     @classmethod
     def zip_images(
         cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
@@ -173,38 +301,93 @@ def zip_images(
         )
         with cls._wip_file_context(wip_zip_filename) as wip_path:
             with wip_path.open("wb") as wip_fp:
-                cls.zip_sequence_fp(sequence, wip_fp)
+                cls._zip_sequence_fp(sequence, wip_fp)
 
     @classmethod
-    @contextmanager
-    def _wip_file_context(cls, wip_path: Path):
-        try:
-            os.remove(wip_path)
-        except FileNotFoundError:
-            pass
-        try:
-            yield wip_path
+    def zip_images_and_upload(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
 
-            with wip_path.open("rb") as fp:
-                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            try:
+                _validate_metadatas(sequence)
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+                continue
 
-            done_path = wip_path.parent.joinpath(
-                _session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
+            with tempfile.NamedTemporaryFile() as fp:
+                try:
+                    sequence_md5sum = cls._zip_sequence_fp(sequence, fp)
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                sequence_progress: SequenceProgress = {
+                    "sequence_idx": sequence_idx,
+                    "total_sequence_count": len(sequences),
+                    "sequence_image_count": len(sequence),
+                    "sequence_uuid": sequence_uuid,
+                    "file_type": types.FileType.ZIP.value,
+                    "sequence_md5sum": sequence_md5sum,
+                }
+
+                try:
+                    file_handle = uploader.upload_stream(
+                        fp, progress=T.cast(T.Dict[str, T.Any], sequence_progress)
+                    )
+                    cluster_id = uploader.finish_upload(
+                        file_handle,
+                        api_v4.ClusterFileType.ZIP,
+                        progress=T.cast(T.Dict[str, T.Any], sequence_progress),
+                    )
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+    @classmethod
+    def _upload_zipfile(
+        cls,
+        uploader: Uploader,
+        zip_path: Path,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
+
+        with zipfile.ZipFile(zip_path) as ziph:
+            namelist = ziph.namelist()
+            if not namelist:
+                raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+        with zip_path.open("rb") as zip_fp:
+            sequence_md5sum = cls._extract_sequence_md5sum(zip_fp)
+
+        # Send the copy of the input progress to each upload session, to avoid modifying the original one
+        mutable_progress: SequenceProgress = {
+            **T.cast(SequenceProgress, progress),
+            "sequence_image_count": len(namelist),
+            "sequence_md5sum": sequence_md5sum,
+            "file_type": types.FileType.ZIP.value,
+        }
+
+        with zip_path.open("rb") as zip_fp:
+            file_handle = uploader.upload_stream(
+                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
             )
+
+        cluster_id = uploader.finish_upload(
+            file_handle,
+            api_v4.ClusterFileType.ZIP,
            progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+        )
+
+        return cluster_id
 
     @classmethod
-    def zip_sequence_fp(
+    def _zip_sequence_fp(
         cls,
         sequence: T.Sequence[types.ImageMetadata],
         zip_fp: T.IO[bytes],
@@ -226,16 +409,18 @@ def zip_sequence_fp(
             # Arcname should be unique, the name does not matter
             arcname = f"{idx}.jpg"
             zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-            zipf.writestr(zipinfo, cls._dump_image_bytes(metadata))
+            zipf.writestr(zipinfo, ImageUploader.dump_image_bytes(metadata))
         assert len(sequence) == len(set(zipf.namelist()))
-        zipf.comment = json.dumps({"sequence_md5sum": sequence_md5sum}).encode(
-            "utf-8"
-        )
+        zipf.comment = json.dumps(
+            {"sequence_md5sum": sequence_md5sum},
+            sort_keys=True,
+            separators=(",", ":"),
+        ).encode("utf-8")
 
         return sequence_md5sum
 
     @classmethod
-    def extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
+    def _extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
         with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
             comment = ziph.comment
 
@@ -258,115 +443,84 @@ def extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
         return sequence_md5sum
 
     @classmethod
-    def _dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+    @contextmanager
+    def _wip_file_context(cls, wip_path: Path):
         try:
-            edit = exif_write.ExifEdit(metadata.filename)
-        except struct.error as ex:
-            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
-
-        # The cast is to fix the type checker error
-        edit.add_image_description(
-            T.cast(
-                T.Dict,
-                desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata)),
-            )
-        )
-
+            os.remove(wip_path)
+        except FileNotFoundError:
+            pass
         try:
-            return edit.dump_image_bytes()
-        except struct.error as ex:
-            raise ExifError(
-                f"Failed to dump EXIF bytes: {ex}", metadata.filename
-            ) from ex
-
-    @classmethod
-    def upload_zipfile(
-        cls,
-        uploader: Uploader,
-        zip_path: Path,
-        progress: dict[str, T.Any] | None = None,
-    ) -> str:
-        if progress is None:
-            progress = {}
-
-        with zipfile.ZipFile(zip_path) as ziph:
-            namelist = ziph.namelist()
-            if not namelist:
-                raise InvalidMapillaryZipFileError("Zipfile has no files")
-
-        with zip_path.open("rb") as zip_fp:
-            sequence_md5sum = cls.extract_sequence_md5sum(zip_fp)
+            yield wip_path
 
-        # Send the copy of the input progress to each upload session, to avoid modifying the original one
-        mutable_progress: SequenceProgress = {
-            **T.cast(SequenceProgress, progress),
-            "sequence_image_count": len(namelist),
-            "sequence_md5sum": sequence_md5sum,
-            "file_type": types.FileType.ZIP.value,
-        }
+            with wip_path.open("rb") as fp:
+                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
 
-        with zip_path.open("rb") as zip_fp:
-            file_handle = uploader.upload_stream(
-                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
+            done_path = wip_path.parent.joinpath(
+                _session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
             )
-            cluster_id = uploader.finish_upload(
-                file_handle,
-                api_v4.ClusterFileType.ZIP,
-                progress=T.cast(T.Dict[str, T.Any], mutable_progress),
-            )
+            try:
+                os.remove(done_path)
+            except FileNotFoundError:
+                pass
+            wip_path.rename(done_path)
+        finally:
+            try:
+                os.remove(wip_path)
+            except FileNotFoundError:
+                pass
 
-        return cluster_id
 
+class ImageUploader:
     @classmethod
-    def zip_images_and_upload(
-        cls,
-        uploader: Uploader,
-        image_metadatas: T.Sequence[types.ImageMetadata],
-        progress: dict[str, T.Any] | None = None,
+    def upload_images(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
     ) -> T.Generator[tuple[str, UploadResult], None, None]:
-        if progress is None:
-            progress = {}
-
         sequences = types.group_and_sort_images(image_metadatas)
 
         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            sequence_md5sum = types.update_sequence_md5sum(sequence)
+
+            sequence_progress: SequenceProgress = {
+                "sequence_idx": sequence_idx,
+                "total_sequence_count": len(sequences),
+                "sequence_image_count": len(sequence),
+                "sequence_uuid": sequence_uuid,
+                "file_type": types.FileType.IMAGE.value,
+                "sequence_md5sum": sequence_md5sum,
+            }
+
             try:
-                _validate_metadatas(sequence)
+                cluster_id = cls._upload_sequence(
+                    uploader,
+                    sequence,
+                    progress=T.cast(dict[str, T.Any], sequence_progress),
+                )
             except Exception as ex:
                 yield sequence_uuid, UploadResult(error=ex)
-                continue
-
-            with tempfile.NamedTemporaryFile() as fp:
-                try:
-                    sequence_md5sum = cls.zip_sequence_fp(sequence, fp)
-                except Exception as ex:
-                    yield sequence_uuid, UploadResult(error=ex)
-                    continue
-
-                sequence_progress: SequenceProgress = {
-                    "sequence_idx": sequence_idx,
-                    "total_sequence_count": len(sequences),
-                    "sequence_image_count": len(sequence),
-                    "sequence_uuid": sequence_uuid,
-                    "file_type": types.FileType.ZIP.value,
-                    "sequence_md5sum": sequence_md5sum,
-                }
+            else:
+                yield sequence_uuid, UploadResult(result=cluster_id)
 
-                mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}
+    @classmethod
+    def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+        try:
+            edit = exif_write.ExifEdit(metadata.filename)
+        except struct.error as ex:
+            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
 
-                try:
-                    file_handle = uploader.upload_stream(fp, progress=mutable_progress)
-                    cluster_id = uploader.finish_upload(
-                        file_handle,
-                        api_v4.ClusterFileType.ZIP,
-                        progress=mutable_progress,
-                    )
-                except Exception as ex:
-                    yield sequence_uuid, UploadResult(error=ex)
-                    continue
+        # The cast is to fix the type checker error
+        edit.add_image_description(
+            T.cast(
+                T.Dict, desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata))
+            )
+        )
 
-                yield sequence_uuid, UploadResult(result=cluster_id)
+        try:
+            return edit.dump_image_bytes()
+        except struct.error as ex:
+            raise ExifError(
+                f"Failed to dump EXIF bytes: {ex}", metadata.filename
+            ) from ex
 
     @classmethod
     def _upload_sequence(
@@ -389,7 +543,7 @@ def _upload_image(image_metadata: types.ImageMetadata) -> str:
                 "filename": str(image_metadata.filename),
             }
 
-            bytes = cls._dump_image_bytes(image_metadata)
+            bytes = cls.dump_image_bytes(image_metadata)
             file_handle = uploader_without_emitter.upload_stream(
                 io.BytesIO(bytes), progress=mutable_progress
             )
@@ -421,10 +575,14 @@ def _upload_image(image_metadata: types.ImageMetadata) -> str:
         }
 
         with io.BytesIO() as manifest_fp:
-            manifest_fp.write(json.dumps(manifest).encode("utf-8"))
+            manifest_fp.write(
+                json.dumps(manifest, sort_keys=True, separators=(",", ":")).encode(
+                    "utf-8"
+                )
+            )
             manifest_fp.seek(0, io.SEEK_SET)
             manifest_file_handle = uploader_without_emitter.upload_stream(
-                manifest_fp, session_key=f"{uuid.uuid4().hex}.json"
+                manifest_fp, session_key=f"uuid_{uuid.uuid4().hex}.json"
             )
 
         uploader.emitter.emit("upload_end", progress)
@@ -437,41 +595,6 @@ def _upload_image(image_metadata: types.ImageMetadata) -> str:
 
         return cluster_id
 
-    @classmethod
-    def upload_images(
-        cls,
-        uploader: Uploader,
-        image_metadatas: T.Sequence[types.ImageMetadata],
-        progress: dict[str, T.Any] | None = None,
-    ) -> T.Generator[tuple[str, UploadResult], None, None]:
-        if progress is None:
-            progress = {}
-
-        sequences = types.group_and_sort_images(image_metadatas)
-
-        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-            sequence_md5sum = types.update_sequence_md5sum(sequence)
-
-            sequence_progress: SequenceProgress = {
-                "sequence_idx": sequence_idx,
-                "total_sequence_count": len(sequences),
-                "sequence_image_count": len(sequence),
-                "sequence_uuid": sequence_uuid,
-                "file_type": types.FileType.IMAGE.value,
-                "sequence_md5sum": sequence_md5sum,
-            }
-
-            mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}
-
-            try:
-                cluster_id = cls._upload_sequence(
-                    uploader, sequence, progress=mutable_progress
-                )
-            except Exception as ex:
-                yield sequence_uuid, UploadResult(error=ex)
-            else:
-                yield sequence_uuid, UploadResult(result=cluster_id)
-
 
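The resume semantics hinge on how `Uploader.upload_stream` derives session keys (see the hunk below); a sketch of the two modes, assuming the helpers used in this module:

```python
import io
import uuid

from mapillary_tools import utils

fp = io.BytesIO(b"example payload")

# Default: content-addressed key. Re-running the same upload yields the same
# key, so the server can resume from the previously uploaded offset.
fp.seek(0, io.SEEK_SET)
resumable_key = utils.md5sum_fp(fp).hexdigest()

# --noresume: a random key per run, so no earlier session can ever match.
fresh_key = f"uuid_{uuid.uuid4().hex}"
```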
 class Uploader:
     def __init__(
@@ -479,7 +602,9 @@ def __init__(
         user_items: config.UserItem,
         emitter: EventEmitter | None = None,
         chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
-        dry_run=False,
+        dry_run: bool = False,
+        nofinish: bool = False,
+        noresume: bool = False,
     ):
         self.user_items = user_items
         if emitter is None:
@@ -489,6 +614,8 @@
         self.emitter = emitter
         self.chunk_size = chunk_size
         self.dry_run = dry_run
+        self.nofinish = nofinish
+        self.noresume = noresume
 
     def upload_stream(
         self,
@@ -500,13 +627,17 @@
             progress = {}
 
         if session_key is None:
-            fp.seek(0, io.SEEK_SET)
-            md5sum = utils.md5sum_fp(fp).hexdigest()
+            if self.noresume:
+                # Generate a unique UUID for session_key when noresume is True
+                # to prevent resuming from previous uploads
+                session_key = f"uuid_{uuid.uuid4().hex}"
+            else:
+                fp.seek(0, io.SEEK_SET)
+                session_key = utils.md5sum_fp(fp).hexdigest()
+
             filetype = progress.get("file_type")
             if filetype is not None:
-                session_key = _session_key(md5sum, types.FileType(filetype))
-            else:
-                session_key = md5sum
+                session_key = _session_key(session_key, types.FileType(filetype))
 
         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()
@@ -546,7 +677,7 @@
         if progress is None:
             progress = {}
 
-        if self.dry_run:
+        if self.dry_run or self.nofinish:
             cluster_id = "0"
         else:
             resp = api_v4.finish_upload(
@@ -572,6 +703,8 @@
             emitter=None,
             chunk_size=self.chunk_size,
             dry_run=self.dry_run,
+            nofinish=self.nofinish,
+            noresume=self.noresume,
         )
 
     def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService:
diff --git a/tests/unit/test_camm_parser.py b/tests/unit/test_camm_parser.py
index 361a173ec..00e44dbe2 100644
--- a/tests/unit/test_camm_parser.py
+++ b/tests/unit/test_camm_parser.py
@@ -3,7 +3,7 @@
 import typing as T
 from pathlib import Path
 
-from mapillary_tools import geo, telemetry, types, upload
+from mapillary_tools import geo, telemetry, types, uploader
 from mapillary_tools.camm import camm_builder, camm_parser
 from mapillary_tools.mp4 import construct_mp4_parser as cparser, simple_mp4_builder
 
@@ -57,7 +57,7 @@ def encode_decode_empty_camm_mp4(metadata: types.VideoMetadata) -> types.VideoMe
         {"type": b"moov", "data": [mvhd]},
     ]
     src = cparser.MP4WithoutSTBLBuilderConstruct.build_boxlist(empty_mp4)
-    input_camm_info = upload._prepare_camm_info(metadata)
+    input_camm_info = uploader.VideoUploader.prepare_camm_info(metadata)
     target_fp = simple_mp4_builder.transform_mp4(
         io.BytesIO(src), camm_builder.camm_sample_generator2(input_camm_info)
     )
diff --git a/tests/unit/test_description.py b/tests/unit/test_description.py
index f6aad7a8a..04ed5c026 100644
--- a/tests/unit/test_description.py
+++ b/tests/unit/test_description.py
@@ -97,18 +97,20 @@ def test_serialize_empty():
 def test_serialize_image_description_ok():
     desc = [
         {
-            "MAPLatitude": 1,
-            "MAPLongitude": 2,
-            "MAPCaptureTime": "2020_01_02_11_12_13_1",
+            "MAPLatitude": 1.2,
+            "MAPLongitude": 2.33,
+            "MAPCaptureTime": "2020_01_02_11_12_13_100",
             "filename": "foo你好",
             "filetype": "image",
         }
     ]
     metadatas = DescriptionJSONSerializer.deserialize(json.dumps(desc).encode("utf-8"))
-    actual = json.loads(DescriptionJSONSerializer.serialize(metadatas))
-    assert len(actual) == 1, actual
-    actual = actual[0]
-    assert "2020_01_02_11_12_13_100" == actual["MAPCaptureTime"]
-    assert "foo你好" == Path(actual["filename"]).name
-    assert 1 == actual["MAPLatitude"]
-    assert 2 == actual["MAPLongitude"]
+    s1 = DescriptionJSONSerializer.serialize(metadatas)
+    # Serialization should be deterministic
+    s2 = DescriptionJSONSerializer.serialize(metadatas)
+    assert s1 == s2
+    actual_descs = json.loads(s1)
+    assert {**desc[0], "md5sum": None, "filesize": None} == {
+        **actual_descs[0],
+        "filename": Path(actual_descs[0]["filename"]).name,
+    }
diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py
index d70c6e1b2..0692bb306 100644
--- a/tests/unit/test_exceptions.py
+++ b/tests/unit/test_exceptions.py
@@ -25,5 +25,7 @@ def test_all():
         e = exc("hello")
         # should not raise
         json.dumps(
-            description._describe_error_desc(e, Path("test.jpg"), types.FileType.IMAGE)
+            description.DescriptionJSONSerializer._as_error_desc(
+                e, Path("test.jpg"), types.FileType.IMAGE
+            )
         )
diff --git a/tests/unit/test_uploader.py b/tests/unit/test_uploader.py
index 97eaa11c9..8f10abdeb 100644
--- a/tests/unit/test_uploader.py
+++ b/tests/unit/test_uploader.py
@@ -48,7 +48,7 @@ def test_upload_images(setup_unittest_data: py.path.local, setup_upload: py.path
         },
     ]
     results = list(
-        uploader.ZipImageSequence.zip_images_and_upload(
+        uploader.ZipUploader.zip_images_and_upload(
             mly_uploader,
             [
                 description.DescriptionJSONSerializer.from_desc(T.cast(T.Any, desc))
@@ -106,7 +106,7 @@ def test_upload_images_multiple_sequences(
         dry_run=True,
     )
     results = list(
-        uploader.ZipImageSequence.zip_images_and_upload(
+        uploader.ZipUploader.zip_images_and_upload(
             mly_uploader,
             [
                 description.DescriptionJSONSerializer.from_desc(T.cast(T.Any, desc))
@@ -157,7 +157,7 @@ def test_upload_zip(
         },
     ]
     zip_dir = setup_unittest_data.mkdir("zip_dir")
-    uploader.ZipImageSequence.zip_images(
+    uploader.ZipUploader.zip_images(
        [
             description.DescriptionJSONSerializer.from_desc(T.cast(T.Any, desc))
             for desc in descs
@@ -176,7 +176,7 @@ def test_upload_zip(
         emitter=emitter,
     )
     for zip_path in zip_dir.listdir():
-        cluster = uploader.ZipImageSequence.upload_zipfile(mly_uploader, Path(zip_path))
+        cluster = uploader.ZipUploader._upload_zipfile(mly_uploader, Path(zip_path))
         assert cluster == "0"
     actual_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), [])
     assert 3 == len(actual_descs)
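To close the loop on the serializer refactor exercised by these tests, a round-trip sketch of the extracted `PointEncoder` (coordinate precision comes from `_COORDINATES_PRECISION`; the rounded figure below is illustrative):

```python
from mapillary_tools import geo
from mapillary_tools.serializer.description import PointEncoder

p = geo.Point(time=12.5, lon=103.123456789, lat=1.25, alt=None, angle=None)
entry = PointEncoder.encode(p)  # e.g. [12500, 103.1234568, 1.25, None, None]
roundtripped = PointEncoder.decode(entry)
assert roundtripped.time == 12.5  # milliseconds back to seconds
assert roundtripped.lat == 1.25
```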