@@ -315,8 +315,12 @@ def downloaded_size(self) -> int:
315315 """Returns the total size of downloaded files."""
316316 return sum (url_info .size for url_info in self ._recorded_url_infos .values ())
317317
318- def _get_dl_path (self , url : str , checksum : str | None = None ) -> epath .Path :
319- return self ._download_dir / resource_lib .get_dl_fname (url , checksum )
318+ def _get_dl_path (
319+ self , resource : resource_lib .Resource , checksum : str | None = None
320+ ) -> epath .Path :
321+ return self ._download_dir / resource_lib .get_dl_fname (
322+ resource .url , checksum
323+ )
320324
321325 @property
322326 def register_checksums (self ):
@@ -352,7 +356,7 @@ def _get_manually_downloaded_path(
352356 @utils .build_synchronize_decorator ()
353357 @utils .memoize ()
354358 def _download (self , resource : Url ) -> promise .Promise [epath .Path ]:
355- """Download resource, returns Promise->path to downloaded file .
359+ """Downloads resource or gets downloaded cache .
356360
357361 This function:
358362
@@ -364,13 +368,12 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
364368 resource: The URL to download.
365369
366370 Returns:
367- path: The path to the downloaded resource.
371+ Promise of the path to the downloaded resource.
368372 """
369373 # Normalize the input
370- if isinstance (resource , str ):
371- url = resource
372- else :
373- url = resource .url
374+ if not isinstance (resource , resource_lib .Resource ):
375+ resource = resource_lib .Resource (url = resource )
376+ url = resource .url
374377 assert url is not None , 'URL is undefined from resource.'
375378
376379 expected_url_info = self ._url_infos .get (url )
@@ -382,9 +385,9 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
382385 manually_downloaded_path = self ._get_manually_downloaded_path (
383386 expected_url_info = expected_url_info
384387 )
385- url_path = self ._get_dl_path (url )
388+ url_path = self ._get_dl_path (resource )
386389 checksum_path = (
387- self ._get_dl_path (url , expected_url_info .checksum )
390+ self ._get_dl_path (resource , expected_url_info .checksum )
388391 if expected_url_info
389392 else None
390393 )
@@ -396,12 +399,12 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
396399 url_path = url_path ,
397400 expected_url_info = expected_url_info ,
398401 )
399- if dl_result . path and not self ._force_download : # Download was cached
402+ if dl_result and not self ._force_download : # Download was cached
400403 logging .info (
401404 f'Skipping download of { url } : File cached in { dl_result .path } '
402405 )
403406 # Still update the progression bar to indicate the file was downloaded
404- self ._downloader .increase_tqdm (dl_result )
407+ self ._downloader .increase_tqdm (dl_result . url_info )
405408 future = promise .Promise .resolve (dl_result )
406409 else :
407410 # Download in a tmp directory next to url_path (to avoid name collisions)
@@ -418,7 +421,7 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
418421 # Post-process the result
419422 return future .then (
420423 lambda dl_result : self ._register_or_validate_checksums ( # pylint: disable=g-long-lambda
421- url = url ,
424+ resource = resource ,
422425 path = dl_result .path ,
423426 computed_url_info = dl_result .url_info ,
424427 expected_url_info = expected_url_info ,
@@ -429,10 +432,10 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
429432
430433 def _register_or_validate_checksums (
431434 self ,
435+ resource : resource_lib .Resource ,
432436 path : epath .Path ,
433- url : str ,
434437 expected_url_info : checksums .UrlInfo | None ,
435- computed_url_info : checksums .UrlInfo | None ,
438+ computed_url_info : checksums .UrlInfo ,
436439 checksum_path : epath .Path | None ,
437440 url_path : epath .Path ,
438441 ) -> epath .Path :
@@ -443,16 +446,11 @@ def _register_or_validate_checksums(
443446 # * (cached) url_path
444447 # * `tmp_dir/file` (downloaded path)
445448
446- if computed_url_info :
447- # Used both in `.downloaded_size` and `_record_url_infos()`
448- self ._recorded_url_infos [url ] = computed_url_info
449+ url : str = resource . url # pytype: disable=annotation-type-mismatch
450+ # Used both in `.downloaded_size` and `_record_url_infos()`
451+ self ._recorded_url_infos [url ] = computed_url_info
449452
450453 if self ._register_checksums :
451- if not computed_url_info :
452- raise ValueError (
453- f'Cannot register checksums for { url } : no computed checksum. '
454- '--register_checksums with manually downloaded data not supported.'
455- )
456454 # Note:
457455 # * We save even if `expected_url_info == computed_url_info` as
458456 # `expected_url_info` might have been loaded from another dataset.
@@ -463,7 +461,7 @@ def _register_or_validate_checksums(
463461 # Checksum path should now match the new registered checksum (even if
464462 # checksums were previously registered)
465463 expected_url_info = computed_url_info
466- checksum_path = self ._get_dl_path (url , computed_url_info .checksum )
464+ checksum_path = self ._get_dl_path (resource , computed_url_info .checksum )
467465 else :
468466 # Eventually validate checksums
469467 # Note:
@@ -476,9 +474,9 @@ def _register_or_validate_checksums(
476474 # was corrupted. Note: The tmp file isn't deleted to allow inspection.
477475 self ._validate_checksums (
478476 url = url ,
479- path = path ,
480477 expected_url_info = expected_url_info ,
481478 computed_url_info = computed_url_info ,
479+ path = path ,
482480 )
483481
484482 return self ._rename_and_get_final_dl_path (
@@ -493,17 +491,14 @@ def _register_or_validate_checksums(
493491 def _validate_checksums (
494492 self ,
495493 url : str ,
496- path : epath .Path ,
497- computed_url_info : checksums .UrlInfo | None ,
498494 expected_url_info : checksums .UrlInfo | None ,
495+ computed_url_info : checksums .UrlInfo ,
496+ path : epath .Path ,
499497 ) -> None :
500498 """Validate computed_url_info match expected_url_info."""
501499 # If force-checksums validations, both expected and computed url_info
502500 # should exists
503501 if self ._force_checksums_validation :
504- # Checksum of the downloaded file unknown (for manually downloaded file)
505- if not computed_url_info :
506- computed_url_info = checksums .compute_url_info (path )
507502 # Checksums have not been registered
508503 if not expected_url_info :
509504 raise ValueError (
@@ -512,11 +507,7 @@ def _validate_checksums(
512507 'Did you forget to register checksums?'
513508 )
514509
515- if (
516- expected_url_info
517- and computed_url_info
518- and expected_url_info != computed_url_info
519- ):
510+ if expected_url_info and expected_url_info != computed_url_info :
520511 msg = (
521512 f'Artifact { url } , downloaded to { path } , has wrong checksum:\n '
522513 f'* Expected: { expected_url_info } \n '
0 commit comments