@@ -315,8 +315,12 @@ def downloaded_size(self) -> int:
315
315
"""Returns the total size of downloaded files."""
316
316
return sum (url_info .size for url_info in self ._recorded_url_infos .values ())
317
317
318
- def _get_dl_path (self , url : str , checksum : str | None = None ) -> epath .Path :
319
- return self ._download_dir / resource_lib .get_dl_fname (url , checksum )
318
def _get_dl_path(
    self,
    resource: resource_lib.Resource,
    checksum: str | None = None,
) -> epath.Path:
  """Builds the path of `resource` inside the download directory.

  Args:
    resource: Resource whose `url` is passed to `resource_lib.get_dl_fname`
      to derive the file name.
    checksum: Optional checksum forwarded to `resource_lib.get_dl_fname`
      together with the URL.

  Returns:
    `self._download_dir` joined with the file name returned by
    `resource_lib.get_dl_fname(resource.url, checksum)`.
  """
  download_dir = self._download_dir
  dl_fname = resource_lib.get_dl_fname(resource.url, checksum)
  return download_dir / dl_fname
320
324
321
325
@property
322
326
def register_checksums (self ):
@@ -352,7 +356,7 @@ def _get_manually_downloaded_path(
352
356
@utils .build_synchronize_decorator ()
353
357
@utils .memoize ()
354
358
def _download (self , resource : Url ) -> promise .Promise [epath .Path ]:
355
- """Download resource, returns Promise->path to downloaded file .
359
+ """Downloads resource or gets downloaded cache .
356
360
357
361
This function:
358
362
@@ -364,13 +368,12 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
364
368
resource: The URL to download.
365
369
366
370
Returns:
367
- path: The path to the downloaded resource.
371
+ Promise of the path to the downloaded resource.
368
372
"""
369
373
# Normalize the input
370
- if isinstance (resource , str ):
371
- url = resource
372
- else :
373
- url = resource .url
374
+ if not isinstance (resource , resource_lib .Resource ):
375
+ resource = resource_lib .Resource (url = resource )
376
+ url = resource .url
374
377
assert url is not None , 'URL is undefined from resource.'
375
378
376
379
expected_url_info = self ._url_infos .get (url )
@@ -382,9 +385,9 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
382
385
manually_downloaded_path = self ._get_manually_downloaded_path (
383
386
expected_url_info = expected_url_info
384
387
)
385
- url_path = self ._get_dl_path (url )
388
+ url_path = self ._get_dl_path (resource )
386
389
checksum_path = (
387
- self ._get_dl_path (url , expected_url_info .checksum )
390
+ self ._get_dl_path (resource , expected_url_info .checksum )
388
391
if expected_url_info
389
392
else None
390
393
)
@@ -396,12 +399,12 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
396
399
url_path = url_path ,
397
400
expected_url_info = expected_url_info ,
398
401
)
399
- if dl_result . path and not self ._force_download : # Download was cached
402
+ if dl_result and not self ._force_download : # Download was cached
400
403
logging .info (
401
404
f'Skipping download of { url } : File cached in { dl_result .path } '
402
405
)
403
406
# Still update the progression bar to indicate the file was downloaded
404
- self ._downloader .increase_tqdm (dl_result )
407
+ self ._downloader .increase_tqdm (dl_result . url_info )
405
408
future = promise .Promise .resolve (dl_result )
406
409
else :
407
410
# Download in a tmp directory next to url_path (to avoid name collisions)
@@ -418,7 +421,7 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
418
421
# Post-process the result
419
422
return future .then (
420
423
lambda dl_result : self ._register_or_validate_checksums ( # pylint: disable=g-long-lambda
421
- url = url ,
424
+ resource = resource ,
422
425
path = dl_result .path ,
423
426
computed_url_info = dl_result .url_info ,
424
427
expected_url_info = expected_url_info ,
@@ -429,10 +432,10 @@ def _download(self, resource: Url) -> promise.Promise[epath.Path]:
429
432
430
433
def _register_or_validate_checksums (
431
434
self ,
435
+ resource : resource_lib .Resource ,
432
436
path : epath .Path ,
433
- url : str ,
434
437
expected_url_info : checksums .UrlInfo | None ,
435
- computed_url_info : checksums .UrlInfo | None ,
438
+ computed_url_info : checksums .UrlInfo ,
436
439
checksum_path : epath .Path | None ,
437
440
url_path : epath .Path ,
438
441
) -> epath .Path :
@@ -443,16 +446,11 @@ def _register_or_validate_checksums(
443
446
# * (cached) url_path
444
447
# * `tmp_dir/file` (downloaded path)
445
448
446
- if computed_url_info :
447
- # Used both in `.downloaded_size` and `_record_url_infos()`
448
- self ._recorded_url_infos [url ] = computed_url_info
449
+ url : str = resource . url # pytype: disable=annotation-type-mismatch
450
+ # Used both in `.downloaded_size` and `_record_url_infos()`
451
+ self ._recorded_url_infos [url ] = computed_url_info
449
452
450
453
if self ._register_checksums :
451
- if not computed_url_info :
452
- raise ValueError (
453
- f'Cannot register checksums for { url } : no computed checksum. '
454
- '--register_checksums with manually downloaded data not supported.'
455
- )
456
454
# Note:
457
455
# * We save even if `expected_url_info == computed_url_info` as
458
456
# `expected_url_info` might have been loaded from another dataset.
@@ -463,7 +461,7 @@ def _register_or_validate_checksums(
463
461
# Checksum path should now match the new registered checksum (even if
464
462
# checksums were previously registered)
465
463
expected_url_info = computed_url_info
466
- checksum_path = self ._get_dl_path (url , computed_url_info .checksum )
464
+ checksum_path = self ._get_dl_path (resource , computed_url_info .checksum )
467
465
else :
468
466
# Eventually validate checksums
469
467
# Note:
@@ -476,9 +474,9 @@ def _register_or_validate_checksums(
476
474
# was corrupted. Note: The tmp file isn't deleted to allow inspection.
477
475
self ._validate_checksums (
478
476
url = url ,
479
- path = path ,
480
477
expected_url_info = expected_url_info ,
481
478
computed_url_info = computed_url_info ,
479
+ path = path ,
482
480
)
483
481
484
482
return self ._rename_and_get_final_dl_path (
@@ -493,17 +491,14 @@ def _register_or_validate_checksums(
493
491
def _validate_checksums (
494
492
self ,
495
493
url : str ,
496
- path : epath .Path ,
497
- computed_url_info : checksums .UrlInfo | None ,
498
494
expected_url_info : checksums .UrlInfo | None ,
495
+ computed_url_info : checksums .UrlInfo ,
496
+ path : epath .Path ,
499
497
) -> None :
500
498
"""Validate computed_url_info match expected_url_info."""
501
499
# If force-checksums validations, both expected and computed url_info
502
500
# should exists
503
501
if self ._force_checksums_validation :
504
- # Checksum of the downloaded file unknown (for manually downloaded file)
505
- if not computed_url_info :
506
- computed_url_info = checksums .compute_url_info (path )
507
502
# Checksums have not been registered
508
503
if not expected_url_info :
509
504
raise ValueError (
@@ -512,11 +507,7 @@ def _validate_checksums(
512
507
'Did you forget to register checksums?'
513
508
)
514
509
515
- if (
516
- expected_url_info
517
- and computed_url_info
518
- and expected_url_info != computed_url_info
519
- ):
510
+ if expected_url_info and expected_url_info != computed_url_info :
520
511
msg = (
521
512
f'Artifact { url } , downloaded to { path } , has wrong checksum:\n '
522
513
f'* Expected: { expected_url_info } \n '
0 commit comments