Skip to content

Commit 1a94315

Browse files
authored
Merge pull request #1645 from dandi/bf-1594
BF: check for upload to not have our .dandidownload suffixes
2 parents 49d4886 + 28c69d7 commit 1a94315

File tree

4 files changed

+127
-3
lines changed

4 files changed

+127
-3
lines changed

dandi/consts.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,3 +230,6 @@ def urls(self) -> Iterator[str]:
230230
REQUEST_RETRIES = 12
231231

232232
DOWNLOAD_TIMEOUT = 30
233+
234+
#: Suffix used for temporary download directories
235+
DOWNLOAD_SUFFIX = ".dandidownload"

dandi/download.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import requests
2929

3030
from . import get_logger
31-
from .consts import RETRY_STATUSES, dandiset_metadata_file
31+
from .consts import DOWNLOAD_SUFFIX, RETRY_STATUSES, dandiset_metadata_file
3232
from .dandiapi import AssetType, BaseRemoteZarrAsset, RemoteDandiset
3333
from .dandiarchive import (
3434
AssetItemURL,
@@ -863,7 +863,7 @@ def __init__(self, filepath: str | Path, digests: dict[str, str]) -> None:
863863
self.digests = digests
864864
#: The working directory in which downloaded data will be temporarily
865865
#: stored
866-
self.dirpath = self.filepath.with_name(self.filepath.name + ".dandidownload")
866+
self.dirpath = self.filepath.with_name(self.filepath.name + DOWNLOAD_SUFFIX)
867867
#: The file in `dirpath` to which data will be written as it is
868868
#: received
869869
self.writefile = self.dirpath / "file"

dandi/tests/test_upload.py

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from collections import defaultdict
4+
from datetime import datetime, timezone
45
import os
56
from pathlib import Path
67
from shutil import copyfile, rmtree
@@ -19,7 +20,12 @@
1920

2021
from .fixtures import SampleDandiset, sweep_embargo
2122
from .test_helpers import assert_dirtrees_eq
22-
from ..consts import ZARR_MIME_TYPE, EmbargoStatus, dandiset_metadata_file
23+
from ..consts import (
24+
DOWNLOAD_SUFFIX,
25+
ZARR_MIME_TYPE,
26+
EmbargoStatus,
27+
dandiset_metadata_file,
28+
)
2329
from ..dandiapi import AssetType, RemoteBlobAsset, RemoteZarrAsset, RESTFullAPIClient
2430
from ..dandiset import Dandiset
2531
from ..download import download
@@ -614,3 +620,90 @@ def mock_request(self, method, path, **kwargs):
614620
assert (
615621
request_attempts[url] == 1
616622
), f"URL {url} should not have been retried but had {request_attempts[url]} attempts"
623+
624+
625+
@pytest.mark.ai_generated
626+
def test_upload_rejects_dandidownload_paths(
627+
new_dandiset: SampleDandiset, tmp_path: Path
628+
) -> None:
629+
"""Test that upload rejects assets with .dandidownload paths"""
630+
dspath = new_dandiset.dspath
631+
632+
# Test 1: Regular file with .dandidownload in path
633+
badfile_path = dspath / f"test{DOWNLOAD_SUFFIX}" / "file.nwb"
634+
badfile_path.parent.mkdir(parents=True)
635+
make_nwb_file(
636+
badfile_path,
637+
session_description="test session",
638+
identifier="test123",
639+
session_start_time=datetime(2017, 4, 15, 12, tzinfo=timezone.utc),
640+
subject=pynwb.file.Subject(subject_id="test"),
641+
)
642+
643+
with pytest.raises(
644+
UploadError,
645+
match=f"contains {DOWNLOAD_SUFFIX} path which indicates incomplete download",
646+
):
647+
new_dandiset.upload(allow_any_path=True)
648+
649+
# Clean up for next test
650+
rmtree(badfile_path.parent)
651+
652+
# Test 2: Zarr asset with .dandidownload in internal path
653+
zarr_path = dspath / "test.zarr"
654+
zarr.save(zarr_path, np.arange(100))
655+
656+
# Create a .dandidownload directory inside the zarr
657+
bad_zarr_path = zarr_path / f"sub{DOWNLOAD_SUFFIX}"
658+
bad_zarr_path.mkdir()
659+
(bad_zarr_path / "badfile").write_text("bad data")
660+
661+
with pytest.raises(
662+
UploadError,
663+
match=f"Zarr asset contains {DOWNLOAD_SUFFIX} path which indicates incomplete download",
664+
):
665+
new_dandiset.upload()
666+
667+
# Clean up
668+
rmtree(bad_zarr_path)
669+
670+
# Test 3: Zarr asset with .dandidownload in filename
671+
bad_file_in_zarr = zarr_path / f"data{DOWNLOAD_SUFFIX}"
672+
bad_file_in_zarr.write_text("bad data")
673+
674+
with pytest.raises(
675+
UploadError,
676+
match=f"Zarr asset contains {DOWNLOAD_SUFFIX} path which indicates incomplete download",
677+
):
678+
new_dandiset.upload()
679+
680+
# Clean up
681+
bad_file_in_zarr.unlink()
682+
683+
# Test 4: Normal zarr should upload fine after removing bad paths
684+
new_dandiset.upload()
685+
(asset,) = new_dandiset.dandiset.get_assets()
686+
assert isinstance(asset, RemoteZarrAsset)
687+
assert asset.path == "test.zarr"
688+
689+
690+
@pytest.mark.ai_generated
691+
def test_upload_rejects_dandidownload_nwb_file(new_dandiset: SampleDandiset) -> None:
692+
"""Test that upload rejects NWB files with .dandidownload in their path"""
693+
dspath = new_dandiset.dspath
694+
695+
# Create an NWB file with .dandidownload in its name
696+
bad_nwb_path = dspath / f"test{DOWNLOAD_SUFFIX}.nwb"
697+
make_nwb_file(
698+
bad_nwb_path,
699+
session_description="test session",
700+
identifier="test456",
701+
session_start_time=datetime(2017, 4, 15, 12, tzinfo=timezone.utc),
702+
subject=pynwb.file.Subject(subject_id="test"),
703+
)
704+
705+
with pytest.raises(
706+
UploadError,
707+
match=f"contains {DOWNLOAD_SUFFIX} path which indicates incomplete download",
708+
):
709+
new_dandiset.upload(allow_any_path=True)

dandi/upload.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from . import __version__, lgr
2121
from .consts import (
22+
DOWNLOAD_SUFFIX,
2223
DRAFT,
2324
DandiInstance,
2425
dandiset_identifier_regex,
@@ -41,6 +42,30 @@
4142
from .validate_types import Severity
4243

4344

45+
def _check_dandidownload_paths(dfile: DandiFile) -> None:
46+
"""
47+
Check if an asset contains .dandidownload paths and raise UploadError if found.
48+
49+
.dandidownload paths indicate incomplete or interrupted downloads and should not
50+
be uploaded as they represent "garbage" data in the file structure.
51+
"""
52+
# Check the main file path
53+
if DOWNLOAD_SUFFIX in str(dfile.filepath):
54+
raise UploadError(
55+
f"Asset contains {DOWNLOAD_SUFFIX} path which indicates incomplete "
56+
f"download: {dfile.filepath}"
57+
)
58+
59+
# For Zarr assets, check all internal paths
60+
if isinstance(dfile, ZarrAsset):
61+
for entry in dfile.iterfiles():
62+
if DOWNLOAD_SUFFIX in str(entry):
63+
raise UploadError(
64+
f"Zarr asset contains {DOWNLOAD_SUFFIX} path which indicates "
65+
f"incomplete download: {entry}"
66+
)
67+
68+
4469
class Uploaded(TypedDict):
4570
size: int
4671
errors: list[str]
@@ -252,6 +277,9 @@ def process_path(dfile: DandiFile) -> Iterator[dict]:
252277
# without limiting [:50] it might cause some pyout indigestion
253278
raise UploadError(str(exc)[:50])
254279

280+
# Check for .dandidownload paths that indicate incomplete downloads
281+
_check_dandidownload_paths(dfile)
282+
255283
#
256284
# Validate first, so we do not bother server at all if not kosher
257285
#

0 commit comments

Comments
 (0)