Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dvuploader/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class File(BaseModel):

Private Attributes:
_size (int): Size of the file in bytes.
_is_inside_zip (bool): Indicates if the file is packaged inside a zip archive.

Methods:
extract_file_name(): Extracts filename from filepath and initializes file handler.
Expand Down Expand Up @@ -57,6 +58,7 @@ class File(BaseModel):
tab_ingest: bool = Field(default=True, alias="tabIngest")

_size: int = PrivateAttr(default=0)
_is_inside_zip: bool = PrivateAttr(default=False)

def extract_file_name(self):
"""
Expand Down
4 changes: 2 additions & 2 deletions dvuploader/nativeupload.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,8 @@ async def _update_metadata(
try:
if _tab_extension(dv_path) in file_mapping:
file_id = file_mapping[_tab_extension(dv_path)]
elif file.file_name and _is_zip(file.file_name):
# When the file is a zip it will be unpacked and thus
elif file.file_name and _is_zip(file.file_name) and not file._is_inside_zip:
# When the file is a zip package it will be unpacked and thus
# the expected file name of the zip will not be in the
# dataset, since it has been unpacked.
continue
Expand Down
1 change: 1 addition & 0 deletions dvuploader/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def zip_files(
data=file.handler.read(), # type: ignore
zinfo_or_arcname=_create_arcname(file),
)
file._is_inside_zip = True

return path

Expand Down
71 changes: 71 additions & 0 deletions tests/integration/test_native_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def test_native_upload_with_proxy(
assert len(files) == 3
assert sorted([file["label"] for file in files]) == sorted(expected_files)

@pytest.mark.xfail(reason="See discussion in #34")
def test_native_upload_by_handler(
self,
credentials,
Expand Down Expand Up @@ -464,6 +465,76 @@ def test_zipzip_file_upload(

assert sorted([file["label"] for file in files]) == sorted(expected_files)

def test_metadata_with_zip_files_in_package(self, credentials):
    """Verify that a zip file uploaded alongside other files keeps its
    metadata (description, categories) and is not unpacked server-side.

    Regression test for the ``_is_inside_zip`` handling: a zip that is
    itself packaged inside the upload archive must still receive its
    metadata update instead of being skipped as an "unpacked" zip.
    """
    BASE_URL, API_TOKEN = credentials

    # Create a fresh dataset to upload into.
    pid = create_dataset(
        parent="Root",
        server_url=BASE_URL,
        api_token=API_TOKEN,
    )

    # Arrange: one zip (must NOT be unzipped on the server) and one
    # plain text file, each with distinct metadata.
    upload_files = [
        File(
            filepath="tests/fixtures/archive.zip",
            dv_dir="subdir2",
            description="This file should not be unzipped",
            categories=["Test file"],
        ),
        File(
            filepath="tests/fixtures/add_dir_files/somefile.txt",
            dv_dir="subdir",
            description="A simple text file",
            categories=["Test file"],
        ),
    ]

    # Act
    uploader = DVUploader(files=upload_files)
    uploader.upload(
        persistent_id=pid,
        api_token=API_TOKEN,
        dataverse_url=BASE_URL,
        n_parallel_uploads=10,
    )

    # Assert: both files landed, with their metadata intact.
    dataset_files = retrieve_dataset_files(
        dataverse_url=BASE_URL,
        persistent_id=pid,
        api_token=API_TOKEN,
    )

    assert len(dataset_files) == 2, f"Expected 2 files, got {len(dataset_files)}"

    expected_files = [
        {
            "label": "archive.zip",
            "description": "This file should not be unzipped",
            "categories": ["Test file"],
        },
        {
            "label": "somefile.txt",
            "description": "A simple text file",
            "categories": ["Test file"],
        },
    ]

    # Project each returned file onto the expected keys (missing keys
    # become None) so extra metadata added by Dataverse is ignored, then
    # sort by label to make the comparison order-independent.
    expected_keys = expected_files[0].keys()
    files_as_expected = sorted(
        [{k: f.get(k) for k in expected_keys} for f in dataset_files],
        key=lambda x: x["label"],
    )
    assert files_as_expected == expected_files, (
        f"File metadata not as expected: {json.dumps(dataset_files, indent=2)}"
    )


def test_too_many_zip_files(
self,
credentials,
Expand Down