Skip to content

Commit ae7d12e

Browse files
authored
Merge pull request #223 from dnlbauer/fix_missing_empty_file
Fix missing empty file
2 parents: 9c0d8a0 + 0fca16a; commit: ae7d12e

File tree

5 files changed

+45
-0
lines changed

5 files changed

+45
-0
lines changed

rocrate/model/dataset.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,10 +105,16 @@ def _stream_folder_from_path(self, chunk_size=8192):
105105
for name in files:
106106
source = root / name
107107
dest = source.relative_to(Path(path).parent)
108+
is_empty = True
108109
with open(source, 'rb') as f:
109110
while chunk := f.read(chunk_size):
111+
is_empty = False
110112
yield str(dest), chunk
111113

114+
# yield once for an empty file
115+
if is_empty:
116+
yield str(dest), b""
117+
112118
def _stream_folder_from_url(self, chunk_size=8192):
113119
if not self.fetch_remote:
114120
if self.validate_url:
@@ -124,8 +130,14 @@ def _stream_folder_from_url(self, chunk_size=8192):
124130
part_uri = f"{base}/{part}"
125131
rel_out_path = Path(self.id) / part
126132

133+
is_empty = True
127134
with urlopen(part_uri) as response:
128135
while chunk := response.read(chunk_size):
136+
is_empty = False
129137
yield str(rel_out_path), chunk
138+
139+
# yield once for an empty file
140+
if is_empty:
141+
yield str(rel_out_path), b""
130142
except KeyError:
131143
warnings.warn(f"'hasPart' entry in {self.id} is missing '@id'. Skipping.")

rocrate/model/file.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,10 @@ def _stream_from_url(self, url, chunk_size=8192):
116116
yield self.id, chunk
117117
size += len(chunk)
118118

119+
# yield once for an empty file
120+
if size == 0:
121+
yield self.id, b""
122+
119123
if self.record_size:
120124
self._jsonld['contentSize'] = str(size)
121125

@@ -126,6 +130,11 @@ def _stream_from_file(self, path, chunk_size=8192):
126130
while chunk := f.read(chunk_size):
127131
yield self.id, chunk
128132
size += len(chunk)
133+
134+
# yield once for an empty file
135+
if size == 0:
136+
yield self.id, b""
137+
129138
if self.record_size:
130139
self._jsonld['contentSize'] = str(size)
131140

test/test-data/empty_file_crate/empty.txt

Whitespace-only changes.

test/test-data/empty_file_crate/folder/empty_not_listed.txt

Whitespace-only changes.

test/test_write.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,3 +524,27 @@ def test_percent_escape(test_data_dir, tmpdir, helpers):
524524
assert "subdir/a%20b/" in json_entities
525525
assert (out_path / "a b" / "c d.txt").is_file()
526526
assert (out_path / "subdir" / "a b" / "c d.txt").is_file()
527+
528+
529+
def test_stream_empty_file(test_data_dir, tmpdir):
530+
"""
531+
Test that empty files are written correctly to the zip file.
532+
"""
533+
crate = ROCrate()
534+
crate_dir = test_data_dir / "empty_file_crate"
535+
crate.add_file(crate_dir / "empty.txt")
536+
crate.add_directory(crate_dir / "folder")
537+
538+
# write the crate to a zip file
539+
out_path = tmpdir / 'ro_crate_out.zip'
540+
crate.write_zip(out_path)
541+
542+
# Check that the zip file contains empty files
543+
assert out_path.is_file()
544+
files_in_zip = {}
545+
with zipfile.ZipFile(out_path, "r") as zf:
546+
for info in zf.infolist():
547+
files_in_zip[info.filename] = info.file_size
548+
549+
assert files_in_zip["empty.txt"] == 0
550+
assert files_in_zip["folder/empty_not_listed.txt"] == 0

0 commit comments

Comments
 (0)