Skip to content

Commit 0fca16a

Browse files
committed
feat: handle empty file streaming by yielding empty bytes
Fixes #222
1 parent 2f9e24d commit 0fca16a

File tree

3 files changed

+24
-2
lines changed

3 files changed

+24
-2
lines changed

rocrate/model/dataset.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,10 +105,16 @@ def _stream_folder_from_path(self, chunk_size=8192):
105105
for name in files:
106106
source = root / name
107107
dest = source.relative_to(Path(path).parent)
108+
is_empty = True
108109
with open(source, 'rb') as f:
109110
while chunk := f.read(chunk_size):
111+
is_empty = False
110112
yield str(dest), chunk
111113

114+
# yield once for an empty file
115+
if is_empty:
116+
yield str(dest), b""
117+
112118
def _stream_folder_from_url(self, chunk_size=8192):
113119
if not self.fetch_remote:
114120
if self.validate_url:
@@ -124,8 +130,14 @@ def _stream_folder_from_url(self, chunk_size=8192):
124130
part_uri = f"{base}/{part}"
125131
rel_out_path = Path(self.id) / part
126132

133+
is_empty = True
127134
with urlopen(part_uri) as response:
128135
while chunk := response.read(chunk_size):
136+
is_empty = False
129137
yield str(rel_out_path), chunk
138+
139+
# yield once for an empty file
140+
if is_empty:
141+
yield str(rel_out_path), b""
130142
except KeyError:
131143
warnings.warn(f"'hasPart' entry in {self.id} is missing '@id'. Skipping.")

rocrate/model/file.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,10 @@ def _stream_from_url(self, url, chunk_size=8192):
116116
yield self.id, chunk
117117
size += len(chunk)
118118

119+
# yield once for an empty file
120+
if size == 0:
121+
yield self.id, b""
122+
119123
if self.record_size:
120124
self._jsonld['contentSize'] = str(size)
121125

@@ -126,6 +130,11 @@ def _stream_from_file(self, path, chunk_size=8192):
126130
while chunk := f.read(chunk_size):
127131
yield self.id, chunk
128132
size += len(chunk)
133+
134+
# yield once for an empty file
135+
if size == 0:
136+
yield self.id, b""
137+
129138
if self.record_size:
130139
self._jsonld['contentSize'] = str(size)
131140

test/test_write.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -525,6 +525,7 @@ def test_percent_escape(test_data_dir, tmpdir, helpers):
525525
assert (out_path / "a b" / "c d.txt").is_file()
526526
assert (out_path / "subdir" / "a b" / "c d.txt").is_file()
527527

528+
528529
def test_stream_empty_file(test_data_dir, tmpdir):
529530
"""
530531
Test that empty files are written correctly to the zip file.
@@ -534,7 +535,7 @@ def test_stream_empty_file(test_data_dir, tmpdir):
534535
crate.add_file(crate_dir / "empty.txt")
535536
crate.add_directory(crate_dir / "folder")
536537

537-
# write the crate to a zip file
538+
# write the crate to a zip file
538539
out_path = tmpdir / 'ro_crate_out.zip'
539540
crate.write_zip(out_path)
540541

@@ -546,4 +547,4 @@ def test_stream_empty_file(test_data_dir, tmpdir):
546547
files_in_zip[info.filename] = info.file_size
547548

548549
assert files_in_zip["empty.txt"] == 0
549-
assert files_in_zip["folder/empty_not_listed.txt"] == 0
550+
assert files_in_zip["folder/empty_not_listed.txt"] == 0

0 commit comments

Comments
 (0)