Skip to content

Commit d12b626

Browse files
authored
Merge pull request #227 from simleo/fix_unescaping
Fix percent unescaping
2 parents c9ccbd8 + 44eb16c commit d12b626

File tree

5 files changed

+72
-9
lines changed

5 files changed

+72
-9
lines changed

rocrate/model/dataset.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,10 @@ def _copy_folder(self, base_path):
6969
if self.source is None:
7070
abs_out_path.mkdir(parents=True, exist_ok=True)
7171
else:
72-
path = unquote(str(self.source))
72+
if self.crate.mode == Mode.READ:
73+
path = unquote(str(self.source))
74+
else:
75+
path = self.source
7376
if not Path(path).exists():
7477
raise FileNotFoundError(
7578
errno.ENOENT, os.strerror(errno.ENOENT), path
@@ -94,7 +97,10 @@ def stream(self, chunk_size=8192):
9497
yield from self._stream_folder_from_path(chunk_size)
9598

9699
def _stream_folder_from_path(self, chunk_size=8192):
97-
path = unquote(str(self.source))
100+
if self.crate.mode == Mode.READ:
101+
path = unquote(str(self.source))
102+
else:
103+
path = self.source
98104
if not Path(path).exists():
99105
raise FileNotFoundError(
100106
errno.ENOENT, os.strerror(errno.ENOENT), str(path)
@@ -104,7 +110,7 @@ def _stream_folder_from_path(self, chunk_size=8192):
104110
root = Path(root)
105111
for name in files:
106112
source = root / name
107-
dest = source.relative_to(Path(path).parent)
113+
dest = Path(unquote(self.id)) / source.relative_to(path)
108114
is_empty = True
109115
with open(source, 'rb') as f:
110116
while chunk := f.read(chunk_size):

rocrate/model/file.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from urllib.parse import unquote
3232

3333
from .file_or_dir import FileOrDir
34-
from ..utils import is_url, iso_now
34+
from ..utils import is_url, iso_now, Mode
3535

3636

3737
class File(FileOrDir):
@@ -64,7 +64,6 @@ def _write_from_stream(self, out_file_path):
6464
out_file.write(chunk)
6565

6666
def _copy_file(self, path, out_file_path):
67-
path = unquote(str(path))
6867
out_file_path.parent.mkdir(parents=True, exist_ok=True)
6968
if not out_file_path.exists() or not out_file_path.samefile(path):
7069
shutil.copy(path, out_file_path)
@@ -79,7 +78,11 @@ def write(self, base_path):
7978
# Allows to record a File entity whose @id does not exist, see #73
8079
warnings.warn(f"No source for {self.id}")
8180
else:
82-
self._copy_file(self.source, out_file_path)
81+
if self.crate.mode == Mode.READ:
82+
in_file_path = unquote(str(self.source))
83+
else:
84+
in_file_path = self.source
85+
self._copy_file(in_file_path, out_file_path)
8386

8487
def _stream_from_stream(self, stream):
8588
size = 0
@@ -124,11 +127,10 @@ def _stream_from_url(self, url, chunk_size=8192):
124127
self._jsonld['contentSize'] = str(size)
125128

126129
def _stream_from_file(self, path, chunk_size=8192):
127-
path = unquote(str(path))
128130
size = 0
129131
with open(path, 'rb') as f:
130132
while chunk := f.read(chunk_size):
131-
yield self.id, chunk
133+
yield unquote(self.id), chunk
132134
size += len(chunk)
133135

134136
# yield once for an empty file
@@ -147,4 +149,8 @@ def stream(self, chunk_size=8192):
147149
# Allows to record a File entity whose @id does not exist, see #73
148150
warnings.warn(f"No source for {self.id}")
149151
else:
150-
yield from self._stream_from_file(self.source, chunk_size)
152+
if self.crate.mode == Mode.READ:
153+
path = unquote(str(self.source))
154+
else:
155+
path = self.source
156+
yield from self._stream_from_file(path, chunk_size)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
L%20M
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
The name of this file should NOT be unescaped

test/test_write.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,20 +510,48 @@ def test_percent_escape(test_data_dir, tmpdir, helpers):
510510
assert f1.id == "with%20space.txt"
511511
f2 = crate.add_file(f_path, dest_path="subdir/with space.txt")
512512
assert f2.id == "subdir/with%20space.txt"
513+
f3 = crate.add_file(test_data_dir / "read_crate" / "without%20space.txt")
514+
assert f3.id == "without%2520space.txt"
513515
d_path = test_data_dir / "read_crate" / "a b"
514516
d1 = crate.add_dataset(d_path)
515517
assert d1.id == "a%20b/"
516518
d2 = crate.add_dataset(d_path, dest_path="subdir/a b")
517519
assert d2.id == "subdir/a%20b/"
520+
d3 = crate.add_dataset(test_data_dir / "read_crate" / "j%20k")
521+
assert d3.id == "j%2520k/"
518522
out_path = tmpdir / "ro_crate_out"
519523
crate.write(out_path)
520524
json_entities = helpers.read_json_entities(out_path)
521525
assert "with%20space.txt" in json_entities
522526
assert "subdir/with%20space.txt" in json_entities
527+
assert "without%2520space.txt" in json_entities
523528
assert "a%20b/" in json_entities
524529
assert "subdir/a%20b/" in json_entities
530+
assert "j%2520k/" in json_entities
531+
assert (out_path / "with space.txt").is_file()
532+
assert (out_path / "subdir" / "with space.txt").is_file()
533+
assert (out_path / "without%20space.txt").is_file()
525534
assert (out_path / "a b" / "c d.txt").is_file()
526535
assert (out_path / "subdir" / "a b" / "c d.txt").is_file()
536+
assert (out_path / "j%20k" / "l%20m.txt").is_file()
537+
out_zip_path = tmpdir / "ro_crate_out.zip"
538+
crate.write_zip(out_zip_path)
539+
unpack_path = tmpdir / "unpack"
540+
with zipfile.ZipFile(out_zip_path, "r") as zf:
541+
zf.extractall(unpack_path)
542+
json_entities = helpers.read_json_entities(unpack_path)
543+
assert "with%20space.txt" in json_entities
544+
assert "subdir/with%20space.txt" in json_entities
545+
assert "without%2520space.txt" in json_entities
546+
assert "a%20b/" in json_entities
547+
assert "subdir/a%20b/" in json_entities
548+
assert "j%2520k/" in json_entities
549+
assert (unpack_path / "with space.txt").is_file()
550+
assert (unpack_path / "subdir" / "with space.txt").is_file()
551+
assert (unpack_path / "without%20space.txt").is_file()
552+
assert (unpack_path / "a b" / "c d.txt").is_file()
553+
assert (unpack_path / "subdir" / "a b" / "c d.txt").is_file()
554+
assert (unpack_path / "j%20k" / "l%20m.txt").is_file()
527555

528556

529557
def test_stream_empty_file(test_data_dir, tmpdir):
@@ -548,3 +576,24 @@ def test_stream_empty_file(test_data_dir, tmpdir):
548576

549577
assert files_in_zip["empty.txt"] == 0
550578
assert files_in_zip["folder/empty_not_listed.txt"] == 0
579+
580+
581+
def test_write_zip_nested_dest(tmpdir, helpers):
582+
root = tmpdir / "root"
583+
root.mkdir()
584+
(root / "a b").mkdir()
585+
(root / "a b" / "c d.txt").write_text("C D\n")
586+
(root / "a b" / "j k").mkdir()
587+
(root / "a b" / "j k" / "l m.txt").write_text("L M\n")
588+
crate = ROCrate()
589+
d1 = crate.add_dataset(root / "a b", dest_path="subdir/a b")
590+
assert d1.id == "subdir/a%20b/"
591+
out_zip_path = tmpdir / "ro_crate_out.zip"
592+
crate.write_zip(out_zip_path)
593+
unpack_path = tmpdir / "unpack"
594+
with zipfile.ZipFile(out_zip_path, "r") as zf:
595+
zf.extractall(unpack_path)
596+
json_entities = helpers.read_json_entities(unpack_path)
597+
assert "subdir/a%20b/" in json_entities
598+
assert (unpack_path / "subdir" / "a b" / "c d.txt").is_file()
599+
assert (unpack_path / "subdir" / "a b" / "j k" / "l m.txt").is_file()

0 commit comments

Comments
 (0)