Skip to content

Commit 016ff40

Browse files
authored
Merge pull request #242 from simleo/fix_file_dir_source
Unquote file or dir source before creating the entity
2 parents: 94cf716 + 375edcb; commit: 016ff40

File tree

4 files changed

+36
-23
lines changed

4 files changed

+36
-23
lines changed

rocrate/model/dataset.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -69,10 +69,7 @@ def _copy_folder(self, base_path):
6969
if self.source is None:
7070
abs_out_path.mkdir(parents=True, exist_ok=True)
7171
else:
72-
if self.crate.mode == Mode.READ:
73-
path = unquote(str(self.source))
74-
else:
75-
path = self.source
72+
path = self.source
7673
if not Path(path).exists():
7774
raise FileNotFoundError(
7875
errno.ENOENT, os.strerror(errno.ENOENT), path
@@ -97,10 +94,7 @@ def stream(self, chunk_size=8192):
9794
yield from self._stream_folder_from_path(chunk_size)
9895

9996
def _stream_folder_from_path(self, chunk_size=8192):
100-
if self.crate.mode == Mode.READ:
101-
path = unquote(str(self.source))
102-
else:
103-
path = self.source
97+
path = self.source
10498
if not Path(path).exists():
10599
raise FileNotFoundError(
106100
errno.ENOENT, os.strerror(errno.ENOENT), str(path)

rocrate/model/file.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from urllib.parse import unquote
3232

3333
from .file_or_dir import FileOrDir
34-
from ..utils import is_url, iso_now, Mode
34+
from ..utils import is_url, iso_now
3535

3636

3737
class File(FileOrDir):
@@ -78,11 +78,7 @@ def write(self, base_path):
7878
# Allows to record a File entity whose @id does not exist, see #73
7979
warnings.warn(f"No source for {self.id}")
8080
else:
81-
if self.crate.mode == Mode.READ:
82-
in_file_path = unquote(str(self.source))
83-
else:
84-
in_file_path = self.source
85-
self._copy_file(in_file_path, out_file_path)
81+
self._copy_file(self.source, out_file_path)
8682

8783
def _stream_from_stream(self, stream):
8884
size = 0
@@ -149,8 +145,4 @@ def stream(self, chunk_size=8192):
149145
# Allows to record a File entity whose @id does not exist, see #73
150146
warnings.warn(f"No source for {self.id}")
151147
else:
152-
if self.crate.mode == Mode.READ:
153-
path = unquote(str(self.source))
154-
else:
155-
path = self.source
156-
yield from self._stream_from_file(path, chunk_size)
148+
yield from self._stream_from_file(self.source, chunk_size)

rocrate/rocrate.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
from collections import OrderedDict
3333
from pathlib import Path
34-
from urllib.parse import urljoin
34+
from urllib.parse import urljoin, unquote
3535

3636
from packaging.version import Version
3737

@@ -199,7 +199,7 @@ def __add_parts(self, parts, entities, source):
199199
if is_url(id_):
200200
instance = cls(self, id_, properties=entity)
201201
else:
202-
instance = cls(self, source / id_, id_, properties=entity)
202+
instance = cls(self, source / unquote(id_), id_, properties=entity)
203203
self.add(instance)
204204
if instance.type == "Dataset":
205205
self.__add_parts(as_list(entity.get("hasPart", [])), entities, source)

test/test_write.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,17 +509,31 @@ def test_percent_escape(test_data_dir, tmpdir, helpers):
509509
f_path = test_data_dir / "read_crate" / "with space.txt"
510510
f1 = crate.add_file(f_path)
511511
assert f1.id == "with%20space.txt"
512+
assert f1.source.is_file()
513+
assert str(f1.source) == str(f_path)
512514
f2 = crate.add_file(f_path, dest_path="subdir/with space.txt")
513515
assert f2.id == "subdir/with%20space.txt"
514-
f3 = crate.add_file(test_data_dir / "read_crate" / "without%20space.txt")
516+
assert f2.source.is_file()
517+
assert str(f2.source) == str(f_path)
518+
f3_path = test_data_dir / "read_crate" / "without%20space.txt"
519+
f3 = crate.add_file(f3_path)
515520
assert f3.id == "without%2520space.txt"
521+
assert f3.source.is_file()
522+
assert str(f3.source) == str(f3_path)
516523
d_path = test_data_dir / "read_crate" / "a b"
517524
d1 = crate.add_dataset(d_path)
518525
assert d1.id == "a%20b/"
526+
assert d1.source.is_dir()
527+
assert str(d1.source) == str(d_path)
519528
d2 = crate.add_dataset(d_path, dest_path="subdir/a b")
520529
assert d2.id == "subdir/a%20b/"
521-
d3 = crate.add_dataset(test_data_dir / "read_crate" / "j%20k")
530+
assert d2.source.is_dir()
531+
assert str(d2.source) == str(d_path)
532+
d3_path = test_data_dir / "read_crate" / "j%20k"
533+
d3 = crate.add_dataset(d3_path)
522534
assert d3.id == "j%2520k/"
535+
assert d3.source.is_dir()
536+
assert str(d3.source) == str(d3_path)
523537
out_path = tmpdir / "ro_crate_out"
524538
crate.write(out_path)
525539
json_entities = helpers.read_json_entities(out_path)
@@ -553,6 +567,19 @@ def test_percent_escape(test_data_dir, tmpdir, helpers):
553567
assert (unpack_path / "a b" / "c d.txt").is_file()
554568
assert (unpack_path / "subdir" / "a b" / "c d.txt").is_file()
555569
assert (unpack_path / "j%20k" / "l%20m.txt").is_file()
570+
rcrate = ROCrate(out_path)
571+
rf1 = rcrate.get("with%20space.txt")
572+
assert str(rf1.source) == str(out_path / "with space.txt")
573+
rf2 = rcrate.get("subdir/with%20space.txt")
574+
assert str(rf2.source) == str(out_path / "subdir/with space.txt")
575+
rf3 = rcrate.get("without%2520space.txt")
576+
assert str(rf3.source) == str(out_path / "without%20space.txt")
577+
df1 = rcrate.get("a%20b/")
578+
assert str(df1.source) == str(out_path / "a b/")
579+
df2 = rcrate.get("subdir/a%20b/")
580+
assert str(df2.source) == str(out_path / "subdir/a b/")
581+
df3 = rcrate.get("j%2520k/")
582+
assert str(df3.source) == str(out_path / "j%20k/")
556583

557584

558585
def test_stream_empty_file(test_data_dir, tmpdir):

0 commit comments

Comments
 (0)