Skip to content

Commit c9ccbd8

Browse files
authored
Merge pull request #225 from simleo/fix_init_percent_escape
Fix percent escaping in crate init
2 parents ae7d12e + 2a62fb1 commit c9ccbd8

File tree

5 files changed

+39
-8
lines changed

5 files changed

+39
-8
lines changed

rocrate/model/dataset.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@
3030
from urllib.parse import unquote
3131

3232
from .file_or_dir import FileOrDir
33-
from ..utils import is_url, iso_now
33+
from ..utils import is_url, iso_now, Mode
3434

3535

3636
class Dataset(FileOrDir):
@@ -75,7 +75,7 @@ def _copy_folder(self, base_path):
7575
errno.ENOENT, os.strerror(errno.ENOENT), path
7676
)
7777
abs_out_path.mkdir(parents=True, exist_ok=True)
78-
if not self.crate.source:
78+
if self.crate.mode == Mode.CREATE:
7979
self.crate._copy_unlisted(path, abs_out_path)
8080

8181
def write(self, base_path):
@@ -99,7 +99,7 @@ def _stream_folder_from_path(self, chunk_size=8192):
9999
raise FileNotFoundError(
100100
errno.ENOENT, os.strerror(errno.ENOENT), str(path)
101101
)
102-
if not self.crate.source:
102+
if self.crate.mode == Mode.CREATE:
103103
for root, _, files in os.walk(path):
104104
root = Path(root)
105105
for name in files:

rocrate/model/file_or_dir.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@
2727
from urllib.parse import quote
2828

2929
from .data_entity import DataEntity
30-
from ..utils import is_url
30+
from ..utils import is_url, Mode
3131

3232

3333
class FileOrDir(DataEntity):
@@ -45,7 +45,7 @@ def __init__(self, crate, source=None, dest_path=None, fetch_remote=False,
4545
if dest_path.is_absolute():
4646
raise ValueError("if provided, dest_path must be relative")
4747
identifier = dest_path.as_posix()
48-
if not crate.source:
48+
if not crate.mode == Mode.READ:
4949
identifier = quote(identifier)
5050
else:
5151
if not isinstance(source, (str, Path)):
@@ -54,6 +54,6 @@ def __init__(self, crate, source=None, dest_path=None, fetch_remote=False,
5454
identifier = os.path.basename(source) if fetch_remote else source
5555
else:
5656
identifier = os.path.basename(str(source).rstrip("/"))
57-
if not crate.source:
57+
if not crate.mode == Mode.READ:
5858
identifier = quote(identifier)
5959
super().__init__(crate, identifier, properties)

rocrate/rocrate.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@
6060
from .model.testservice import get_service
6161
from .model.softwareapplication import get_app
6262

63-
from .utils import is_url, subclasses, get_norm_value, walk, as_list
63+
from .utils import is_url, subclasses, get_norm_value, walk, as_list, Mode
6464
from .metadata import read_metadata, find_root_entity_id
6565

6666

@@ -79,6 +79,7 @@ def pick_type(json_entity, type_map, fallback=None):
7979
class ROCrate():
8080

8181
def __init__(self, source=None, gen_preview=False, init=False, exclude=None):
82+
self.mode = None
8283
self.source = source
8384
self.exclude = exclude
8485
self.__entity_map = {}
@@ -90,13 +91,15 @@ def __init__(self, source=None, gen_preview=False, init=False, exclude=None):
9091
if gen_preview:
9192
self.add(Preview(self))
9293
if not source:
93-
# create a new ro-crate
94+
self.mode = Mode.CREATE
9495
self.add(RootDataset(self), Metadata(self))
9596
elif init:
97+
self.mode = Mode.INIT
9698
if isinstance(source, dict):
9799
raise ValueError("parameter 'init' is not compatible with a dict source")
98100
self.__init_from_tree(source, gen_preview=gen_preview)
99101
else:
102+
self.mode = Mode.READ
100103
source = self.__read(source, gen_preview=gen_preview)
101104
# in the zip case, self.source is the extracted dir
102105
self.source = source

rocrate/utils.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
# See the License for the specific language governing permissions and
2323
# limitations under the License.
2424

25+
from enum import Enum
2526
import os
2627
from datetime import datetime, timezone
2728
from urllib.parse import urlsplit
@@ -77,3 +78,9 @@ def walk(top, topdown=True, onerror=None, followlinks=False, exclude=None):
7778
dirs[:] = [_ for _ in dirs if _ not in exclude]
7879
files[:] = [_ for _ in files if _ not in exclude]
7980
yield root, dirs, files
81+
82+
83+
class Mode(Enum):
84+
READ = 1
85+
INIT = 2
86+
CREATE = 3

test/test_read.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -662,3 +662,24 @@ def test_no_data_entity_link_from_file():
662662
assert t1
663663
assert t1 not in crate.data_entities
664664
assert t1 in crate.contextual_entities
665+
666+
667+
def test_init_percent_escape(tmpdir, helpers):
668+
in_crate = tmpdir / "in_crate"
669+
in_file = in_crate / "in file.txt"
670+
in_dir = in_crate / "in dir"
671+
deep_file = in_crate / "in dir" / "deep file.txt"
672+
out_crate = tmpdir / "out_crate"
673+
in_crate.mkdir()
674+
in_file.write_text("IN FILE\n")
675+
in_dir.mkdir()
676+
deep_file.write_text("DEEP FILE\n")
677+
crate = ROCrate(in_crate, init=True)
678+
crate.write(out_crate)
679+
json_entities = helpers.read_json_entities(out_crate)
680+
assert "in%20file.txt" in json_entities
681+
assert "in%20dir/" in json_entities
682+
assert "in%20dir/deep%20file.txt" in json_entities
683+
assert (out_crate / "in file.txt").is_file()
684+
assert (out_crate / "in dir").is_dir()
685+
assert (out_crate / "in dir" / "deep file.txt").is_file()

0 commit comments

Comments
 (0)