Skip to content

Commit 94cf716

Browse files
authored
Merge pull request #240 from simleo/determine_data_entities
Update data entities reading for 1.2
2 parents 8d115ea + ab1ab68 commit 94cf716

File tree

6 files changed

+154
-37
lines changed

6 files changed

+154
-37
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ arcp==0.2.1
33
jinja2
44
python-dateutil
55
click
6+
packaging

rocrate/model/metadata.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import json
2626
from pathlib import Path
2727

28+
from packaging.version import Version
29+
2830
from .file import File
2931
from .dataset import Dataset
3032

@@ -45,6 +47,7 @@ def __init__(self, crate, source=None, dest_path=None, properties=None, version=
4547
if version not in SUPPORTED_VERSIONS:
4648
raise ValueError(f"version {version!r} not supported")
4749
self.version = version
50+
self.version_obj = Version(version)
4851
self.profile = f"https://w3id.org/ro/crate/{self.version}"
4952
if source is None and dest_path is None:
5053
dest_path = LEGACY_BASENAME if version == "1.0" else BASENAME

rocrate/rocrate.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
from pathlib import Path
3434
from urllib.parse import urljoin
3535

36+
from packaging.version import Version
37+
3638
from .memory_buffer import MemoryBuffer
3739
from .model import (
3840
ComputationalWorkflow,
@@ -63,6 +65,15 @@
6365
from .metadata import read_metadata, find_root_entity_id
6466

6567

68+
# Types *commonly* used for data entities.
DATA_ENTITY_TYPES = {"File", "Dataset"}


def is_data_entity(entity):
    """Tell whether a JSON-LD entity dict looks like a data entity.

    An entity qualifies when at least one of its ``@type`` values is in
    ``DATA_ENTITY_TYPES``. Entities whose ``@id`` starts with ``#`` are
    never treated as data entities, whatever their type.

    :param entity: mapping with an ``@id`` key and an optional ``@type``
        key (a single value or a list of values).
    :return: ``True`` if the entity is a data entity, ``False`` otherwise.
    :raises KeyError: if ``entity`` has no ``@id`` key.
    """
    if entity["@id"].startswith("#"):
        return False
    types = as_list(entity.get("@type", []))
    # Always hand back a real bool (the previous version leaked the
    # intersection set on this path and ``False`` on the other one).
    return not DATA_ENTITY_TYPES.isdisjoint(types)
75+
76+
6677
def pick_type(json_entity, type_map, fallback=None):
6778
try:
6879
t = json_entity["@type"]
@@ -172,12 +183,14 @@ def __read_data_entities(self, entities, source, gen_preview):
172183

173184
def __add_parts(self, parts, entities, source):
174185
type_map = OrderedDict((_.__name__, _) for _ in subclasses(FileOrDir))
175-
for data_entity_ref in parts:
176-
id_ = data_entity_ref['@id']
177-
try:
178-
entity = entities.pop(id_)
179-
except KeyError:
186+
for ref in parts:
187+
id_ = ref['@id']
188+
if id_ not in entities:
180189
continue
190+
if self.version_obj >= Version("1.2"):
191+
if not is_data_entity(entities[id_]):
192+
continue
193+
entity = entities.pop(id_)
181194
assert id_ == entity.pop('@id')
182195
cls = pick_type(entity, type_map, fallback=DataEntity)
183196
if cls is DataEntity:
@@ -193,11 +206,13 @@ def __add_parts(self, parts, entities, source):
193206

194207
def __read_contextual_entities(self, entities):
195208
type_map = {_.__name__: _ for _ in subclasses(ContextEntity)}
196-
# types *commonly* used for data entities
197-
data_entity_types = {"File", "Dataset"}
198209
for identifier, entity in entities.items():
199-
if data_entity_types.intersection(as_list(entity.get("@type", []))):
200-
warnings.warn(f"{entity['@id']} looks like a data entity but it's not listed in the root dataset's hasPart")
210+
if is_data_entity(entity):
211+
id_ = entity['@id']
212+
if self.version_obj >= Version("1.2"):
213+
raise ValueError(f"'{id_}' is a data entity but it's not linked to from the root dataset's hasPart")
214+
else:
215+
warnings.warn(f"'{id_}' looks like a data entity but it's not listed in the root dataset's hasPart")
201216
assert identifier == entity.pop('@id')
202217
cls = pick_type(entity, type_map, fallback=ContextEntity)
203218
self.add(cls(self, identifier, entity))
@@ -311,6 +326,10 @@ def mainEntity(self, value):
311326
def version(self):
312327
return self.metadata.version
313328

329+
@property
def version_obj(self):
    """Return the ``version_obj`` attribute of this object's ``metadata``."""
    descriptor = self.metadata
    return descriptor.version_obj
332+
314333
@property
315334
def test_dir(self):
316335
rval = self.dereference("test")

test/test_metadata.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def test_find_root(root, basename):
4242
"@type": "CreativeWork",
4343
"about": {"@id": root_id},
4444
"conformsTo": [
45-
{"@id": "https://w3id.org/ro/crate/1.1"},
45+
{"@id": "https://w3id.org/ro/crate/1.2"},
4646
{"@id": "https://example.org/fancy-ro-crate/1.0"},
4747
]
4848
},
@@ -64,7 +64,7 @@ def test_find_root_bad_entities():
6464
"@id": "ro-crate-metadata.json",
6565
"@type": "CreativeWork",
6666
"about": {"@id": "./"},
67-
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
67+
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"},
6868
},
6969
"./": {
7070
"@id": "./",
@@ -101,7 +101,7 @@ def test_find_root_multiple_entries():
101101
"@id": "http://example.org/ro-crate-metadata.json",
102102
"@type": "CreativeWork",
103103
"about": {"@id": "http://example.org/"},
104-
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
104+
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"},
105105
},
106106
"http://example.org/": {
107107
"@id": "http://example.org/",
@@ -115,7 +115,7 @@ def test_find_root_multiple_entries():
115115
"@id": "http://example.com/ro-crate-metadata.json",
116116
"@type": "CreativeWork",
117117
"about": {"@id": "http://example.com/"},
118-
"conformsTo": {"@id": "https://w3id.com/ro/crate/1.1"},
118+
"conformsTo": {"@id": "https://w3id.com/ro/crate/1.2"},
119119
},
120120
"http://example.com/": {
121121
"@id": "http://example.com/",
@@ -166,7 +166,7 @@ def test_find_root_multiple_types():
166166
"@id": "ro-crate-metadata.json",
167167
"@type": "CreativeWork",
168168
"about": {"@id": "./"},
169-
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
169+
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"},
170170
},
171171
{
172172
"@id": "./",

test/test_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ def test_self_delete(test_data_dir):
315315
def test_entity_as_mapping(tmpdir, helpers):
316316
orcid = "https://orcid.org/0000-0002-1825-0097"
317317
metadata = {
318-
"@context": "https://w3id.org/ro/crate/1.1/context",
318+
"@context": "https://w3id.org/ro/crate/1.2/context",
319319
"@graph": [
320320
{"@id": "ro-crate-metadata.json",
321321
"@type": "CreativeWork",
@@ -324,7 +324,7 @@ def test_entity_as_mapping(tmpdir, helpers):
324324
"application/json",
325325
{"@id": "https://www.json.org"},
326326
],
327-
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}},
327+
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}},
328328
{"@id": "./",
329329
"@type": "Dataset",
330330
"correction": [

0 commit comments

Comments
 (0)