diff --git a/requirements.txt b/requirements.txt index 6f3749b..81fcc58 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ arcp==0.2.1 jinja2 python-dateutil click +packaging diff --git a/rocrate/model/metadata.py b/rocrate/model/metadata.py index 4ebbeb3..9974b30 100644 --- a/rocrate/model/metadata.py +++ b/rocrate/model/metadata.py @@ -25,6 +25,8 @@ import json from pathlib import Path +from packaging.version import Version + from .file import File from .dataset import Dataset @@ -45,6 +47,7 @@ def __init__(self, crate, source=None, dest_path=None, properties=None, version= if version not in SUPPORTED_VERSIONS: raise ValueError(f"version {version!r} not supported") self.version = version + self.version_obj = Version(version) self.profile = f"https://w3id.org/ro/crate/{self.version}" if source is None and dest_path is None: dest_path = LEGACY_BASENAME if version == "1.0" else BASENAME diff --git a/rocrate/rocrate.py b/rocrate/rocrate.py index 3467ee9..87b70e5 100644 --- a/rocrate/rocrate.py +++ b/rocrate/rocrate.py @@ -33,6 +33,8 @@ from pathlib import Path from urllib.parse import urljoin +from packaging.version import Version + from .memory_buffer import MemoryBuffer from .model import ( ComputationalWorkflow, @@ -63,6 +65,15 @@ from .metadata import read_metadata, find_root_entity_id +DATA_ENTITY_TYPES = {"File", "Dataset"} + + +def is_data_entity(entity): + if entity["@id"].startswith("#"): + return False + return DATA_ENTITY_TYPES.intersection(as_list(entity.get("@type", []))) + + def pick_type(json_entity, type_map, fallback=None): try: t = json_entity["@type"] @@ -172,12 +183,14 @@ def __read_data_entities(self, entities, source, gen_preview): def __add_parts(self, parts, entities, source): type_map = OrderedDict((_.__name__, _) for _ in subclasses(FileOrDir)) - for data_entity_ref in parts: - id_ = data_entity_ref['@id'] - try: - entity = entities.pop(id_) - except KeyError: + for ref in parts: + id_ = ref['@id'] + if id_ not in entities: continue + if self.version_obj >= Version("1.2"): + if not is_data_entity(entities[id_]): + continue + entity = entities.pop(id_) assert id_ == entity.pop('@id') cls = pick_type(entity, type_map, fallback=DataEntity) if cls is DataEntity: @@ -193,11 +206,13 @@ def __add_parts(self, parts, entities, source): def __read_contextual_entities(self, entities): type_map = {_.__name__: _ for _ in subclasses(ContextEntity)} - # types *commonly* used for data entities - data_entity_types = {"File", "Dataset"} for identifier, entity in entities.items(): - if data_entity_types.intersection(as_list(entity.get("@type", []))): - warnings.warn(f"{entity['@id']} looks like a data entity but it's not listed in the root dataset's hasPart") + if is_data_entity(entity): + id_ = entity['@id'] + if self.version_obj >= Version("1.2"): + raise ValueError(f"'{id_}' is a data entity but it's not linked to from the root dataset's hasPart") + else: + warnings.warn(f"'{id_}' looks like a data entity but it's not listed in the root dataset's hasPart") assert identifier == entity.pop('@id') cls = pick_type(entity, type_map, fallback=ContextEntity) self.add(cls(self, identifier, entity)) @@ -311,6 +326,10 @@ def mainEntity(self, value): def version(self): return self.metadata.version + @property + def version_obj(self): + return self.metadata.version_obj + @property def test_dir(self): rval = self.dereference("test") diff --git a/test/test_metadata.py b/test/test_metadata.py index 826dece..c57ed18 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -42,7 +42,7 @@ def test_find_root(root, basename): "@type": "CreativeWork", "about": {"@id": root_id}, "conformsTo": [ - {"@id": "https://w3id.org/ro/crate/1.1"}, + {"@id": "https://w3id.org/ro/crate/1.2"}, {"@id": "https://example.org/fancy-ro-crate/1.0"}, ] }, @@ -64,7 +64,7 @@ def test_find_root_bad_entities(): "@id": "ro-crate-metadata.json", "@type": "CreativeWork", "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, }, "./": { "@id": "./", @@ -101,7 +101,7 @@ def test_find_root_multiple_entries(): "@id": "http://example.org/ro-crate-metadata.json", "@type": "CreativeWork", "about": {"@id": "http://example.org/"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, }, "http://example.org/": { "@id": "http://example.org/", @@ -115,7 +115,7 @@ def test_find_root_multiple_entries(): "@id": "http://example.com/ro-crate-metadata.json", "@type": "CreativeWork", "about": {"@id": "http://example.com/"}, - "conformsTo": {"@id": "https://w3id.com/ro/crate/1.1"}, + "conformsTo": {"@id": "https://w3id.com/ro/crate/1.2"}, }, "http://example.com/": { "@id": "http://example.com/", @@ -166,7 +166,7 @@ def test_find_root_multiple_types(): "@id": "ro-crate-metadata.json", "@type": "CreativeWork", "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}, }, { "@id": "./", diff --git a/test/test_model.py b/test/test_model.py index 1f17f09..2bb15bd 100644 --- a/test/test_model.py +++ b/test/test_model.py @@ -315,7 +315,7 @@ def test_self_delete(test_data_dir): def test_entity_as_mapping(tmpdir, helpers): orcid = "https://orcid.org/0000-0002-1825-0097" metadata = { - "@context": "https://w3id.org/ro/crate/1.1/context", + "@context": "https://w3id.org/ro/crate/1.2/context", "@graph": [ {"@id": "ro-crate-metadata.json", "@type": "CreativeWork", @@ -324,7 +324,7 @@ def test_entity_as_mapping(tmpdir, helpers): "application/json", {"@id": "https://www.json.org"}, ], - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2"}}, {"@id": "./", "@type": "Dataset", "correction": [ diff --git a/test/test_read.py b/test/test_read.py index 17194b9..359da13 100644 --- a/test/test_read.py +++ b/test/test_read.py @@ -28,7 +28,7 @@ from pathlib import Path from rocrate.rocrate import ROCrate -from rocrate.model import DataEntity, File, Dataset +from rocrate.model import DataEntity, ContextEntity, File, Dataset _URL = ('https://raw.githubusercontent.com/ResearchObject/ro-crate-py/master/' 'test/test-data/sample_file.txt') @@ -167,6 +167,7 @@ def test_legacy_crate(test_data_dir, tmpdir, helpers): # Remove the metadata file, leaving only the legacy one (crate_dir / helpers.METADATA_FILE_NAME).unlink() crate = ROCrate(crate_dir) + assert crate.version == "1.0" md_prop = crate.metadata.properties() assert crate.dereference(helpers.LEGACY_METADATA_FILE_NAME) is crate.metadata @@ -377,11 +378,12 @@ def test_missing_file(test_data_dir, tmpdir): assert (out_path / name).read_text() == text -def test_generic_data_entity(tmpdir): +@pytest.mark.parametrize("version", ["1.1", "1.2"]) +def test_generic_data_entity(tmpdir, version): rc_id = "#collection" metadata = { "@context": [ - "https://w3id.org/ro/crate/1.1/context", + f"https://w3id.org/ro/crate/{version}/context", {"@vocab": "http://schema.org/"}, {"@base": None} ], @@ -389,6 +391,9 @@ def test_generic_data_entity(tmpdir): { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", + "conformsTo": { + "@id": f"https://w3id.org/ro/crate/{version}" + }, "about": { "@id": "./" }, @@ -416,12 +421,19 @@ def test_generic_data_entity(tmpdir): def check_rc(): rc = crate.dereference(rc_id) assert rc is not None - assert isinstance(rc, DataEntity) + if version == "1.2": + assert isinstance(rc, ContextEntity) + else: + assert isinstance(rc, DataEntity) assert rc.id == rc_id assert rc.type == "RepositoryCollection" assert rc._jsonld["name"] == "Test collection" - assert crate.data_entities == [rc] - assert not crate.contextual_entities + if version == "1.2": + assert not crate.data_entities + assert crate.contextual_entities == [rc] + else: + assert crate.data_entities == [rc] + assert not crate.contextual_entities check_rc() @@ -432,14 +444,15 @@ def check_rc(): check_rc() -def test_root_conformsto(tmpdir): +@pytest.mark.parametrize("version", ["1.1", "1.2"]) +def test_root_conformsto(tmpdir, version): # actually not a valid workflow ro-crate, but here it does not matter profiles = [ - "https://w3id.org/ro/crate/1.1", + f"https://w3id.org/ro/crate/{version}", "https://w3id.org/workflowhub/workflow-ro-crate/1.0", ] metadata = { - "@context": "https://w3id.org/ro/crate/1.1/context", + "@context": f"https://w3id.org/ro/crate/{version}/context", "@graph": [ { "@id": "ro-crate-metadata.json", @@ -461,16 +474,17 @@ def test_root_conformsto(tmpdir): assert crate.metadata["conformsTo"] == profiles -def test_multi_type_context_entity(tmpdir): +@pytest.mark.parametrize("version", ["1.1", "1.2"]) +def test_multi_type_context_entity(tmpdir, version): id_, type_ = "#xyz", ["Project", "Organization"] metadata = { - "@context": "https://w3id.org/ro/crate/1.1/context", + "@context": f"https://w3id.org/ro/crate/{version}/context", "@graph": [ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"} + "conformsTo": {"@id": f"https://w3id.org/ro/crate/{version}"} }, { "@id": "./", @@ -492,15 +506,16 @@ def test_multi_type_context_entity(tmpdir): assert set(entity.type) == set(type_) -def test_indirect_data_entity(tmpdir): +@pytest.mark.parametrize("version", ["1.1", "1.2"]) +def test_indirect_data_entity(tmpdir, version): metadata = { - "@context": "https://w3id.org/ro/crate/1.1/context", + "@context": f"https://w3id.org/ro/crate/{version}/context", "@graph": [ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"} + "conformsTo": {"@id": f"https://w3id.org/ro/crate/{version}"} }, { "@id": "./", @@ -546,15 +561,16 @@ def test_indirect_data_entity(tmpdir): @pytest.mark.filterwarnings("ignore") -def test_from_dict(tmpdir): +@pytest.mark.parametrize("version", ["1.1", "1.2"]) +def test_from_dict(tmpdir, version): metadata = { - "@context": "https://w3id.org/ro/crate/1.1/context", + "@context": f"https://w3id.org/ro/crate/{version}/context", "@graph": [ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"} + "conformsTo": {"@id": f"https://w3id.org/ro/crate/{version}"} }, { "@id": "./", @@ -605,15 +621,16 @@ def test_from_dict(tmpdir): ROCrate(metadata, init=True) -def test_no_data_entity_link_from_file(): +@pytest.mark.parametrize("version", ["1.1", "1.2"]) +def test_no_data_entity_link_from_file(version): metadata = { - "@context": "https://w3id.org/ro/crate/1.1/context", + "@context": f"https://w3id.org/ro/crate/{version}/context", "@graph": [ { "@id": "ro-crate-metadata.json", "@type": "CreativeWork", "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"} + "conformsTo": {"@id": f"https://w3id.org/ro/crate/{version}"} }, { "@id": "./", @@ -690,3 +707,80 @@ def test_read_version(test_data_dir): assert crate.version == "1.0" crate = ROCrate(test_data_dir / "crate-1.1") assert crate.version == "1.1" + + +@pytest.mark.filterwarnings("ignore") +@pytest.mark.parametrize("version", ["1.0", "1.1", "1.2"]) +def test_data_entity_not_linked(version): + metadata = { + "@context": f"https://w3id.org/ro/crate/{version}/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": f"https://w3id.org/ro/crate/{version}"} + }, + { + "@id": "./", + "@type": "Dataset", + "hasPart": [ + {"@id": "d1"} + ] + }, + { + "@id": "d1", + "@type": "Dataset" + }, + { + "@id": "f1.txt", + "@type": "File" + } + ] + } + if version == "1.2": + with pytest.raises(ValueError, match="hasPart"): + ROCrate(metadata) + else: + crate = ROCrate(metadata) + f1 = crate.get("f1.txt") + assert f1 in crate.contextual_entities + + +@pytest.mark.parametrize("version", ["1.0", "1.1", "1.2"]) +def test_not_data_entity_linked(version): + metadata = { + "@context": f"https://w3id.org/ro/crate/{version}/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": f"https://w3id.org/ro/crate/{version}"} + }, + { + "@id": "./", + "@type": "Dataset", + "hasPart": [ + {"@id": "d1"}, + {"@id": "#f1.txt"} + ] + }, + { + "@id": "d1", + "@type": "Dataset" + }, + { + "@id": "#f1.txt", + "@type": "File" + } + ] + } + crate = ROCrate(metadata) + d1 = crate.get("d1") + assert d1 in crate.data_entities + f1 = crate.get("#f1.txt") + if version == "1.2": + assert f1 in crate.contextual_entities + else: + assert f1 in crate.data_entities