diff --git a/README.md b/README.md index c42645bd..6e272994 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![Python package](https://github.com/ResearchObject/ro-crate-py/workflows/Python%20package/badge.svg)](https://github.com/ResearchObject/ro-crate-py/actions?query=workflow%3A%22Python+package%22) [![Upload Python Package](https://github.com/ResearchObject/ro-crate-py/workflows/Upload%20Python%20Package/badge.svg)](https://github.com/ResearchObject/ro-crate-py/actions?query=workflow%3A%22Upload+Python+Package%22) [![PyPI version](https://badge.fury.io/py/rocrate.svg)](https://pypi.org/project/rocrate/) [![DOI](https://zenodo.org/badge/216605684.svg)](https://zenodo.org/badge/latestdoi/216605684) -ro-crate-py is a Python library to create and consume [Research Object Crates](https://w3id.org/ro/crate). It currently supports the [RO-Crate 1.1](https://w3id.org/ro/crate/1.1) specification. +ro-crate-py is a Python library to create and consume [Research Object Crates](https://w3id.org/ro/crate). It supports the current [RO-Crate 1.2](https://w3id.org/ro/crate/1.2) specification as well as the older [RO-Crate 1.1](https://w3id.org/ro/crate/1.1) and [RO-Crate 1.0](https://w3id.org/ro/crate/1.0). ## Installation @@ -225,6 +225,25 @@ Note that entities can have multiple types, e.g.: "@type" = ["File", "SoftwareSourceCode"] ``` +#### Selecting the RO-Crate specification version + +By default, a newly created RO-Crate conforms to the [RO-Crate 1.2](https://w3id.org/ro/crate/1.2) specification, but 1.0 and 1.1 are still supported: + +```pycon +>>> from rocrate.rocrate import ROCrate +>>> crate = ROCrate() +>>> crate.version +'1.2' +>>> crate = ROCrate(version="1.0") +>>> crate.version +'1.0' +>>> crate.metadata.id +'ro-crate-metadata.jsonld' +``` + +When consuming an RO-Crate (see below), the `version` parameter is ignored, and the RO-Crate version is read from the metadata descriptor instead. 
+ + ### Consuming an RO-Crate An existing RO-Crate package can be loaded from a directory or zip file: diff --git a/rocrate/metadata.py b/rocrate/metadata.py index dd8a688a..4c1e7b76 100644 --- a/rocrate/metadata.py +++ b/rocrate/metadata.py @@ -23,7 +23,7 @@ import json import warnings -from .model import Metadata, LegacyMetadata +from .model.metadata import BASENAME, LEGACY_BASENAME def read_metadata(metadata_path): @@ -85,13 +85,13 @@ def find_root_entity_id(entities): is more than one, we just return an arbitrary pair. """ - descriptor = entities.get(Metadata.BASENAME, entities.get(LegacyMetadata.BASENAME)) + descriptor = entities.get(BASENAME, entities.get(LEGACY_BASENAME)) if descriptor: return _check_descriptor(descriptor, entities) candidates = [] for id_, e in entities.items(): basename = id_.rsplit("/", 1)[-1] - if basename == Metadata.BASENAME or basename == LegacyMetadata.BASENAME: + if basename == BASENAME or basename == LEGACY_BASENAME: try: candidates.append(_check_descriptor(e, entities)) except ValueError: diff --git a/rocrate/model/__init__.py b/rocrate/model/__init__.py index ae42887a..c2ed0e7e 100644 --- a/rocrate/model/__init__.py +++ b/rocrate/model/__init__.py @@ -38,7 +38,7 @@ from .entity import Entity from .file import File from .file_or_dir import FileOrDir -from .metadata import Metadata, LegacyMetadata +from .metadata import Metadata from .person import Person from .root_dataset import RootDataset from .softwareapplication import SoftwareApplication @@ -58,7 +58,6 @@ "Entity", "File", "FileOrDir", - "LegacyMetadata", "Metadata", "Person", "Preview", diff --git a/rocrate/model/metadata.py b/rocrate/model/metadata.py index 9a7c63e4..4ebbeb38 100644 --- a/rocrate/model/metadata.py +++ b/rocrate/model/metadata.py @@ -29,6 +29,11 @@ from .dataset import Dataset +SUPPORTED_VERSIONS = {"1.0", "1.1", "1.2"} +DEFAULT_VERSION = "1.2" +BASENAME = "ro-crate-metadata.json" +LEGACY_BASENAME = "ro-crate-metadata.jsonld" + WORKFLOW_PROFILE = 
"https://w3id.org/workflowhub/workflow-ro-crate/1.0" @@ -36,12 +41,13 @@ class Metadata(File): """\ RO-Crate metadata file. """ - BASENAME = "ro-crate-metadata.json" - PROFILE = "https://w3id.org/ro/crate/1.1" - - def __init__(self, crate, source=None, dest_path=None, properties=None): + def __init__(self, crate, source=None, dest_path=None, properties=None, version=DEFAULT_VERSION): + if version not in SUPPORTED_VERSIONS: + raise ValueError(f"version {version!r} not supported") + self.version = version + self.profile = f"https://w3id.org/ro/crate/{self.version}" if source is None and dest_path is None: - dest_path = self.BASENAME + dest_path = LEGACY_BASENAME if version == "1.0" else BASENAME super().__init__( crate, source=source, @@ -58,7 +64,7 @@ def _empty(self): # default properties of the metadata entry val = {"@id": self.id, "@type": "CreativeWork", - "conformsTo": {"@id": self.PROFILE}, + "conformsTo": {"@id": self.profile}, "about": {"@id": "./"}} return val @@ -68,7 +74,7 @@ def generate(self): graph = [] for entity in self.crate.get_entities(): graph.append(entity.properties()) - context = [f'{self.PROFILE}/context'] + context = [f'{self.profile}/context'] context.extend(self.extra_contexts) if self.extra_terms: context.append(self.extra_terms) @@ -92,12 +98,6 @@ def root(self) -> Dataset: return self.crate.root_dataset -class LegacyMetadata(Metadata): - - BASENAME = "ro-crate-metadata.jsonld" - PROFILE = "https://w3id.org/ro/crate/1.0" - - # https://github.com/ResearchObject/ro-terms/tree/master/test TESTING_EXTRA_TERMS = { "TestSuite": "https://w3id.org/ro/terms/test#TestSuite", @@ -114,13 +114,3 @@ class LegacyMetadata(Metadata): "definition": "https://w3id.org/ro/terms/test#definition", "engineVersion": "https://w3id.org/ro/terms/test#engineVersion" } - - -def metadata_class(descriptor_id): - basename = descriptor_id.rsplit("/", 1)[-1] - if basename == Metadata.BASENAME: - return Metadata - elif basename == LegacyMetadata.BASENAME: - return 
def get_version(metadata_properties):
    """\
    Return the RO-Crate version declared by a metadata descriptor.

    Scan the ``conformsTo`` values of ``metadata_properties`` and return the
    last path segment of the first one that sits directly under the RO-Crate
    base URI, e.g. ``"1.1"`` for ``https://w3id.org/ro/crate/1.1``.

    Return ``None`` if no value matches, so the caller can choose a fallback.
    The returned string is not checked against the supported versions.
    """
    ro_crate_base = "https://w3id.org/ro/crate"
    # assumes get_norm_value yields plain URI strings -- TODO confirm
    for uri in get_norm_value(metadata_properties, "conformsTo"):
        # rpartition never fails: a value without "/" gives an empty base,
        # whereas unpacking rsplit("/", 1) would raise ValueError
        base_uri, _, version = uri.rpartition("/")
        # exact match: a prefix test would also accept deeper or sibling
        # URIs such as ".../ro/crate/1.1/context" and return "context"
        if base_uri == ro_crate_base and version:
            return version
    return None
raise NotADirectoryError(errno.ENOTDIR, f"'{top_dir}': not a directory") - self.add(RootDataset(self), Metadata(self)) + self.add(RootDataset(self), Metadata(self, version=version)) for root, dirs, files in walk(top_dir, exclude=self.exclude): root = Path(root) for name in dirs: @@ -116,7 +123,7 @@ def __init_from_tree(self, top_dir, gen_preview=False): self.add_dataset(source, source.relative_to(top_dir)) for name in files: source = root / name - if source == top_dir / Metadata.BASENAME or source == top_dir / LegacyMetadata.BASENAME: + if source == top_dir / BASENAME or source == top_dir / LEGACY_BASENAME: continue if source != top_dir / Preview.BASENAME: self.add_file(source, source.relative_to(top_dir)) @@ -136,11 +143,11 @@ def __read(self, source, gen_preview=False): with zipfile.ZipFile(source, "r") as zf: zf.extractall(zip_path) source = Path(zip_path) - metadata_path = source / Metadata.BASENAME + metadata_path = source / BASENAME if not metadata_path.is_file(): - metadata_path = source / LegacyMetadata.BASENAME + metadata_path = source / LEGACY_BASENAME if not metadata_path.is_file(): - raise ValueError(f"Not a valid RO-Crate: missing {Metadata.BASENAME}") + raise ValueError(f"Not a valid RO-Crate: missing {BASENAME}") _, entities = read_metadata(metadata_path) self.__read_data_entities(entities, source, gen_preview) self.__read_contextual_entities(entities) @@ -154,9 +161,9 @@ def __read_data_entities(self, entities, source, gen_preview): assert root_id == root_entity.pop('@id') parts = as_list(root_entity.pop('hasPart', [])) self.add(RootDataset(self, root_id, properties=root_entity)) - MetadataClass = metadata_class(metadata_id) metadata_properties = entities.pop(metadata_id) - self.add(MetadataClass(self, metadata_id, properties=metadata_properties)) + version = get_version(metadata_properties) or DEFAULT_VERSION + self.add(Metadata(self, metadata_id, properties=metadata_properties, version=version)) preview_entity = entities.pop(Preview.BASENAME, 
None) if preview_entity and not gen_preview: @@ -198,18 +205,18 @@ def __read_contextual_entities(self, entities): @property def default_entities(self): return [e for e in self.__entity_map.values() - if isinstance(e, (RootDataset, Metadata, LegacyMetadata, Preview))] + if isinstance(e, (RootDataset, Metadata, Preview))] @property def data_entities(self): return [e for e in self.__entity_map.values() - if not isinstance(e, (RootDataset, Metadata, LegacyMetadata, Preview)) + if not isinstance(e, (RootDataset, Metadata, Preview)) and hasattr(e, "write")] @property def contextual_entities(self): return [e for e in self.__entity_map.values() - if not isinstance(e, (RootDataset, Metadata, LegacyMetadata, Preview)) + if not isinstance(e, (RootDataset, Metadata, Preview)) and not hasattr(e, "write")] @property @@ -300,6 +307,10 @@ def mainEntity(self): def mainEntity(self, value): self.root_dataset['mainEntity'] = value + @property + def version(self): + return self.metadata.version + @property def test_dir(self): rval = self.dereference("test") @@ -417,7 +428,7 @@ def add(self, *entities): key = e.canonical_id() if isinstance(e, RootDataset): self.root_dataset = e - elif isinstance(e, (Metadata, LegacyMetadata)): + elif isinstance(e, Metadata): self.metadata = e elif isinstance(e, Preview): self.preview = e diff --git a/test/conftest.py b/test/conftest.py index ce6a7e73..ae8cc712 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -21,24 +21,22 @@ # limitations under the License. 
@classmethod
def read_json_entities(cls, crate_base_path):
    """\
    Load the metadata file of a crate and index its entities by ``@id``.

    Look for ``cls.METADATA_FILE_NAME`` under ``crate_base_path``; if that
    is not a file, use ``cls.LEGACY_METADATA_FILE_NAME`` instead. Return a
    dict mapping each ``@id`` in the ``@graph`` array to its JSON entity.
    """
    crate_base_path = Path(crate_base_path)
    metadata_path = crate_base_path / cls.METADATA_FILE_NAME
    if not metadata_path.is_file():
        metadata_path = crate_base_path / cls.LEGACY_METADATA_FILE_NAME
    # read as UTF-8 explicitly: the locale default encoding may differ
    # and would mis-decode non-ASCII metadata
    with open(metadata_path, "rt", encoding="utf-8") as f:
        json_data = json.load(f)
    return {entity["@id"]: entity for entity in json_data["@graph"]}
a/test/test-data/crate-1.0/data.csv b/test/test-data/crate-1.0/data.csv new file mode 100644 index 00000000..add268e5 --- /dev/null +++ b/test/test-data/crate-1.0/data.csv @@ -0,0 +1,3 @@ +name,number +foo,1 +bar,2 diff --git a/test/test-data/crate-1.0/ro-crate-metadata.jsonld b/test/test-data/crate-1.0/ro-crate-metadata.jsonld new file mode 100644 index 00000000..8387f121 --- /dev/null +++ b/test/test-data/crate-1.0/ro-crate-metadata.jsonld @@ -0,0 +1,30 @@ +{ + "@context": "https://w3id.org/ro/crate/1.0/context", + "@graph": [ + { + "@id": "ro-crate-metadata.jsonld", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.0"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example crate", + "description": "An example RO-Crate", + "datePublished": "2025-10-17", + "license": {"@id": "http://spdx.org/licenses/CC0-1.0"}, + "hasPart": [{"@id": "data.csv"}] + }, + { + "@id": "data.csv", + "@type": "File", + "name": "CSV data" + }, + { + "@id": "http://spdx.org/licenses/CC0-1.0", + "@type": "CreativeWork", + "name": "CC0-1.0" + } + ] +} diff --git a/test/test-data/crate-1.1/data.csv b/test/test-data/crate-1.1/data.csv new file mode 100644 index 00000000..add268e5 --- /dev/null +++ b/test/test-data/crate-1.1/data.csv @@ -0,0 +1,3 @@ +name,number +foo,1 +bar,2 diff --git a/test/test-data/crate-1.1/ro-crate-metadata.json b/test/test-data/crate-1.1/ro-crate-metadata.json new file mode 100644 index 00000000..db8b31cb --- /dev/null +++ b/test/test-data/crate-1.1/ro-crate-metadata.json @@ -0,0 +1,30 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"} + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Example crate", + "description": "An example RO-Crate", + "datePublished": "2025-10-17", + "license": {"@id": "http://spdx.org/licenses/CC0-1.0"}, 
+ "hasPart": [{"@id": "data.csv"}] + }, + { + "@id": "data.csv", + "@type": "File", + "name": "CSV data" + }, + { + "@id": "http://spdx.org/licenses/CC0-1.0", + "@type": "CreativeWork", + "name": "CC0-1.0" + } + ] +} diff --git a/test/test-data/read_crate/ro-crate-metadata.json b/test/test-data/read_crate/ro-crate-metadata.json index 824293e3..6568c329 100644 --- a/test/test-data/read_crate/ro-crate-metadata.json +++ b/test/test-data/read_crate/ro-crate-metadata.json @@ -1,5 +1,5 @@ { - "@context": "https://w3id.org/ro/crate/1.1/context", + "@context": "https://w3id.org/ro/crate/1.2/context", "@graph": [ { "@id": "ro-crate-metadata.json", @@ -8,7 +8,7 @@ "@id": "./" }, "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.1" + "@id": "https://w3id.org/ro/crate/1.2" } }, { diff --git a/test/test-data/read_extra/ro-crate-metadata.json b/test/test-data/read_extra/ro-crate-metadata.json index 90e26823..8d6a042b 100644 --- a/test/test-data/read_extra/ro-crate-metadata.json +++ b/test/test-data/read_extra/ro-crate-metadata.json @@ -1,5 +1,5 @@ { - "@context": "https://w3id.org/ro/crate/1.1/context", + "@context": "https://w3id.org/ro/crate/1.2/context", "@graph": [ { "@id": "./", @@ -24,7 +24,7 @@ "@id": "./" }, "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.1" + "@id": "https://w3id.org/ro/crate/1.2" } }, { diff --git a/test/test-data/ro-crate-galaxy-sortchangecase/ro-crate-metadata.json b/test/test-data/ro-crate-galaxy-sortchangecase/ro-crate-metadata.json index ddde0c08..aeddd770 100644 --- a/test/test-data/ro-crate-galaxy-sortchangecase/ro-crate-metadata.json +++ b/test/test-data/ro-crate-galaxy-sortchangecase/ro-crate-metadata.json @@ -1,6 +1,6 @@ { "@context": [ - "https://w3id.org/ro/crate/1.1/context", + "https://w3id.org/ro/crate/1.2/context", { "TestSuite": "https://w3id.org/ro/terms/test#TestSuite", "TestInstance": "https://w3id.org/ro/terms/test#TestInstance", @@ -25,7 +25,7 @@ "@id": "./" }, "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.1" + 
"@id": "https://w3id.org/ro/crate/1.2" } }, { diff --git a/test/test_model.py b/test/test_model.py index e195eded..1f17f09d 100644 --- a/test/test_model.py +++ b/test/test_model.py @@ -39,6 +39,7 @@ Preview, ContextEntity ) +from .conftest import BASE_URL, DEFAULT_VERSION RAW_REPO_URL = "https://raw.githubusercontent.com/ResearchObject/ro-crate-py" @@ -456,7 +457,7 @@ def test_get_by_type(test_data_dir): def test_context(helpers): crate = ROCrate() jsonld = crate.metadata.generate() - base_context = f"{helpers.PROFILE}/context" + base_context = f"{BASE_URL}/{DEFAULT_VERSION}/context" assert jsonld["@context"] == base_context wfrun_ctx = "https://w3id.org/ro/terms/workflow-run" crate.metadata.extra_contexts.append(wfrun_ctx) diff --git a/test/test_read.py b/test/test_read.py index 4396d891..17194b99 100644 --- a/test/test_read.py +++ b/test/test_read.py @@ -43,6 +43,7 @@ def test_crate_dir_loading(test_data_dir, tmpdir, helpers, gen_preview, from_zip else: crate = ROCrate(crate_dir, gen_preview=gen_preview) + assert crate.version == "1.2" assert set(_["@id"] for _ in crate.default_entities) == { "./", "ro-crate-metadata.json", @@ -74,8 +75,7 @@ def test_crate_dir_loading(test_data_dir, tmpdir, helpers, gen_preview, from_zip assert md_prop['@id'] == helpers.METADATA_FILE_NAME assert md_prop['@type'] == 'CreativeWork' assert md_prop['about'] == {'@id': './'} - # conformsTo is currently hardcoded in the Metadata class, not read from the crate - assert md_prop['conformsTo'] == {'@id': helpers.PROFILE} + assert md_prop['conformsTo'] == {'@id': "https://w3id.org/ro/crate/1.2"} assert metadata.root is root preview = crate.dereference(helpers.PREVIEW_FILE_NAME) @@ -170,7 +170,7 @@ def test_legacy_crate(test_data_dir, tmpdir, helpers): md_prop = crate.metadata.properties() assert crate.dereference(helpers.LEGACY_METADATA_FILE_NAME) is crate.metadata - assert md_prop['conformsTo'] == {'@id': helpers.LEGACY_PROFILE} + assert md_prop['conformsTo'] == {'@id': 
"https://w3id.org/ro/crate/1.0"} main_wf = crate.dereference('test_galaxy_wf.ga') wf_prop = main_wf.properties() @@ -683,3 +683,10 @@ def test_init_percent_escape(tmpdir, helpers): assert (out_crate / "in file.txt").is_file() assert (out_crate / "in dir").is_dir() assert (out_crate / "in dir" / "deep file.txt").is_file() + + +def test_read_version(test_data_dir): + crate = ROCrate(test_data_dir / "crate-1.0") + assert crate.version == "1.0" + crate = ROCrate(test_data_dir / "crate-1.1") + assert crate.version == "1.1" diff --git a/test/test_write.py b/test/test_write.py index 197d0bc4..d8f5e9fd 100644 --- a/test/test_write.py +++ b/test/test_write.py @@ -21,6 +21,7 @@ # limitations under the License. import io +import json import pytest import requests import os @@ -597,3 +598,19 @@ def test_write_zip_nested_dest(tmpdir, helpers): assert "subdir/a%20b/" in json_entities assert (unpack_path / "subdir" / "a b" / "c d.txt").is_file() assert (unpack_path / "subdir" / "a b" / "j k" / "l m.txt").is_file() + + +@pytest.mark.parametrize("version", ["1.0", "1.1", "1.2"]) +def test_write_version(tmpdir, helpers, version): + basename = helpers.LEGACY_METADATA_FILE_NAME if version == "1.0" else helpers.METADATA_FILE_NAME + crate = ROCrate(version=version) + assert crate.metadata.version == version + out_path = tmpdir / "ro_crate_out" + crate.write(out_path) + assert (out_path / basename).is_file() + json_entities = helpers.read_json_entities(out_path) + assert (md := json_entities.get(basename)) is not None + assert md["conformsTo"]["@id"] == f"https://w3id.org/ro/crate/{version}" + with open(out_path / basename, "rt") as f: + data = json.load(f) + assert data["@context"] == f"https://w3id.org/ro/crate/{version}/context"