Skip to content

Commit 8d115ea

Browse files
authored
Merge pull request #239 from simleo/rocrate_version
RO-Crate version selector
2 parents 0826f12 + e2cbbf0 commit 8d115ea

File tree

16 files changed

+175
-64
lines changed

16 files changed

+175
-64
lines changed

README.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[![Python package](https://github.com/ResearchObject/ro-crate-py/workflows/Python%20package/badge.svg)](https://github.com/ResearchObject/ro-crate-py/actions?query=workflow%3A%22Python+package%22) [![Upload Python Package](https://github.com/ResearchObject/ro-crate-py/workflows/Upload%20Python%20Package/badge.svg)](https://github.com/ResearchObject/ro-crate-py/actions?query=workflow%3A%22Upload+Python+Package%22) [![PyPI version](https://badge.fury.io/py/rocrate.svg)](https://pypi.org/project/rocrate/) [![DOI](https://zenodo.org/badge/216605684.svg)](https://zenodo.org/badge/latestdoi/216605684)
22

3-
ro-crate-py is a Python library to create and consume [Research Object Crates](https://w3id.org/ro/crate). It currently supports the [RO-Crate 1.1](https://w3id.org/ro/crate/1.1) specification.
3+
ro-crate-py is a Python library to create and consume [Research Object Crates](https://w3id.org/ro/crate). It supports the current [RO-Crate 1.2](https://w3id.org/ro/crate/1.2) specification as well as the older [RO-Crate 1.1](https://w3id.org/ro/crate/1.1) and [RO-Crate 1.0](https://w3id.org/ro/crate/1.0).
44

55
## Installation
66

@@ -225,6 +225,25 @@ Note that entities can have multiple types, e.g.:
225225
"@type": ["File", "SoftwareSourceCode"]
226226
```
227227

228+
#### Selecting the RO-Crate specification version
229+
230+
By default, a newly created RO-Crate conforms to the [RO-Crate 1.2](https://w3id.org/ro/crate/1.2) specification, but 1.0 and 1.1 are still supported:
231+
232+
```pycon
233+
>>> from rocrate.rocrate import ROCrate
234+
>>> crate = ROCrate()
235+
>>> crate.version
236+
'1.2'
237+
>>> crate = ROCrate(version="1.0")
238+
>>> crate.version
239+
'1.0'
240+
>>> crate.metadata.id
241+
'ro-crate-metadata.jsonld'
242+
```
243+
244+
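When consuming an RO-Crate (see below), the `version` parameter is ignored, and the RO-Crate version is read from the `conformsTo` property of the metadata descriptor instead. If no RO-Crate specification URI is found there, the default version (1.2) is assumed.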
245+
246+
228247
### Consuming an RO-Crate
229248

230249
An existing RO-Crate package can be loaded from a directory or zip file:

rocrate/metadata.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import json
2424
import warnings
2525

26-
from .model import Metadata, LegacyMetadata
26+
from .model.metadata import BASENAME, LEGACY_BASENAME
2727

2828

2929
def read_metadata(metadata_path):
@@ -85,13 +85,13 @@ def find_root_entity_id(entities):
8585
is more than one, we just return an arbitrary pair.
8686
8787
"""
88-
descriptor = entities.get(Metadata.BASENAME, entities.get(LegacyMetadata.BASENAME))
88+
descriptor = entities.get(BASENAME, entities.get(LEGACY_BASENAME))
8989
if descriptor:
9090
return _check_descriptor(descriptor, entities)
9191
candidates = []
9292
for id_, e in entities.items():
9393
basename = id_.rsplit("/", 1)[-1]
94-
if basename == Metadata.BASENAME or basename == LegacyMetadata.BASENAME:
94+
if basename == BASENAME or basename == LEGACY_BASENAME:
9595
try:
9696
candidates.append(_check_descriptor(e, entities))
9797
except ValueError:

rocrate/model/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from .entity import Entity
3939
from .file import File
4040
from .file_or_dir import FileOrDir
41-
from .metadata import Metadata, LegacyMetadata
41+
from .metadata import Metadata
4242
from .person import Person
4343
from .root_dataset import RootDataset
4444
from .softwareapplication import SoftwareApplication
@@ -58,7 +58,6 @@
5858
"Entity",
5959
"File",
6060
"FileOrDir",
61-
"LegacyMetadata",
6261
"Metadata",
6362
"Person",
6463
"Preview",

rocrate/model/metadata.py

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,25 @@
2929
from .dataset import Dataset
3030

3131

32+
SUPPORTED_VERSIONS = {"1.0", "1.1", "1.2"}
33+
DEFAULT_VERSION = "1.2"
34+
BASENAME = "ro-crate-metadata.json"
35+
LEGACY_BASENAME = "ro-crate-metadata.jsonld"
36+
3237
WORKFLOW_PROFILE = "https://w3id.org/workflowhub/workflow-ro-crate/1.0"
3338

3439

3540
class Metadata(File):
3641
"""\
3742
RO-Crate metadata file.
3843
"""
39-
BASENAME = "ro-crate-metadata.json"
40-
PROFILE = "https://w3id.org/ro/crate/1.1"
41-
42-
def __init__(self, crate, source=None, dest_path=None, properties=None):
44+
def __init__(self, crate, source=None, dest_path=None, properties=None, version=DEFAULT_VERSION):
45+
if version not in SUPPORTED_VERSIONS:
46+
raise ValueError(f"version {version!r} not supported")
47+
self.version = version
48+
self.profile = f"https://w3id.org/ro/crate/{self.version}"
4349
if source is None and dest_path is None:
44-
dest_path = self.BASENAME
50+
dest_path = LEGACY_BASENAME if version == "1.0" else BASENAME
4551
super().__init__(
4652
crate,
4753
source=source,
@@ -58,7 +64,7 @@ def _empty(self):
5864
# default properties of the metadata entry
5965
val = {"@id": self.id,
6066
"@type": "CreativeWork",
61-
"conformsTo": {"@id": self.PROFILE},
67+
"conformsTo": {"@id": self.profile},
6268
"about": {"@id": "./"}}
6369
return val
6470

@@ -68,7 +74,7 @@ def generate(self):
6874
graph = []
6975
for entity in self.crate.get_entities():
7076
graph.append(entity.properties())
71-
context = [f'{self.PROFILE}/context']
77+
context = [f'{self.profile}/context']
7278
context.extend(self.extra_contexts)
7379
if self.extra_terms:
7480
context.append(self.extra_terms)
@@ -92,12 +98,6 @@ def root(self) -> Dataset:
9298
return self.crate.root_dataset
9399

94100

95-
class LegacyMetadata(Metadata):
96-
97-
BASENAME = "ro-crate-metadata.jsonld"
98-
PROFILE = "https://w3id.org/ro/crate/1.0"
99-
100-
101101
# https://github.com/ResearchObject/ro-terms/tree/master/test
102102
TESTING_EXTRA_TERMS = {
103103
"TestSuite": "https://w3id.org/ro/terms/test#TestSuite",
@@ -114,13 +114,3 @@ class LegacyMetadata(Metadata):
114114
"definition": "https://w3id.org/ro/terms/test#definition",
115115
"engineVersion": "https://w3id.org/ro/terms/test#engineVersion"
116116
}
117-
118-
119-
def metadata_class(descriptor_id):
120-
basename = descriptor_id.rsplit("/", 1)[-1]
121-
if basename == Metadata.BASENAME:
122-
return Metadata
123-
elif basename == LegacyMetadata.BASENAME:
124-
return LegacyMetadata
125-
else:
126-
raise ValueError(f"Invalid metadata descriptor ID: {descriptor_id!r}")

rocrate/rocrate.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
Entity,
4444
File,
4545
FileOrDir,
46-
LegacyMetadata,
4746
Metadata,
4847
Preview,
4948
RootDataset,
@@ -54,7 +53,7 @@
5453
TestSuite,
5554
WorkflowDescription,
5655
)
57-
from .model.metadata import WORKFLOW_PROFILE, TESTING_EXTRA_TERMS, metadata_class
56+
from .model.metadata import WORKFLOW_PROFILE, TESTING_EXTRA_TERMS, DEFAULT_VERSION, BASENAME, LEGACY_BASENAME
5857
from .model.computationalworkflow import galaxy_to_abstract_cwl
5958
from .model.computerlanguage import get_lang
6059
from .model.testservice import get_service
@@ -76,9 +75,17 @@ def pick_type(json_entity, type_map, fallback=None):
7675
return fallback
7776

7877

78+
def get_version(metadata_properties):
    """\
    Get the RO-Crate specification version from a metadata descriptor.

    Each ``conformsTo`` value of the descriptor (as returned by
    ``get_norm_value``, assumed here to be URI strings) is split at its last
    "/" into a base URI and a trailing segment. The trailing segment of the
    first value whose base URI starts with ``https://w3id.org/ro/crate`` is
    returned as the version.

    :param metadata_properties: JSON-LD properties of the metadata
      descriptor entity.
    :return: the version string (e.g., "1.1" for
      ``https://w3id.org/ro/crate/1.1``), or None if no ``conformsTo``
      value refers to an RO-Crate specification URI.
    """
    for uri in get_norm_value(metadata_properties, "conformsTo"):
        # rpartition never raises: a value with no "/" (e.g., a local
        # identifier) yields an empty base URI and is simply skipped, whereas
        # unpacking the result of rsplit("/", 1) would fail with ValueError.
        base_uri, _, version = uri.rpartition("/")
        if base_uri.startswith("https://w3id.org/ro/crate"):
            return version
    return None
84+
85+
7986
class ROCrate():
8087

81-
def __init__(self, source=None, gen_preview=False, init=False, exclude=None):
88+
def __init__(self, source=None, gen_preview=False, init=False, exclude=None, version=DEFAULT_VERSION):
8289
self.mode = None
8390
self.source = source
8491
self.exclude = exclude
@@ -92,7 +99,7 @@ def __init__(self, source=None, gen_preview=False, init=False, exclude=None):
9299
self.add(Preview(self))
93100
if not source:
94101
self.mode = Mode.CREATE
95-
self.add(RootDataset(self), Metadata(self))
102+
self.add(RootDataset(self), Metadata(self, version=version))
96103
elif init:
97104
self.mode = Mode.INIT
98105
if isinstance(source, dict):
@@ -104,19 +111,19 @@ def __init__(self, source=None, gen_preview=False, init=False, exclude=None):
104111
# in the zip case, self.source is the extracted dir
105112
self.source = source
106113

107-
def __init_from_tree(self, top_dir, gen_preview=False):
114+
def __init_from_tree(self, top_dir, gen_preview=False, version=DEFAULT_VERSION):
108115
top_dir = Path(top_dir)
109116
if not top_dir.is_dir():
110117
raise NotADirectoryError(errno.ENOTDIR, f"'{top_dir}': not a directory")
111-
self.add(RootDataset(self), Metadata(self))
118+
self.add(RootDataset(self), Metadata(self, version=version))
112119
for root, dirs, files in walk(top_dir, exclude=self.exclude):
113120
root = Path(root)
114121
for name in dirs:
115122
source = root / name
116123
self.add_dataset(source, source.relative_to(top_dir))
117124
for name in files:
118125
source = root / name
119-
if source == top_dir / Metadata.BASENAME or source == top_dir / LegacyMetadata.BASENAME:
126+
if source == top_dir / BASENAME or source == top_dir / LEGACY_BASENAME:
120127
continue
121128
if source != top_dir / Preview.BASENAME:
122129
self.add_file(source, source.relative_to(top_dir))
@@ -136,11 +143,11 @@ def __read(self, source, gen_preview=False):
136143
with zipfile.ZipFile(source, "r") as zf:
137144
zf.extractall(zip_path)
138145
source = Path(zip_path)
139-
metadata_path = source / Metadata.BASENAME
146+
metadata_path = source / BASENAME
140147
if not metadata_path.is_file():
141-
metadata_path = source / LegacyMetadata.BASENAME
148+
metadata_path = source / LEGACY_BASENAME
142149
if not metadata_path.is_file():
143-
raise ValueError(f"Not a valid RO-Crate: missing {Metadata.BASENAME}")
150+
raise ValueError(f"Not a valid RO-Crate: missing {BASENAME}")
144151
_, entities = read_metadata(metadata_path)
145152
self.__read_data_entities(entities, source, gen_preview)
146153
self.__read_contextual_entities(entities)
@@ -154,9 +161,9 @@ def __read_data_entities(self, entities, source, gen_preview):
154161
assert root_id == root_entity.pop('@id')
155162
parts = as_list(root_entity.pop('hasPart', []))
156163
self.add(RootDataset(self, root_id, properties=root_entity))
157-
MetadataClass = metadata_class(metadata_id)
158164
metadata_properties = entities.pop(metadata_id)
159-
self.add(MetadataClass(self, metadata_id, properties=metadata_properties))
165+
version = get_version(metadata_properties) or DEFAULT_VERSION
166+
self.add(Metadata(self, metadata_id, properties=metadata_properties, version=version))
160167

161168
preview_entity = entities.pop(Preview.BASENAME, None)
162169
if preview_entity and not gen_preview:
@@ -198,18 +205,18 @@ def __read_contextual_entities(self, entities):
198205
@property
199206
def default_entities(self):
200207
return [e for e in self.__entity_map.values()
201-
if isinstance(e, (RootDataset, Metadata, LegacyMetadata, Preview))]
208+
if isinstance(e, (RootDataset, Metadata, Preview))]
202209

203210
@property
204211
def data_entities(self):
205212
return [e for e in self.__entity_map.values()
206-
if not isinstance(e, (RootDataset, Metadata, LegacyMetadata, Preview))
213+
if not isinstance(e, (RootDataset, Metadata, Preview))
207214
and hasattr(e, "write")]
208215

209216
@property
210217
def contextual_entities(self):
211218
return [e for e in self.__entity_map.values()
212-
if not isinstance(e, (RootDataset, Metadata, LegacyMetadata, Preview))
219+
if not isinstance(e, (RootDataset, Metadata, Preview))
213220
and not hasattr(e, "write")]
214221

215222
@property
@@ -300,6 +307,10 @@ def mainEntity(self):
300307
def mainEntity(self, value):
301308
self.root_dataset['mainEntity'] = value
302309

310+
@property
311+
def version(self):
312+
return self.metadata.version
313+
303314
@property
304315
def test_dir(self):
305316
rval = self.dereference("test")
@@ -417,7 +428,7 @@ def add(self, *entities):
417428
key = e.canonical_id()
418429
if isinstance(e, RootDataset):
419430
self.root_dataset = e
420-
elif isinstance(e, (Metadata, LegacyMetadata)):
431+
elif isinstance(e, Metadata):
421432
self.metadata = e
422433
elif isinstance(e, Preview):
423434
self.preview = e

test/conftest.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,22 @@
2121
# limitations under the License.
2222

2323
import json
24-
import pathlib
2524
import shutil
25+
from pathlib import Path
2626

2727
import pytest
2828
from rocrate.utils import get_norm_value
2929

3030

31-
THIS_DIR = pathlib.Path(__file__).absolute().parent
31+
THIS_DIR = Path(__file__).absolute().parent
3232
TEST_DATA_NAME = 'test-data'
3333
BASE_URL = 'https://w3id.org/ro/crate'
34-
VERSION = '1.1'
34+
DEFAULT_VERSION = '1.2'
3535
LEGACY_VERSION = '1.0'
3636

3737

3838
class Helpers:
3939

40-
PROFILE = f"{BASE_URL}/{VERSION}"
41-
LEGACY_PROFILE = f"{BASE_URL}/{LEGACY_VERSION}"
4240
WORKFLOW_PROFILE = "https://w3id.org/workflowhub/workflow-ro-crate/1.0"
4341
METADATA_FILE_NAME = 'ro-crate-metadata.json'
4442
LEGACY_METADATA_FILE_NAME = 'ro-crate-metadata.jsonld'
@@ -49,20 +47,23 @@ class Helpers:
4947

5048
@classmethod
5149
def read_json_entities(cls, crate_base_path):
52-
metadata_path = pathlib.Path(crate_base_path) / cls.METADATA_FILE_NAME
50+
crate_base_path = Path(crate_base_path)
51+
metadata_path = crate_base_path / cls.METADATA_FILE_NAME
52+
if not metadata_path.is_file():
53+
metadata_path = crate_base_path / cls.LEGACY_METADATA_FILE_NAME
5354
with open(metadata_path, "rt") as f:
5455
json_data = json.load(f)
5556
return {_["@id"]: _ for _ in json_data["@graph"]}
5657

5758
@classmethod
58-
def check_crate(cls, json_entities, root_id="./", data_entity_ids=None):
59+
def check_crate(cls, json_entities, root_id="./", data_entity_ids=None, version=DEFAULT_VERSION):
5960
assert root_id in json_entities
6061
root = json_entities[root_id]
6162
assert root["@type"] == "Dataset"
6263
assert cls.METADATA_FILE_NAME in json_entities
6364
metadata = json_entities[cls.METADATA_FILE_NAME]
6465
assert metadata["@type"] == "CreativeWork"
65-
assert cls.PROFILE in get_norm_value(metadata, "conformsTo")
66+
assert f"{BASE_URL}/{version}" in get_norm_value(metadata, "conformsTo")
6667
assert metadata["about"] == {"@id": root_id}
6768
if data_entity_ids:
6869
data_entity_ids = set(data_entity_ids)
@@ -91,7 +92,7 @@ def helpers():
9192
# pytest's default tmpdir returns a py.path object
9293
@pytest.fixture
9394
def tmpdir(tmpdir):
94-
return pathlib.Path(tmpdir)
95+
return Path(tmpdir)
9596

9697

9798
@pytest.fixture

test/test-data/crate-1.0/data.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
name,number
2+
foo,1
3+
bar,2
test/test-data/crate-1.0/ro-crate-metadata.jsonld

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"@context": "https://w3id.org/ro/crate/1.0/context",
3+
"@graph": [
4+
{
5+
"@id": "ro-crate-metadata.jsonld",
6+
"@type": "CreativeWork",
7+
"about": {"@id": "./"},
8+
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.0"}
9+
},
10+
{
11+
"@id": "./",
12+
"@type": "Dataset",
13+
"name": "Example crate",
14+
"description": "An example RO-Crate",
15+
"datePublished": "2025-10-17",
16+
"license": {"@id": "http://spdx.org/licenses/CC0-1.0"},
17+
"hasPart": [{"@id": "data.csv"}]
18+
},
19+
{
20+
"@id": "data.csv",
21+
"@type": "File",
22+
"name": "CSV data"
23+
},
24+
{
25+
"@id": "http://spdx.org/licenses/CC0-1.0",
26+
"@type": "CreativeWork",
27+
"name": "CC0-1.0"
28+
}
29+
]
30+
}

test/test-data/crate-1.1/data.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
name,number
2+
foo,1
3+
bar,2

0 commit comments

Comments
 (0)