diff --git a/mkdocs.yml b/mkdocs.yml index 28ba5d5b5a..0f5a1691fb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,6 +10,7 @@ nav: - Data summary files: modality-agnostic-files/data-summary-files.md - Phenotypic and assessment data: modality-agnostic-files/phenotypic-and-assessment-data.md - Code: modality-agnostic-files/code.md + - Provenance: modality-agnostic-files/provenance.md - Events: modality-agnostic-files/events.md - Modality specific files: - Magnetic Resonance Imaging: modality-specific-files/magnetic-resonance-imaging-data.md @@ -120,6 +121,9 @@ markdown_extensions: - name: tsvgz class: tsv format: !!python/name:bidsschematools.render.tsv.fence + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format - admonition - pymdownx.details plugins: diff --git a/src/introduction.md b/src/introduction.md index 2a5a377fe7..f0070de928 100644 --- a/src/introduction.md +++ b/src/introduction.md @@ -192,6 +192,15 @@ For example: Scientific Data 12, (13841). [doi:10.1038/s41597-025-05543-2](https://doi.org/10.1038/s41597-025-05543-2) +### Other extensions specific publications + +#### Provenance + +- Rémi Adon, Stefan Appelhoff, Tibor Auer, Laurent Guillo, Yaroslav O Halchenko, David Keator, Christopher J Markiewicz, Thomas E Nichols, Jean-Baptiste Poline, Satrajit Ghosh, Camille Maumet (2021). + **BIDS-prov: a provenance framework for BIDS**. + OHBM 2021 - 25th Annual Meeting of the Organization for Human Brain Mapping, Jun 2021, Online, South Korea. 
pp.1-3 + [https://inserm.hal.science/inserm-03478998v1](https://inserm.hal.science/inserm-03478998v1) + ### Research Resource Identifier (RRID) BIDS has also a diff --git a/src/metaschema.json b/src/metaschema.json index e5d4c7b400..ea06b89fa0 100644 --- a/src/metaschema.json +++ b/src/metaschema.json @@ -359,6 +359,15 @@ }, "additionalProperties": false }, + "modality_agnostic": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]+$": { + "$ref": "#/definitions/suffixRule" + } + }, + "additionalProperties": false + }, "tables": { "type": "object", "patternProperties": { @@ -372,7 +381,7 @@ "additionalProperties": false } }, - "required": ["core", "tables"], + "required": ["core", "modality_agnostic", "tables"], "additionalProperties": false }, "deriv": { diff --git a/src/modality-agnostic-files/dataset-description.md b/src/modality-agnostic-files/dataset-description.md index 79d75264c3..c4f45f259c 100644 --- a/src/modality-agnostic-files/dataset-description.md +++ b/src/modality-agnostic-files/dataset-description.md @@ -45,16 +45,9 @@ and a guide for using macros can be found at } ) }} -Each object in the `GeneratedBy` array includes the following REQUIRED, RECOMMENDED -and OPTIONAL keys: - - -{{ MACROS___make_subobject_table("metadata.GeneratedBy.items") }} +!!! Note + See the [Provenance of a BIDS dataset](provenance.md#provenance-of-a-bids-dataset) section + for more information on how to describe provenance using the `GeneratedBy` field. Example: @@ -107,8 +100,6 @@ Example: } ``` -### Derived dataset and pipeline description - As for any BIDS dataset, a `dataset_description.json` file MUST be found at the top level of every derived dataset: `/derivatives//dataset_description.json`. 
diff --git a/src/modality-agnostic-files/provenance-context.json b/src/modality-agnostic-files/provenance-context.json new file mode 100644 index 0000000000..4947242e70 --- /dev/null +++ b/src/modality-agnostic-files/provenance-context.json @@ -0,0 +1,47 @@ +{ + "@context": { + "prov" : "http://www.w3.org/ns/prov#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "RRID": "http://scicrunch.org/resolver/", + "Id": "@id", + "Type": "@type", + "Label": "http://www.w3.org/2000/01/rdf-schema#label", + "StartedAtTime" : { + "@id": "prov:startedAtTime", + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "EndedAtTime" : { + "@id": "prov:endedAtTime", + "@type": "xsd:dateTime" + }, + "GeneratedBy" : { + "@id": "prov:wasGeneratedBy", + "@type": "@id" + }, + "AttributedTo" : { + "@id": "prov:wasAttributedTo", + "@type": "@id" + }, + "AssociatedWith" : { + "@id": "prov:wasAssociatedWith", + "@type": "@id" + }, + "InformedBy" : { + "@id": "prov:wasInformedBy", + "@type": "@id" + }, + "Used" : { + "@id": "prov:used", + "@type": "@id" + }, + "ActedOnBehalfOf" : { + "@id": "prov:actedOnBehalfOf", + "@type": "@id" + }, + "Entities": "prov:Entity", + "Environments": "prov:Entity", + "Activities": "prov:Activity", + "Software": "prov:SoftwareAgent", + "Atlocation": "prov:atLocation" + } +} diff --git a/src/modality-agnostic-files/provenance.md b/src/modality-agnostic-files/provenance.md new file mode 100644 index 0000000000..79f71bafc9 --- /dev/null +++ b/src/modality-agnostic-files/provenance.md @@ -0,0 +1,682 @@ +# Provenance + +Support for provenance was developed as a [BIDS Extension Proposal](../extensions.md#bids-extension-proposals). +Please see [Citing BIDS](../introduction.md#citing-bids) on how to appropriately credit this extension when referring to it in the +context of the academic literature. + +!!! bug + Change example links below once they are merged to bids-examples + +!!! 
example "Example datasets" + + The following examples have been formatted using this specification + and can be used for practical guidance when curating a new dataset. + + - [Provenance of DICOM to NIfTI conversion with `dcm2niix`](https://github.com/bclenet/bids-examples/tree/BEP028_dcm2niix/provenance_dcm2niix) - [Associated Pull Request #494](https://github.com/bids-standard/bids-examples/pull/494) + - [Provenance of DICOM to NIfTI conversion with `heudiconv`](https://github.com/bclenet/bids-examples/tree/BEP028_heudiconv/provenance_heudiconv) - [Associated Pull Request #496](https://github.com/bids-standard/bids-examples/pull/496) + - [Provenance of fMRI preprocessing with `SPM`](https://github.com/bclenet/bids-examples/tree/BEP028_spm/provenance_spm) - [Associated Pull Request #497](https://github.com/bids-standard/bids-examples/pull/497) + - [Provenance of fMRI preprocessing with `fMRIPrep`](https://github.com/bclenet/bids-examples/tree/BEP028_fmriprep/provenance_fmriprep) - [Associated Pull Request #502](https://github.com/bids-standard/bids-examples/pull/502) + + Further datasets are available from + the [BIDS examples repository](https://bids-website.readthedocs.io/en/latest/datasets/examples.html#provenance). + +## Overview + +This part of the BIDS specification is aimed at describing the provenance of a BIDS dataset. + +This description is retrospective: it describes a set of steps that were executed in order to obtain the dataset. + +!!! note + This is different from prospective provenance that focuses on describing workflows that may be run on a dataset. + +This description is based on the [W3C Prov](https://www.w3.org/TR/2013/REC-prov-o-20130430/) standard. + +### General principles + +Provenance information SHOULD be included in a BIDS dataset when possible. + +If provenance information is included, it MUST be described using the conventions detailed by this specification. 
+ +Provenance information reflects the provenance of a full dataset and/or of specific files at any level of the BIDS hierarchy. + +Provenance information SHOULD NOT include human subject identifying data. + +### Key concepts + +Provenance information is encoded using metadata fields. + +For the most part, this metadata consists of **provenance objects** of 4 types: + +- **Activities**: transformations that have been applied to data. +- **Entities**: input or output data for activities. +- **Software**: software packages used to compute the activities. +- **Environments**: software environments in which activities were performed. + +!!! example "Minimal provenance example" + + ```mermaid + flowchart BT + B[Brain extraction] -->|wasAssociatedWith| S{FSL
} + B -->|used| T1([sub-001_T1w.nii]) + B -->|used| L((Linux)) + T1p([sub-001_T1w_preproc.nii]) -->|wasGeneratedBy| B + ``` + + In this example, a brain extraction algorithm was applied to a T1-weighted image: + + - *sub-001_T1w.nii* is the original T1-weighted image; + - *sub-001_T1w_preproc.nii* is the skull-stripped image; + - the *"Brain extraction"* activity was performed using the *FSL* software within a *Linux* software environment. + +Provenance objects are described as JSON objects in BIDS. They are stored inside **provenance files** (see [Provenance files](#provenance-files)). + +Additionally, metadata of entities can be stored as regular BIDS metadata inside: + +- sidecar JSON files (see [Provenance of a BIDS file](#provenance-of-a-bids-file)); +- `dataset_description.json` files (see [Provenance of a BIDS dataset](#provenance-of-a-bids-dataset)). + +## Provenance files + +Template: + +```text +prov/ + [/] + prov-