From c95f81d597414ed60b5753deb59a341628d387b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= Date: Fri, 23 May 2025 10:24:46 +0200 Subject: [PATCH 1/5] Adding a new table for datasets related to provenance --- tools/print_dataset_listing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/print_dataset_listing.py b/tools/print_dataset_listing.py index c4d714037..0dc310594 100644 --- a/tools/print_dataset_listing.py +++ b/tools/print_dataset_listing.py @@ -57,6 +57,7 @@ "PET": "pet", "qMRI": "", "Phenotype": "phenotype", + "Provenance": "", } DELIMITER = "" @@ -176,6 +177,8 @@ def add_tables(df: pd.DataFrame, output_file: Path, names) -> None: mask = names.str.contains("qmri_") elif table_name == "HED": mask = names.str.contains("_hed_") + elif table_name == "Provenance": + mask = names.str.contains("provenance_") else: mask = df["datatypes"].str.contains(table_datatypes, regex=True) From 594e5571d548863d09322ff2b42229773d581f61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= Date: Fri, 23 May 2025 10:25:09 +0200 Subject: [PATCH 2/5] BEP028 - provenance dataset for dcm2niix --- README.md | 11 ++++++++ dataset_listing.tsv | 1 + provenance_dcm2niix/README.md | 21 ++++++++++++++++ provenance_dcm2niix/dataset_description.json | 14 +++++++++++ .../prov/prov-dcm2niix_act.json | 14 +++++++++++ .../prov/prov-dcm2niix_ent.json | 9 +++++++ .../prov/prov-dcm2niix_env.json | 9 +++++++ .../prov/prov-dcm2niix_soft.json | 9 +++++++ .../sub-02/anat/sub-02_T1w.json | 25 +++++++++++++++++++ .../sub-02/anat/sub-02_T1w.nii | 0 10 files changed, 113 insertions(+) create mode 100644 provenance_dcm2niix/README.md create mode 100644 provenance_dcm2niix/dataset_description.json create mode 100644 provenance_dcm2niix/prov/prov-dcm2niix_act.json create mode 100644 provenance_dcm2niix/prov/prov-dcm2niix_ent.json create mode 100644 provenance_dcm2niix/prov/prov-dcm2niix_env.json create mode 100644 provenance_dcm2niix/prov/prov-dcm2niix_soft.json create 
mode 100644 provenance_dcm2niix/sub-02/anat/sub-02_T1w.json create mode 100644 provenance_dcm2niix/sub-02/anat/sub-02_T1w.nii diff --git a/README.md b/README.md index 1f25af9ba..b1c6c373f 100644 --- a/README.md +++ b/README.md @@ -362,3 +362,14 @@ DO NOT EDIT DIRECTLY. | name | description | datatypes | suffixes | link to full data | maintained by | |:--------------------------------------------------------------------------------|:-----------------------------------------------------------------|:----------------|:-----------|:--------------------|:-----------------------------------------| | [pheno004](https://github.com/bids-standard/bids-examples/tree/master/pheno004) | Minimal dataset with subjects with imaging and/or phenotype data | phenotype, anat | T1w | n/a | [@ericearl](https://github.com/ericearl) | + +### Provenance + + + +| name | description | datatypes | suffixes | link to full data | maintained by | +|:------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|:------------|:-------------------------|:------------------------------------------------------------------------------------------------|:---------------------------------------| +| [provenance_dcm2niix](https://github.com/bids-standard/bids-examples/tree/master/provenance_dcm2niix) | Shows minimal example of provenance records for a DICOM to NIfTI conversion, performed by [`dcm2niix`](https://github.com/rordenlab/dcm2niix) | anat | T1w, act, ent, env, soft | This example is built upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data | [@bclenet](https://github.com/bclenet) | diff --git a/dataset_listing.tsv b/dataset_listing.tsv index 52cf73cb2..5702248cb 100644 --- a/dataset_listing.tsv +++ b/dataset_listing.tsv @@ -83,3 +83,4 @@ mrs_fmrs Functional MRS data involving a pain stimulus
task from 15 subjects [li xeeg_hed_score EEG and iEEG data with annotations of artifacts, seizures and modulators using HED-SCORE [@dorahermes](https://github.com/dorahermes) anat, eeg, ieeg T1w, channels, coordsystem, eeg, electrodes, events, ieeg dwi_deriv exemplifies the storage of diffusion MRI derivates that may be generated on the Siemens XA platform. dwi dwi pheno004 Minimal dataset with subjects with imaging and/or phenotype data [@ericearl](https://github.com/ericearl) phenotype, anat T1w +provenance_dcm2niix Shows minimal example of provenance records for a DICOM to NIfTI conversion, performed by [`dcm2niix`](https://github.com/rordenlab/dcm2niix) This example is built upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft diff --git a/provenance_dcm2niix/README.md b/provenance_dcm2niix/README.md new file mode 100644 index 000000000..e9f48d4f8 --- /dev/null +++ b/provenance_dcm2niix/README.md @@ -0,0 +1,21 @@ +# BEP028 example dataset - Provenance records for `dcm2niix` + +This example aims at showing provenance records for a DICOM to NIfTI conversion, performed by [`dcm2niix`](https://github.com/rordenlab/dcm2niix +). Provenance records were created manually; they act as a guideline for further machine-generated records by `dcm2niix`. + +After conversion, and adding provenance traces, the directory tree is as follows: + +``` +prov/ +├── prov-dcm2niix_act.json +├── prov-dcm2niix_ent.json +├── prov-dcm2niix_env.json +└── prov-dcm2niix_soft.json +sourcedata/ +sub-02/ +└── anat + ├── sub-02_T1w.json + └── sub-02_T1w.nii +``` + +Note that the `sourcedata/` directory contains the source dataset (DICOM files) known as [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo).
diff --git a/provenance_dcm2niix/dataset_description.json b/provenance_dcm2niix/dataset_description.json new file mode 100644 index 000000000..f87e66509 --- /dev/null +++ b/provenance_dcm2niix/dataset_description.json @@ -0,0 +1,14 @@ +{ + "Name": "Provenance records for dcm2niix", + "BIDSVersion": "1.10.0", + "DatasetType": "raw", + "License": "CC0", + "Authors": [ + "Boris Clénet" + ], + "SourceDatasets": [ + { + "URL": "https://github.com/psychoinformatics-de/hirni-demo" + } + ] +} diff --git a/provenance_dcm2niix/prov/prov-dcm2niix_act.json b/provenance_dcm2niix/prov/prov-dcm2niix_act.json new file mode 100644 index 000000000..39ee6ce78 --- /dev/null +++ b/provenance_dcm2niix/prov/prov-dcm2niix_act.json @@ -0,0 +1,14 @@ +{ + "Activities": [ + { + "Id": "bids::prov/#conversion-00f3a18f", + "Label": "Conversion", + "Command": "dcm2niix -o . -f sub-%i/anat/sub-%i_T1w sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms", + "AssociatedWith": "bids::prov/#dcm2niix-khhkm7u1", + "Used": [ + "bids::prov/#fedora-uldfv058", + "bids::sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms" + ] + } + ] +} \ No newline at end of file diff --git a/provenance_dcm2niix/prov/prov-dcm2niix_ent.json b/provenance_dcm2niix/prov/prov-dcm2niix_ent.json new file mode 100644 index 000000000..f984e2aa3 --- /dev/null +++ b/provenance_dcm2niix/prov/prov-dcm2niix_ent.json @@ -0,0 +1,9 @@ +{ + "Entities": [ + { + "Id": "bids::sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms", + "Type": "Entity", + "Label": "dicoms" + } + ] +} diff --git a/provenance_dcm2niix/prov/prov-dcm2niix_env.json b/provenance_dcm2niix/prov/prov-dcm2niix_env.json new file mode 100644 index 000000000..ada5e3b5b --- /dev/null +++ b/provenance_dcm2niix/prov/prov-dcm2niix_env.json @@ -0,0 +1,9 @@ +{ + "Environments": [ + { + "Id": "bids::prov/#fedora-uldfv058", + "Label": "Fedora release 36 (Thirty Six)", + "OperatingSystem": "GNU/Linux 6.2.15-100.fc36.x86_64" + 
} + ] +} \ No newline at end of file diff --git a/provenance_dcm2niix/prov/prov-dcm2niix_soft.json b/provenance_dcm2niix/prov/prov-dcm2niix_soft.json new file mode 100644 index 000000000..ad80dbdc6 --- /dev/null +++ b/provenance_dcm2niix/prov/prov-dcm2niix_soft.json @@ -0,0 +1,9 @@ +{ + "Software": [ + { + "Id": "bids::prov/#dcm2niix-khhkm7u1", + "Label": "dcm2niix", + "Version": "v1.0.20220720" + } + ] +} \ No newline at end of file diff --git a/provenance_dcm2niix/sub-02/anat/sub-02_T1w.json b/provenance_dcm2niix/sub-02/anat/sub-02_T1w.json new file mode 100644 index 000000000..ad24ca1a6 --- /dev/null +++ b/provenance_dcm2niix/sub-02/anat/sub-02_T1w.json @@ -0,0 +1,25 @@ +{ + "Modality": "MR", + "ManufacturersModelName": "nifti2dicom", + "SoftwareVersions": "0.4.11", + "SeriesDescription": "anat-T1w", + "ProtocolName": "anat-T1w", + "ImageType": ["DERIVED", "SECONDARY"], + "RawImage": false, + "SeriesNumber": 401, + "AcquisitionTime": "13:25:18.000000", + "AcquisitionNumber": 1, + "SliceThickness": 0.666667, + "SpacingBetweenSlices": 0.666667, + "ImageOrientationPatientDICOM": [ + 0.999032, + -0.0217884, + 0.0382096, + 0.0265195, + 0.991414, + -0.128044 ], + "ConversionSoftware": "dcm2niix", + "ConversionSoftwareVersion": "v1.0.20220720", + "GeneratedBy": "bids::prov/#conversion-00f3a18f", + "SidecarGeneratedBy": "bids::prov/#conversion-00f3a18f" +} \ No newline at end of file diff --git a/provenance_dcm2niix/sub-02/anat/sub-02_T1w.nii b/provenance_dcm2niix/sub-02/anat/sub-02_T1w.nii new file mode 100644 index 000000000..e69de29bb From 4d5717107e67e823327eb3080f14295e77ba03ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= Date: Fri, 11 Jul 2025 16:46:27 +0200 Subject: [PATCH 3/5] removing / after bids::prov --- provenance_dcm2niix/prov/prov-dcm2niix_act.json | 6 +++--- provenance_dcm2niix/prov/prov-dcm2niix_env.json | 2 +- provenance_dcm2niix/prov/prov-dcm2niix_soft.json | 2 +- provenance_dcm2niix/sub-02/anat/sub-02_T1w.json | 4 ++-- 4 files 
changed, 7 insertions(+), 7 deletions(-) diff --git a/provenance_dcm2niix/prov/prov-dcm2niix_act.json b/provenance_dcm2niix/prov/prov-dcm2niix_act.json index 39ee6ce78..50c5cffcb 100644 --- a/provenance_dcm2niix/prov/prov-dcm2niix_act.json +++ b/provenance_dcm2niix/prov/prov-dcm2niix_act.json @@ -1,12 +1,12 @@ { "Activities": [ { - "Id": "bids::prov/#conversion-00f3a18f", + "Id": "bids::prov#conversion-00f3a18f", "Label": "Conversion", "Command": "dcm2niix -o . -f sub-%i/anat/sub-%i_T1w sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms", - "AssociatedWith": "bids::prov/#dcm2niix-khhkm7u1", + "AssociatedWith": "bids::prov#dcm2niix-khhkm7u1", "Used": [ - "bids::prov/#fedora-uldfv058", + "bids::prov#fedora-uldfv058", "bids::sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms" ] } diff --git a/provenance_dcm2niix/prov/prov-dcm2niix_env.json b/provenance_dcm2niix/prov/prov-dcm2niix_env.json index ada5e3b5b..eb877c9b9 100644 --- a/provenance_dcm2niix/prov/prov-dcm2niix_env.json +++ b/provenance_dcm2niix/prov/prov-dcm2niix_env.json @@ -1,7 +1,7 @@ { "Environments": [ { - "Id": "bids::prov/#fedora-uldfv058", + "Id": "bids::prov#fedora-uldfv058", "Label": "Fedora release 36 (Thirty Six)", "OperatingSystem": "GNU/Linux 6.2.15-100.fc36.x86_64" } diff --git a/provenance_dcm2niix/prov/prov-dcm2niix_soft.json b/provenance_dcm2niix/prov/prov-dcm2niix_soft.json index ad80dbdc6..b3331e587 100644 --- a/provenance_dcm2niix/prov/prov-dcm2niix_soft.json +++ b/provenance_dcm2niix/prov/prov-dcm2niix_soft.json @@ -1,7 +1,7 @@ { "Software": [ { - "Id": "bids::prov/#dcm2niix-khhkm7u1", + "Id": "bids::prov#dcm2niix-khhkm7u1", "Label": "dcm2niix", "Version": "v1.0.20220720" } diff --git a/provenance_dcm2niix/sub-02/anat/sub-02_T1w.json b/provenance_dcm2niix/sub-02/anat/sub-02_T1w.json index ad24ca1a6..ccc223901 100644 --- a/provenance_dcm2niix/sub-02/anat/sub-02_T1w.json +++ b/provenance_dcm2niix/sub-02/anat/sub-02_T1w.json @@ -20,6 +20,6 @@ 
-0.128044 ], "ConversionSoftware": "dcm2niix", "ConversionSoftwareVersion": "v1.0.20220720", - "GeneratedBy": "bids::prov/#conversion-00f3a18f", - "SidecarGeneratedBy": "bids::prov/#conversion-00f3a18f" + "GeneratedBy": "bids::prov#conversion-00f3a18f", + "SidecarGeneratedBy": "bids::prov#conversion-00f3a18f" } \ No newline at end of file From 50640093292a4d50b4204d8094df5f2eef6c4aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= Date: Tue, 22 Jul 2025 17:09:37 +0200 Subject: [PATCH 4/5] Adding AltIdentifier for dcm2niix --- provenance_dcm2niix/prov/prov-dcm2niix_soft.json | 1 + 1 file changed, 1 insertion(+) diff --git a/provenance_dcm2niix/prov/prov-dcm2niix_soft.json b/provenance_dcm2niix/prov/prov-dcm2niix_soft.json index b3331e587..7f931ed9a 100644 --- a/provenance_dcm2niix/prov/prov-dcm2niix_soft.json +++ b/provenance_dcm2niix/prov/prov-dcm2niix_soft.json @@ -3,6 +3,7 @@ { "Id": "bids::prov#dcm2niix-khhkm7u1", "Label": "dcm2niix", + "AltIdentifier": "RRID:SCR_023517", "Version": "v1.0.20220720" } ] From 0e6c92b1155c059d200c25d2ea935940df82aeb5 Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Fri, 3 Oct 2025 12:08:26 -0400 Subject: [PATCH 5/5] chore: Ignore provenance on old validators, use spec#2099 schema --- .github/workflows/validate_datasets.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/validate_datasets.yml b/.github/workflows/validate_datasets.yml index 9c4981926..3b3edb2cb 100644 --- a/.github/workflows/validate_datasets.yml +++ b/.github/workflows/validate_datasets.yml @@ -107,7 +107,7 @@ jobs: fi - name: Skip legacy validation for post-legacy datasets - run: for DS in mrs_* dwi_deriv pet006 pheno004 volume_timing; do touch $DS/.SKIP_VALIDATION; done + run: for DS in mrs_* dwi_deriv pet006 pheno004 volume_timing provenance_*; do touch $DS/.SKIP_VALIDATION; done if: matrix.bids-validator == 'legacy' - name: Skip stable validation for datasets with unreleased validator features @@ -119,7 +119,7 @@ jobs: - name: Skip main validation for datasets with unreleased spec features # Replace ${EMPTY} with dataset patterns, when this is needed # Reset to "for DS in ${EMPTY}; ..." after a spec release - run: for DS in dwi_deriv pheno004; do touch $DS/.SKIP_VALIDATION; done + run: for DS in dwi_deriv pheno004 provenance_*; do touch $DS/.SKIP_VALIDATION; done if: matrix.bids-validator != 'dev' - name: Set BIDS_SCHEMA variable for dev version @@ -128,7 +128,7 @@ jobs: # Update this URL to the schema.json from PRs to the spec, when needed. # If this variable is unset, dev will generally track the latest development # release of https://jsr.io/@bids/schema - run: echo BIDS_SCHEMA=https://bids-specification.readthedocs.io/en/latest/schema.json >> $GITHUB_ENV + run: echo BIDS_SCHEMA=https://bids-specification--2099.org.readthedocs.build/en/2099/schema.json >> $GITHUB_ENV - name: Validate all BIDS datasets using bids-validator run: |