From 5323a04b555d8d01291f9fdc3cd5bdf28560b42e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <boris.clenet@inria.fr>
Date: Fri, 23 May 2025 11:34:01 +0200
Subject: [PATCH 1/4] BEP028 - Provenance for heudiconv example

---
 README.md                                     |   5 +-
 dataset_listing.tsv                           |   1 +
 provenance_heudiconv/.bidsignore              |   1 +
 .../02/ses-acq1/info/02_ses-acq1.auto.txt     |   1 +
 .../02/ses-acq1/info/02_ses-acq1.edit.txt     |   1 +
 .../.heudiconv/02/ses-acq1/info/heuristic.py  | 355 ++++++++++++++++++
 provenance_heudiconv/CHANGES                  |   5 +
 provenance_heudiconv/README                   |  38 ++
 provenance_heudiconv/dataset_description.json |  25 ++
 provenance_heudiconv/participants.json        |  14 +
 provenance_heudiconv/participants.tsv         |   2 +
 .../prov/prov-heudiconv_act.json              |  26 ++
 .../prov/prov-heudiconv_ent.json              |  56 +++
 .../prov/prov-heudiconv_env.json              |  13 +
 .../prov/prov-heudiconv_soft.json             |  15 +
 provenance_heudiconv/scans.json               |  16 +
 provenance_heudiconv/sourcedata/README        |   2 +
 .../sub-001/anat/sub-001_run-1_T1w.json       |  26 ++
 .../sub-001/anat/sub-001_run-1_T1w.nii.gz     |   0
 .../sub-001/sub-001_scans.tsv                 |   2 +
 20 files changed, 602 insertions(+), 2 deletions(-)
 create mode 100644 provenance_heudiconv/.bidsignore
 create mode 100644 provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.auto.txt
 create mode 100644 provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.edit.txt
 create mode 100644 provenance_heudiconv/.heudiconv/02/ses-acq1/info/heuristic.py
 create mode 100644 provenance_heudiconv/CHANGES
 create mode 100644 provenance_heudiconv/README
 create mode 100644 provenance_heudiconv/dataset_description.json
 create mode 100644 provenance_heudiconv/participants.json
 create mode 100644 provenance_heudiconv/participants.tsv
 create mode 100644 provenance_heudiconv/prov/prov-heudiconv_act.json
 create mode 100644 provenance_heudiconv/prov/prov-heudiconv_ent.json
 create mode 100644 provenance_heudiconv/prov/prov-heudiconv_env.json
 create mode 100644 provenance_heudiconv/prov/prov-heudiconv_soft.json
 create mode 100644 provenance_heudiconv/scans.json
 create mode 100644 provenance_heudiconv/sourcedata/README
 create mode 100644 provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.json
 create mode 100644 provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.nii.gz
 create mode 100644 provenance_heudiconv/sub-001/sub-001_scans.tsv

diff --git a/README.md b/README.md
index 34a0ce31a..88fc2c8b0 100644
--- a/README.md
+++ b/README.md
@@ -370,5 +370,6 @@ TABLE BELOW IS GENERATED AUTOMATICALLY.
 DO NOT EDIT DIRECTLY.
 -->
 
-| name   | description   | datatypes   | suffixes   | link to full data   | maintained by   |
-|--------|---------------|-------------|------------|---------------------|-----------------|
+| name                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        | description   | datatypes   | suffixes   | link to full data   | maintained by   |
+|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------|:------------|:-----------|:--------------------|:----------------|
+| [provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is buid upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft](https://github.com/bids-standard/bids-examples/tree/master/provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is buid upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft) | n/a           | n/a         | n/a        | n/a                 | n/a             |
diff --git a/dataset_listing.tsv b/dataset_listing.tsv
index 52cf73cb2..b53f4f223 100644
--- a/dataset_listing.tsv
+++ b/dataset_listing.tsv
@@ -83,3 +83,4 @@ mrs_fmrs	Functional MRS data involving a pain stimulus task from 15 subjects	[li
 xeeg_hed_score	EEG and iEEG data with annotations of artifacts, seizures and modulators using HED-SCORE 		[@dorahermes](https://github.com/dorahermes)	anat, eeg, ieeg	T1w, channels, coordsystem, eeg, electrodes, events, ieeg
 dwi_deriv	exemplifies the storage of diffusion MRI derivates that may be generated on the Siemens XA platform.			dwi	dwi
 pheno004	Minimal dataset with subjects with imaging and/or phenotype data		[@ericearl](https://github.com/ericearl)	phenotype, anat	T1w
+provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is buid upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft
\ No newline at end of file
diff --git a/provenance_heudiconv/.bidsignore b/provenance_heudiconv/.bidsignore
new file mode 100644
index 000000000..692fd97d0
--- /dev/null
+++ b/provenance_heudiconv/.bidsignore
@@ -0,0 +1 @@
+.duecredit.p
\ No newline at end of file
diff --git a/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.auto.txt b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.auto.txt
new file mode 100644
index 000000000..72bd4f08f
--- /dev/null
+++ b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.auto.txt
@@ -0,0 +1 @@
+{('sub-001/anat/sub-001_run-1_T1w', ('nii.gz',), None): ['401-anat-T1w']}
\ No newline at end of file
diff --git a/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.edit.txt b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.edit.txt
new file mode 100644
index 000000000..72bd4f08f
--- /dev/null
+++ b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.edit.txt
@@ -0,0 +1 @@
+{('sub-001/anat/sub-001_run-1_T1w', ('nii.gz',), None): ['401-anat-T1w']}
\ No newline at end of file
diff --git a/provenance_heudiconv/.heudiconv/02/ses-acq1/info/heuristic.py b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/heuristic.py
new file mode 100644
index 000000000..82750c96f
--- /dev/null
+++ b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/heuristic.py
@@ -0,0 +1,355 @@
+import logging
+import lzma
+from simplejson import loads as json_loads
+from os import environ
+
+lgr = logging.getLogger(__name__)
+
+
+# map the various guesses (lower-case spellings) to the canonical modality labels
+modality_label_map = {
+    't1': 'T1w',
+    't1w': 'T1w',
+    't2': 'T2w',
+    't2w': 'T2w',
+    't1rho': 'T1rho',
+    't1map': 'T1map',
+    't2map': 'T2map',
+    't2star': 'T2star',
+    'flair': 'FLAIR',
+    'flash': 'FLASH',
+    'pd': 'PD',
+    'pdmap': 'PDmap',
+    'pdt2': 'PDT2',
+    'inplanet1': 'inplaneT1',
+    'inplanet2': 'inplaneT2',
+}
+
+# map the canonical modality labels to data_type labels (the data_type names the output sub-directory)
+datatype_labels_map = {
+    'bold': 'func',
+    'sbref': 'func',
+
+    'T1w': 'anat',
+    'T2w': 'anat',
+    'T1rho': 'anat',
+    'T1map': 'anat',
+    'T2map': 'anat',
+    'T2star': 'anat',
+    'FLAIR': 'anat',
+    'FLASH': 'anat',
+    'PD': 'anat',
+    'PDmap': 'anat',
+    'PDT2': 'anat',
+    'inplaneT1': 'anat',
+    'inplaneT2': 'anat',
+    'angio': 'anat',
+
+    'swi': 'swi',
+    'dwi': 'dwi',
+
+    'phasediff': 'fmap',
+    'phase1': 'fmap',
+    'phase2': 'fmap',
+    'magnitude1': 'fmap',
+    'magnitude2': 'fmap',
+    'fieldmap': 'fmap',
+
+    'epi': 'fmap',  # TODO?
+}
+
+# map specification keys to the BIDS abbreviations used in paths (keys are hyphenated; lookups must use these exact spellings)
+spec2bids_map = {
+    'subject': "sub",
+    'anon-subject': "sub",
+    'bids-session': "ses",
+    'bids-task': "task",
+    'bids-run': "run",
+    'bids-modality': "mod",
+    'bids-acquisition': "acq",
+    'bids-scan': "scan",
+    'bids-contrast-enhancement': "ce",
+    'bids-reconstruction-algorithm': "rec",
+    'bids-echo': "echo",
+    'bids-direction': "dir",
+
+    # SWI Extension:
+    'bids-part': "part",
+    'bids-coil': "coil",
+
+}
+
+
+def get_specval(spec, key):
+    return spec[key]['value']  # no fallback: KeyError if `key` or its 'value' entry is missing
+
+
+def has_specval(spec, key):
+    return key in spec and 'value' in spec[key] and spec[key]['value']  # truthy only for a non-empty value; yields the value itself, not a bool
+
+
+# Snippet from https://github.com/datalad/datalad to avoid depending on it for
+# just one function:
+def LZMAFile(*args, **kwargs):
+    """Wrapper (not a decorator) around `lzma.LZMAFile` to overcome a bug
+
+    Some setups using pyliblzma hit an AttributeError on __exit__;
+    calling dir() on the fresh object helps to avoid it,
+    see https://bugs.launchpad.net/pyliblzma/+bug/1219296
+    """
+    lzmafile = lzma.LZMAFile(*args, **kwargs)
+    dir(lzmafile)  # deliberate: the call itself is the workaround
+    return lzmafile
+
+
+def loads(s, *args, **kwargs):
+    """Parse JSON text `s`; on failure log the offending input, then re-raise."""
+    try:
+        return json_loads(s, *args, **kwargs)
+    except Exception:  # not bare: Ctrl-C / SystemExit are not parse failures
+        lgr.error(
+            "Failed to load content from %r with args=%r kwargs=%r",
+            s, args, kwargs
+        )
+        raise
+
+
+def load_stream(fname, compressed=False):
+    """Lazily yield the parsed JSON value of every line in `fname`."""
+    opener = LZMAFile if compressed else open
+    with opener(fname, mode='r') as stream:
+        for raw_line in stream:
+            yield loads(raw_line)
+
+# END datalad Snippet
+
+
+def create_key(template, outtype=('nii.gz',), annotation_classes=None):
+    if not template:  # rejects None as well as any other empty template
+        raise ValueError('Template must be a valid format string')
+
+    return template, outtype, annotation_classes
+
+
+class SpecLoader(object):
+    """
+    Cache for the study specification, so that the JSON is parsed once rather
+    than on each invocation of `infotodict`. The spec is loaded on first
+    request instead of at module level, since the env variable naming the
+    spec file isn't necessarily available at first import.
+    """
+
+    def __init__(self):
+        # chosen subject id (orig or anon) comes from an env var
+        self.subject = environ.get('HIRNI_SPEC2BIDS_SUBJECT')
+        self._spec = None
+
+    def get_study_spec(self):
+        if self._spec is not None:
+            return self._spec
+        filename = environ.get('HIRNI_STUDY_SPEC')
+        if not filename:
+            # TODO: Just raise or try a default location first?
+            raise ValueError("No study specification provided. "
+                             "Set environment variable HIRNI_STUDY_SPEC "
+                             "to do so.")
+        self._spec = [entry for entry in load_stream(filename)
+                      if entry['type'] == 'dicomseries']
+        return self._spec
+
+
+_spec = SpecLoader()  # shared instance; NOTE(review): HIRNI_SPEC2BIDS_SUBJECT is read here, at import time - confirm it is always set by then
+
+
+def validate_spec(spec):
+    """Return True if `spec` is a usable dicomseries entry; otherwise log why and return False (ValueError if empty)."""
+    if not spec:
+        raise ValueError("Image series specification is empty.")
+
+    tags = spec.get('tags', None)
+    if tags and 'hirni-dicom-converter-ignore' in tags:
+        lgr.debug("Skip series %s (marked 'ignore' in spec)", spec.get('uid'))
+        return False
+
+    # every key that is not automatically managed must carry a 'value' entry
+    for k in spec.keys():
+        # automatically managed keys with no subdict:
+        # TODO: Where to define this list?
+        # TODO: Test whether those are actually present!
+        if k in ['type', 'location', 'uid', 'dataset-id',
+                 'dataset-refcommit', 'procedures', 'tags']:
+            continue
+        if 'value' not in spec[k]:
+            lgr.warning("DICOM series specification (UID: {uid}) has no value "
+                        "for key '{key}'.".format(uid=spec.get('uid'), key=k))
+            return False
+
+    if spec.get('type') != 'dicomseries':  # .get: a missing 'type' is invalid, not a crash
+        lgr.warning("Specification not of type 'dicomseries'.")
+        return False
+
+    if not spec.get('uid'):  # absent or empty UID
+        lgr.warning("Missing image series UID.")
+        return False
+
+    for var in ('bids-modality',):
+        if not has_specval(spec, var):
+            lgr.warning("Missing specification value for key '%s'", var)
+            return False
+
+    return True
+
+
+# TODO: can be removed, whenever nipy/heudiconv #197 is solved
+def infotoids(seqinfos, outdir):
+    # Placeholder: both arguments are ignored and every id is left unset.
+    ids = dict.fromkeys(('locator', 'session', 'subject'))
+    return ids
+
+
+def infotodict(seqinfo):  # pragma: no cover
+    """Heuristic evaluator for determining which runs belong where
+
+    allowed template fields - follow python string module:
+
+    item: index within category
+    subject: participant id
+    seqitem: run number during scanning
+    subindex: sub index within group
+    """
+
+    info = dict()
+    for s in seqinfo:
+
+        # find the single spec entry whose uid matches this series:
+        candidates = [series for series in _spec.get_study_spec()
+                      if str(s.series_uid) == series['uid']]
+        if not candidates:
+            raise ValueError("Found no match for seqinfo: %s" % str(s))
+        if len(candidates) != 1:
+            raise ValueError("Found %s match(es) for series UID %s" %
+                             (len(candidates), s.series_uid))
+        series_spec = candidates[0]
+
+        if not validate_spec(series_spec):
+            lgr.debug("Series invalid (%s). Skip.", str(s.series_uid))
+            continue
+
+        dirname = filename = "sub-{}".format(_spec.subject)
+        # session
+        if has_specval(series_spec, 'bids-session'):
+            ses = get_specval(series_spec, 'bids-session')
+            dirname += "/ses-{}".format(ses)
+            filename += "_ses-{}".format(ses)
+
+        # data type
+        modality = get_specval(series_spec, 'bids-modality')
+        # make canonical if possible
+        modality = modality_label_map.get(modality, modality)
+        # apply fixed mapping from modality -> data_type
+        data_type = datatype_labels_map[modality]
+
+        dirname += "/{}".format(data_type)
+
+        # TODO: Once special cases (like when to use '_mod-' prefix for modality
+        # are clear, integrate data type selection with spec_key list and
+        # thereby reduce code duplication further
+
+        if data_type == 'func':
+            # func/sub-<participant_label>[_ses-<session_label>]
+            # _task-<task_label>[_acq-<label>][_rec-<label>][_run-<index>][_echo-<index>]_<modality_label>.nii[.gz]
+
+            for spec_key in ['bids-task', 'bids-acquisition',
+                             'bids-reconstruction-algorithm', 'bids-run',
+                             'bids-echo']:
+                if has_specval(series_spec, spec_key):
+                    filename += "_{}-{}".format(
+                            spec2bids_map[spec_key],
+                            get_specval(series_spec, spec_key))
+
+            filename += "_{}".format(modality)
+
+        if data_type == 'anat':
+            # anat/sub-<participant_label>[_ses-<session_label>]
+            # [_acq-<label>][_ce-<label>][_rec-<label>][_run-<index>][_mod-<label>]_<modality_label>.nii[.gz]
+
+            for spec_key in ['bids-acquisition',
+                             'bids-contrast-enhancement',
+                             'bids-reconstruction-algorithm',
+                             'bids-run']:
+                if has_specval(series_spec, spec_key):
+                    filename += "_{}-{}".format(
+                            spec2bids_map[spec_key],
+                            get_specval(series_spec, spec_key))
+
+            # TODO: [_mod-<label>]  (modality if defaced, right?)
+            #       => simple bool 'defaced' in spec or is there more to it?
+
+            filename += "_{}".format(modality)
+
+        if data_type == 'dwi':
+            # dwi/sub-<participant_label>[_ses-<session_label>]
+            # [_acq-<label>][_run-<index>]_dwi.nii[.gz]
+
+            for spec_key in ['bids-acquisition',
+                             'bids-run']:
+                if has_specval(series_spec, spec_key):
+                    filename += "_{}-{}".format(
+                            spec2bids_map[spec_key],
+                            get_specval(series_spec, spec_key))
+
+            # TODO: Double check: Is this always correct?
+            filename += "_dwi"
+
+        if data_type == 'swi':
+            # BIDS-Extension:
+            # https://docs.google.com/document/d/1kyw9mGgacNqeMbp4xZet3RnDhcMmf4_BmRgKaOkO2Sc
+            # swi/sub-<participant_label>[_ses-<session_label>]
+            #       [_acq-<label>][_rec-<label>]_part-<phase|mag>[_coil-<index>][_echo-<index>][_run-<index>]_GRE.nii[.gz]
+
+            for spec_key in ['bids-acquisition',
+                             'bids-reconstruction-algorithm',
+                             'bids-part',
+                             'bids-coil',
+                             'bids-echo',
+                             'bids-run',
+                             ]:
+                if has_specval(series_spec, spec_key):
+                    filename += "_{}-{}".format(
+                            spec2bids_map[spec_key],
+                            get_specval(series_spec, spec_key))
+
+            filename += "_GRE"
+
+        if data_type == 'fmap':
+            # Case 1: Phase difference image and at least one magnitude image
+            # sub-<participant_label>/[ses-<session_label>/]
+            # [_acq-<label>][_dir-<dir_label>][_run-<run_index>]_<modality_label>.nii[.gz]
+
+            # Note/TODO: fmap modalities:
+            # _phasediff
+            # _magnitude1
+            # _magnitude2
+            # _phase1
+            # _phase2
+            # _magnitude
+            # _fieldmap
+            # _epi
+
+            for spec_key in ['bids-acquisition',
+                             'bids-direction',
+                             'bids-run']:
+                if has_specval(series_spec, spec_key):
+                    filename += "_{}-{}".format(
+                            spec2bids_map[spec_key],
+                            get_specval(series_spec, spec_key))
+
+            filename += "_{}".format(modality)
+
+        key = create_key(dirname + '/' + filename)
+        if key not in info:
+            info[key] = []
+
+        info[key].append(s[2])
+
+    return info
diff --git a/provenance_heudiconv/CHANGES b/provenance_heudiconv/CHANGES
new file mode 100644
index 000000000..061247385
--- /dev/null
+++ b/provenance_heudiconv/CHANGES
@@ -0,0 +1,5 @@
+0.0.1  Initial data acquired
+TODOs:
+	- verify and possibly extend information in participants.tsv (see for example http://datasets.datalad.org/?dir=/openfmri/ds000208)
+	- fill out dataset_description.json, README, sourcedata/README (if present)
+	- provide _events.tsv file for each _bold.nii.gz with onsets of events (see  '8.5 Task events'  of BIDS specification)
\ No newline at end of file
diff --git a/provenance_heudiconv/README b/provenance_heudiconv/README
new file mode 100644
index 000000000..0e143555b
--- /dev/null
+++ b/provenance_heudiconv/README
@@ -0,0 +1,38 @@
+# BEP028 example dataset - Provenance records for `heudiconv`
+
+This example shows provenance records for a DICOM to NIfTI conversion performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/). The provenance records were created manually; they act as a guideline for future machine-generated records by `heudiconv`.
+
+After conversion, and adding provenance traces, the directory tree is as follows:
+
+```
+.
+├── ⭐ .bidsignore
+├── ⭐ CHANGES
+├── ⭐✍️ dataset_description.json
+├── ⭐ .heudiconv/
+├── ⭐ participants.json
+├── ⭐ participants.tsv
+├── prov/
+│   ├── ✍️ prov-heudiconv_act.json
+│   ├── ✍️ prov-heudiconv_ent.json
+│   ├── ✍️ prov-heudiconv_env.json
+│   └── ✍️ prov-heudiconv_soft.json
+├── ⭐✍️ README
+├── ⭐ scans.json
+├── sourcedata/
+│   ├── hirni-demo
+│   │   └── ...
+│   └── ⭐ README
+└── ⭐ sub-001/
+    ├── ⭐ anat
+    │   ├── ⭐ sub-001_run-1_T1w.json
+    │   └── ⭐ sub-001_run-1_T1w.nii.gz
+    └── ⭐ sub-001_scans.tsv
+```
+
+About symbols:
+* files with a ⭐ were generated by `heudiconv`;
+* files with a ✍️ were generated manually;
+* files with ⭐✍️ were generated by `heudiconv` and then edited by hand.
+
+Note that the `sourcedata/` directory contains the source dataset (DICOM files) known as [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo).
diff --git a/provenance_heudiconv/dataset_description.json b/provenance_heudiconv/dataset_description.json
new file mode 100644
index 000000000..e551b6597
--- /dev/null
+++ b/provenance_heudiconv/dataset_description.json
@@ -0,0 +1,25 @@
+{
+  "Acknowledgements": "TODO: whom you want to acknowledge",
+  "Authors": [
+    "Boris Clénet"
+  ],
+  "BIDSVersion": "1.8.0",
+  "DatasetDOI": "TODO: eventually a DOI for the dataset",
+  "Funding": [
+    "TODO",
+    "GRANT #1",
+    "GRANT #2"
+  ],
+  "HowToAcknowledge": "TODO: describe how to acknowledge -- either cite a corresponding paper, or just in acknowledgement section",
+  "License": "TODO: choose a license, e.g. PDDL (http://opendatacommons.org/licenses/pddl/)",
+  "Name": "Provenance records for heudiconv",
+  "ReferencesAndLinks": [
+    "TODO",
+    "List of papers or websites"
+  ],
+  "SourceDatasets": [
+    {
+      "URL": "https://github.com/psychoinformatics-de/hirni-demo"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/provenance_heudiconv/participants.json b/provenance_heudiconv/participants.json
new file mode 100644
index 000000000..0d9e3ad57
--- /dev/null
+++ b/provenance_heudiconv/participants.json
@@ -0,0 +1,14 @@
+{
+  "participant_id": {
+    "Description": "Participant identifier"
+  },
+  "age": {
+    "Description": "Age in years (TODO - verify) as in the initial session, might not be correct for other sessions"
+  },
+  "sex": {
+    "Description": "self-rated by participant, M for male/F for female (TODO: verify)"
+  },
+  "group": {
+    "Description": "(TODO: adjust - by default everyone is in control group)"
+  }
+}
\ No newline at end of file
diff --git a/provenance_heudiconv/participants.tsv b/provenance_heudiconv/participants.tsv
new file mode 100644
index 000000000..08e275deb
--- /dev/null
+++ b/provenance_heudiconv/participants.tsv
@@ -0,0 +1,2 @@
+participant_id	age	sex	group
+sub-02	42	F	control
diff --git a/provenance_heudiconv/prov/prov-heudiconv_act.json b/provenance_heudiconv/prov/prov-heudiconv_act.json
new file mode 100644
index 000000000..a17d3956c
--- /dev/null
+++ b/provenance_heudiconv/prov/prov-heudiconv_act.json
@@ -0,0 +1,26 @@
+{
+  "Activities": [
+    {
+      "Id": "bids::prov/#preparation-conversion-1xkhm1ft",
+      "Label": "Preparation to conversion",
+      "Command": "heudiconv --files sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms/*.dcm -o . -f sourcedata/hirni-demo/code/hirni-toolbox/converters/heudiconv/hirni_heuristic.py -s 02 -ss acq1 -c dcm2niix -b --minmeta --overwrite",
+      "AssociatedWith": "bids::prov/#heudiconv-a9x5yd3j",
+      "Used": [
+        "bids::prov/#fedora-1cu6r6ou",
+        "bids::sourcedata/hirni-demo/acq1/studyspec.json",
+        "bids::sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms",
+        "bids::sourcedata/hirni-demo/code/hirni-toolbox/converters/heudiconv/hirni_heuristic.py"
+      ]
+    },
+    {
+      "Id": "bids::prov/#conversion-00f3a18f",
+      "Label": "Conversion",
+      "Command": "dcm2niix",
+      "AssociatedWith": "bids::prov/#dcm2niix-r4a7zxc0",
+      "Used": [
+        "bids::prov/#fedora-1cu6r6ou",
+        "bids::sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms"
+      ]
+    }
+  ]
+}
diff --git a/provenance_heudiconv/prov/prov-heudiconv_ent.json b/provenance_heudiconv/prov/prov-heudiconv_ent.json
new file mode 100644
index 000000000..b42bb4da1
--- /dev/null
+++ b/provenance_heudiconv/prov/prov-heudiconv_ent.json
@@ -0,0 +1,56 @@
+{
+  "Entities": [
+    {
+      "Id": "bids::sourcedata/hirni-demo/code/hirni-toolbox/converters/heudiconv/hirni_heuristic.py",
+      "Label": "heuristic"
+    },
+    {
+      "Id": "bids::sourcedata/hirni-demo/acq1/studyspec.json",
+      "Label": "studyspec"
+    },
+    {
+      "Id": "bids::sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms",
+      "Label": "dicoms"
+    },
+    {
+      "Id": "bids::.bidsignore",
+      "Label": ".bidsignore",
+      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+    },
+    {
+      "Id": "bids::.heudiconv/",
+      "Label": ".heudiconv/",
+      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+    },
+    {
+      "Id": "bids::CHANGES",
+      "Label": "CHANGES",
+      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+    },
+    {
+      "Id": "bids::dataset_description.json",
+      "Label": "dataset_description.json",
+      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+    },
+    {
+      "Id": "bids::participants.json",
+      "Label": "participants.json",
+      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+    },
+    {
+      "Id": "bids::participants.tsv",
+      "Label": "participants.tsv",
+      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+    },
+    {
+      "Id": "bids::README",
+      "Label": "README",
+      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+    },
+    {
+      "Id": "bids::scans.json",
+      "Label": "scans.json",
+      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+    }
+  ]
+}
diff --git a/provenance_heudiconv/prov/prov-heudiconv_env.json b/provenance_heudiconv/prov/prov-heudiconv_env.json
new file mode 100644
index 000000000..2268aa1e3
--- /dev/null
+++ b/provenance_heudiconv/prov/prov-heudiconv_env.json
@@ -0,0 +1,13 @@
+{
+  "Environments": [
+    {
+      "Id": "bids::prov/#fedora-1cu6r6ou",
+      "Label": "Fedora release 36 (Thirty Six)",
+      "OperatingSystem": "GNU/Linux 6.2.15-100.fc36.x86_64",
+      "EnvVars": {
+        "HIRNI_STUDY_SPEC": "sourcedata/hirni-demo/acq1/studyspec.json",
+        "HIRNI_SPEC2BIDS_SUBJECT": "001"
+      }
+    }
+  ]
+}
diff --git a/provenance_heudiconv/prov/prov-heudiconv_soft.json b/provenance_heudiconv/prov/prov-heudiconv_soft.json
new file mode 100644
index 000000000..3d4312ef5
--- /dev/null
+++ b/provenance_heudiconv/prov/prov-heudiconv_soft.json
@@ -0,0 +1,15 @@
+{
+  "Software": [
+    {
+      "Id": "bids::prov/#heudiconv-a9x5yd3j",
+      "Label": "heudiconv",
+      "Version": "v1.3.2"
+    },
+    {
+      "Id": "bids::prov/#dcm2niix-r4a7zxc0",
+      "Label": "dcm2niix",
+      "Version": "v1.0.20220720",
+      "prov:actedOnBehalfOf": "bids::prov/#heudiconv-a9x5yd3j"
+    }
+  ]
+}
diff --git a/provenance_heudiconv/scans.json b/provenance_heudiconv/scans.json
new file mode 100644
index 000000000..8733b9e8c
--- /dev/null
+++ b/provenance_heudiconv/scans.json
@@ -0,0 +1,16 @@
+{
+  "filename": {
+    "Description": "Name of the nifti file"
+  },
+  "acq_time": {
+    "LongName": "Acquisition time",
+    "Description": "Acquisition time of the particular scan"
+  },
+  "operator": {
+    "Description": "Name of the operator"
+  },
+  "randstr": {
+    "LongName": "Random string",
+    "Description": "md5 hash of UIDs"
+  }
+}
\ No newline at end of file
diff --git a/provenance_heudiconv/sourcedata/README b/provenance_heudiconv/sourcedata/README
new file mode 100644
index 000000000..699eb76c8
--- /dev/null
+++ b/provenance_heudiconv/sourcedata/README
@@ -0,0 +1,2 @@
+TODO: Provide description about source data, e.g. 
+Directory below contains DICOMS compressed into tarballs per each sequence, replicating directory hierarchy of the BIDS dataset itself.
\ No newline at end of file
diff --git a/provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.json b/provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.json
new file mode 100644
index 000000000..c3aa29700
--- /dev/null
+++ b/provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.json
@@ -0,0 +1,26 @@
+{
+  "AcquisitionNumber": 1,
+  "AcquisitionTime": "13:25:18.000000",
+  "ConversionSoftware": "dcm2niix",
+  "ConversionSoftwareVersion": "v1.0.20220720",
+  "HeudiconvVersion": "1.3.2",
+  "ImageOrientationPatientDICOM": [0.999032, -0.0217884, 0.0382096, 0.0265195, 0.991414, -0.128044],
+  "ImageType": [
+    "DERIVED",
+    "SECONDARY"
+],
+  "ManufacturersModelName": "nifti2dicom",
+  "Modality": "MR",
+  "ProtocolName": "anat-T1w",
+  "RawImage": false,
+  "SeriesDescription": "anat-T1w",
+  "SeriesNumber": 401,
+  "SliceThickness": 0.666667,
+  "SoftwareVersions": "0.4.11",
+  "SpacingBetweenSlices": 0.666667,
+  "GeneratedBy": "bids::prov/#conversion-00f3a18f",
+  "SidecarGeneratedBy": [
+    "bids::prov/#preparation-conversion-1xkhm1ft",
+    "bids::prov/#conversion-00f3a18f"
+  ]
+}
\ No newline at end of file
diff --git a/provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.nii.gz b/provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.nii.gz
new file mode 100644
index 000000000..e69de29bb
diff --git a/provenance_heudiconv/sub-001/sub-001_scans.tsv b/provenance_heudiconv/sub-001/sub-001_scans.tsv
new file mode 100644
index 000000000..aa5e62479
--- /dev/null
+++ b/provenance_heudiconv/sub-001/sub-001_scans.tsv
@@ -0,0 +1,2 @@
+filename	acq_time	operator	randstr
+anat/sub-001_run-1_T1w.nii.gz	2013-07-17T13:25:18	n/a	e72d11b8

From 38655bf3213d98775a197d74bf90e7b4d7b361d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <boris.clenet@inria.fr>
Date: Fri, 13 Jun 2025 10:51:06 +0200
Subject: [PATCH 2/4] Participant id mismatch

---
 provenance_heudiconv/participants.tsv             | 2 +-
 provenance_heudiconv/prov/prov-heudiconv_act.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/provenance_heudiconv/participants.tsv b/provenance_heudiconv/participants.tsv
index 08e275deb..2b8a2cfe9 100644
--- a/provenance_heudiconv/participants.tsv
+++ b/provenance_heudiconv/participants.tsv
@@ -1,2 +1,2 @@
 participant_id	age	sex	group
-sub-02	42	F	control
+sub-001	42	F	control
diff --git a/provenance_heudiconv/prov/prov-heudiconv_act.json b/provenance_heudiconv/prov/prov-heudiconv_act.json
index a17d3956c..523c4bc69 100644
--- a/provenance_heudiconv/prov/prov-heudiconv_act.json
+++ b/provenance_heudiconv/prov/prov-heudiconv_act.json
@@ -3,7 +3,7 @@
     {
       "Id": "bids::prov/#preparation-conversion-1xkhm1ft",
       "Label": "Preparation to conversion",
-      "Command": "heudiconv --files sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms/*.dcm -o . -f sourcedata/hirni-demo/code/hirni-toolbox/converters/heudiconv/hirni_heuristic.py -s 02 -ss acq1 -c dcm2niix -b --minmeta --overwrite",
+      "Command": "heudiconv --files sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms/*.dcm -o . -f sourcedata/hirni-demo/code/hirni-toolbox/converters/heudiconv/hirni_heuristic.py -s 001 -ss acq1 -c dcm2niix -b --minmeta --overwrite",
       "AssociatedWith": "bids::prov/#heudiconv-a9x5yd3j",
       "Used": [
         "bids::prov/#fedora-1cu6r6ou",

From 93568b1b53f254c7d0ff972db9856135cac1bbd9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <boris.clenet@inria.fr>
Date: Fri, 13 Jun 2025 10:56:02 +0200
Subject: [PATCH 3/4] Codespell in dataset listing

---
 README.md           | 6 +++---
 dataset_listing.tsv | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 88fc2c8b0..2403bd72b 100644
--- a/README.md
+++ b/README.md
@@ -370,6 +370,6 @@ TABLE BELOW IS GENERATED AUTOMATICALLY.
 DO NOT EDIT DIRECTLY.
 -->
 
-| name                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        | description   | datatypes   | suffixes   | link to full data   | maintained by   |
-|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------|:------------|:-----------|:--------------------|:----------------|
-| [provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is buid upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft](https://github.com/bids-standard/bids-examples/tree/master/provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is buid upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft) | n/a           | n/a         | n/a        | n/a                 | n/a             |
+| name                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | description   | datatypes   | suffixes   | link to full data   | maintained by   |
+|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------|:------------|:-----------|:--------------------|:----------------|
+| [provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is built upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft](https://github.com/bids-standard/bids-examples/tree/master/provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is built upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft) | n/a           | n/a         | n/a        | n/a                 | n/a             |
diff --git a/dataset_listing.tsv b/dataset_listing.tsv
index b53f4f223..20dc487c2 100644
--- a/dataset_listing.tsv
+++ b/dataset_listing.tsv
@@ -83,4 +83,4 @@ mrs_fmrs	Functional MRS data involving a pain stimulus task from 15 subjects	[li
 xeeg_hed_score	EEG and iEEG data with annotations of artifacts, seizures and modulators using HED-SCORE 		[@dorahermes](https://github.com/dorahermes)	anat, eeg, ieeg	T1w, channels, coordsystem, eeg, electrodes, events, ieeg
 dwi_deriv	exemplifies the storage of diffusion MRI derivates that may be generated on the Siemens XA platform.			dwi	dwi
 pheno004	Minimal dataset with subjects with imaging and/or phenotype data		[@ericearl](https://github.com/ericearl)	phenotype, anat	T1w
-provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is buid upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft
\ No newline at end of file
+provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is built upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft
\ No newline at end of file

From f33aa6082c3e573f565535c06028821adec2c508 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <boris.clenet@inria.fr>
Date: Fri, 11 Jul 2025 16:52:05 +0200
Subject: [PATCH 4/4] removing / after bids::prov

---
 .../prov/prov-heudiconv_act.json                 | 12 ++++++------
 .../prov/prov-heudiconv_ent.json                 | 16 ++++++++--------
 .../prov/prov-heudiconv_env.json                 |  2 +-
 .../prov/prov-heudiconv_soft.json                |  6 +++---
 .../sub-001/anat/sub-001_run-1_T1w.json          |  6 +++---
 5 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/provenance_heudiconv/prov/prov-heudiconv_act.json b/provenance_heudiconv/prov/prov-heudiconv_act.json
index 523c4bc69..43aba2bd0 100644
--- a/provenance_heudiconv/prov/prov-heudiconv_act.json
+++ b/provenance_heudiconv/prov/prov-heudiconv_act.json
@@ -1,24 +1,24 @@
 {
   "Activities": [
     {
-      "Id": "bids::prov/#preparation-conversion-1xkhm1ft",
+      "Id": "bids::prov#preparation-conversion-1xkhm1ft",
       "Label": "Preparation to conversion",
       "Command": "heudiconv --files sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms/*.dcm -o . -f sourcedata/hirni-demo/code/hirni-toolbox/converters/heudiconv/hirni_heuristic.py -s 001 -ss acq1 -c dcm2niix -b --minmeta --overwrite",
-      "AssociatedWith": "bids::prov/#heudiconv-a9x5yd3j",
+      "AssociatedWith": "bids::prov#heudiconv-a9x5yd3j",
       "Used": [
-        "bids::prov/#fedora-1cu6r6ou",
+        "bids::prov#fedora-1cu6r6ou",
         "bids::sourcedata/hirni-demo/acq1/studyspec.json",
         "bids::sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms",
         "bids::sourcedata/hirni-demo/code/hirni-toolbox/converters/heudiconv/hirni_heuristic.py"
       ]
     },
     {
-      "Id": "bids::prov/#conversion-00f3a18f",
+      "Id": "bids::prov#conversion-00f3a18f",
       "Label": "Conversion",
       "Command": "dcm2niix",
-      "AssociatedWith": "bids::prov/#dcm2niix-r4a7zxc0",
+      "AssociatedWith": "bids::prov#dcm2niix-r4a7zxc0",
       "Used": [
-        "bids::prov/#fedora-1cu6r6ou",
+        "bids::prov#fedora-1cu6r6ou",
         "bids::sourcedata/hirni-demo/acq1/dicoms/example-dicom-structural-master/dicoms"
       ]
     }
diff --git a/provenance_heudiconv/prov/prov-heudiconv_ent.json b/provenance_heudiconv/prov/prov-heudiconv_ent.json
index b42bb4da1..3ecfdc9c9 100644
--- a/provenance_heudiconv/prov/prov-heudiconv_ent.json
+++ b/provenance_heudiconv/prov/prov-heudiconv_ent.json
@@ -15,42 +15,42 @@
     {
       "Id": "bids::.bidsignore",
       "Label": ".bidsignore",
-      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+      "GeneratedBy": "bids::prov#preparation-conversion-1xkhm1ft"
     },
     {
       "Id": "bids::.heudiconv/",
       "Label": ".heudiconv/",
-      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+      "GeneratedBy": "bids::prov#preparation-conversion-1xkhm1ft"
     },
     {
       "Id": "bids::CHANGES",
       "Label": "CHANGES",
-      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+      "GeneratedBy": "bids::prov#preparation-conversion-1xkhm1ft"
     },
     {
       "Id": "bids::dataset_description.json",
       "Label": "dataset_description.json",
-      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+      "GeneratedBy": "bids::prov#preparation-conversion-1xkhm1ft"
     },
     {
       "Id": "bids::participants.json",
       "Label": "participants.json",
-      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+      "GeneratedBy": "bids::prov#preparation-conversion-1xkhm1ft"
     },
     {
       "Id": "bids::participants.tsv",
       "Label": "participants.tsv",
-      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+      "GeneratedBy": "bids::prov#preparation-conversion-1xkhm1ft"
     },
     {
       "Id": "bids::README",
       "Label": "README",
-      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+      "GeneratedBy": "bids::prov#preparation-conversion-1xkhm1ft"
     },
     {
       "Id": "bids::scans.json",
       "Label": "scans.json",
-      "GeneratedBy": "bids::prov/#preparation-conversion-1xkhm1ft"
+      "GeneratedBy": "bids::prov#preparation-conversion-1xkhm1ft"
     }
   ]
 }
diff --git a/provenance_heudiconv/prov/prov-heudiconv_env.json b/provenance_heudiconv/prov/prov-heudiconv_env.json
index 2268aa1e3..cdffa3fc6 100644
--- a/provenance_heudiconv/prov/prov-heudiconv_env.json
+++ b/provenance_heudiconv/prov/prov-heudiconv_env.json
@@ -1,7 +1,7 @@
 {
   "Environments": [
     {
-      "Id": "bids::prov/#fedora-1cu6r6ou",
+      "Id": "bids::prov#fedora-1cu6r6ou",
       "Label": "Fedora release 36 (Thirty Six)",
       "OperatingSystem": "GNU/Linux 6.2.15-100.fc36.x86_64",
       "EnvVars": {
diff --git a/provenance_heudiconv/prov/prov-heudiconv_soft.json b/provenance_heudiconv/prov/prov-heudiconv_soft.json
index 3d4312ef5..c23d177f3 100644
--- a/provenance_heudiconv/prov/prov-heudiconv_soft.json
+++ b/provenance_heudiconv/prov/prov-heudiconv_soft.json
@@ -1,15 +1,15 @@
 {
   "Software": [
     {
-      "Id": "bids::prov/#heudiconv-a9x5yd3j",
+      "Id": "bids::prov#heudiconv-a9x5yd3j",
       "Label": "heudiconv",
       "Version": "v1.3.2"
     },
     {
-      "Id": "bids::prov/#dcm2niix-r4a7zxc0",
+      "Id": "bids::prov#dcm2niix-r4a7zxc0",
       "Label": "dcm2niix",
       "Version": "v1.3.2",
-      "prov:actedOnBehalfOf": "bids::prov/#heudiconv-a9x5yd3j"
+      "prov:actedOnBehalfOf": "bids::prov#heudiconv-a9x5yd3j"
     }
   ]
 }
diff --git a/provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.json b/provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.json
index c3aa29700..11c49fae7 100644
--- a/provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.json
+++ b/provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.json
@@ -18,9 +18,9 @@
   "SliceThickness": 0.666667,
   "SoftwareVersions": "0.4.11",
   "SpacingBetweenSlices": 0.666667,
-  "GeneratedBy": "bids::prov/#conversion-00f3a18f",
+  "GeneratedBy": "bids::prov#conversion-00f3a18f",
   "SidecarGeneratedBy": [
-    "bids::prov/#preparation-conversion-1xkhm1ft",
-    "bids::prov/#conversion-00f3a18f"
+    "bids::prov#preparation-conversion-1xkhm1ft",
+    "bids::prov#conversion-00f3a18f"
   ]
 }
\ No newline at end of file