From 5323a04b555d8d01291f9fdc3cd5bdf28560b42e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= Date: Fri, 23 May 2025 11:34:01 +0200 Subject: [PATCH 1/4] BEP028 - Provenance for heudiconv example --- README.md | 5 +- dataset_listing.tsv | 1 + provenance_heudiconv/.bidsignore | 1 + .../02/ses-acq1/info/02_ses-acq1.auto.txt | 1 + .../02/ses-acq1/info/02_ses-acq1.edit.txt | 1 + .../.heudiconv/02/ses-acq1/info/heuristic.py | 355 ++++++++++++++++++ provenance_heudiconv/CHANGES | 5 + provenance_heudiconv/README | 38 ++ provenance_heudiconv/dataset_description.json | 25 ++ provenance_heudiconv/participants.json | 14 + provenance_heudiconv/participants.tsv | 2 + .../prov/prov-heudiconv_act.json | 26 ++ .../prov/prov-heudiconv_ent.json | 56 +++ .../prov/prov-heudiconv_env.json | 13 + .../prov/prov-heudiconv_soft.json | 15 + provenance_heudiconv/scans.json | 16 + provenance_heudiconv/sourcedata/README | 2 + .../sub-001/anat/sub-001_run-1_T1w.json | 26 ++ .../sub-001/anat/sub-001_run-1_T1w.nii.gz | 0 .../sub-001/sub-001_scans.tsv | 2 + 20 files changed, 602 insertions(+), 2 deletions(-) create mode 100644 provenance_heudiconv/.bidsignore create mode 100644 provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.auto.txt create mode 100644 provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.edit.txt create mode 100644 provenance_heudiconv/.heudiconv/02/ses-acq1/info/heuristic.py create mode 100644 provenance_heudiconv/CHANGES create mode 100644 provenance_heudiconv/README create mode 100644 provenance_heudiconv/dataset_description.json create mode 100644 provenance_heudiconv/participants.json create mode 100644 provenance_heudiconv/participants.tsv create mode 100644 provenance_heudiconv/prov/prov-heudiconv_act.json create mode 100644 provenance_heudiconv/prov/prov-heudiconv_ent.json create mode 100644 provenance_heudiconv/prov/prov-heudiconv_env.json create mode 100644 provenance_heudiconv/prov/prov-heudiconv_soft.json create mode 100644 provenance_heudiconv/scans.json create mode 100644 provenance_heudiconv/sourcedata/README create mode 100644 provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.json create mode 100644 provenance_heudiconv/sub-001/anat/sub-001_run-1_T1w.nii.gz create mode 100644 provenance_heudiconv/sub-001/sub-001_scans.tsv diff --git a/README.md b/README.md index 34a0ce31a..88fc2c8b0 100644 --- a/README.md +++ b/README.md @@ -370,5 +370,6 @@ TABLE BELOW IS GENERATED AUTOMATICALLY. DO NOT EDIT DIRECTLY. --> -| name | description | datatypes | suffixes | link to full data | maintained by | -|--------|---------------|-------------|------------|---------------------|-----------------| +| name | description | datatypes | suffixes | link to full data | maintained by | +|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------|:------------|:-----------|:--------------------|:----------------| +| [provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is buid upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft](https://github.com/bids-standard/bids-examples/tree/master/provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is buid upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft) | n/a | n/a | n/a | n/a | n/a | diff --git a/dataset_listing.tsv b/dataset_listing.tsv index 52cf73cb2..b53f4f223 100644 --- a/dataset_listing.tsv +++ b/dataset_listing.tsv @@ -83,3 +83,4 @@ mrs_fmrs Functional MRS data involving a pain stimulus task from 15 subjects [li xeeg_hed_score EEG and iEEG data with annotations of artifacts, seizures and modulators using HED-SCORE [@dorahermes](https://github.com/dorahermes) anat, eeg, ieeg T1w, channels, coordsystem, eeg, electrodes, events, ieeg dwi_deriv exemplifies the storage of diffusion MRI derivates that may be generated on the Siemens XA platform. dwi dwi pheno004 Minimal dataset with subjects with imaging and/or phenotype data [@ericearl](https://github.com/ericearl) phenotype, anat T1w +provenance_heudiconv Shows minimal example of provenance records for a DICOM to Nifti conversion, performed by [`heudiconv`](https://heudiconv.readthedocs.io/en/latest/) This example is buid upon [hirni-demo](https://github.com/psychoinformatics-de/hirni-demo) data [@bclenet](https://github.com/bclenet) anat T1w, act, ent, env, soft \ No newline at end of file diff --git a/provenance_heudiconv/.bidsignore b/provenance_heudiconv/.bidsignore new file mode 100644 index 000000000..692fd97d0 --- /dev/null +++ b/provenance_heudiconv/.bidsignore @@ -0,0 +1 @@ +.duecredit.p \ No newline at end of file diff --git a/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.auto.txt b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.auto.txt new file mode 100644 index 000000000..72bd4f08f --- /dev/null +++ b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.auto.txt @@ -0,0 +1 @@ +{('sub-001/anat/sub-001_run-1_T1w', ('nii.gz',), None): ['401-anat-T1w']} \ No newline at end of file diff --git a/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.edit.txt b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.edit.txt new file mode 100644 index 000000000..72bd4f08f --- /dev/null +++ b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/02_ses-acq1.edit.txt @@ -0,0 +1 @@ +{('sub-001/anat/sub-001_run-1_T1w', ('nii.gz',), None): ['401-anat-T1w']} \ No newline at end of file diff --git a/provenance_heudiconv/.heudiconv/02/ses-acq1/info/heuristic.py b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/heuristic.py new file mode 100644 index 000000000..82750c96f --- /dev/null +++ b/provenance_heudiconv/.heudiconv/02/ses-acq1/info/heuristic.py @@ -0,0 +1,355 @@ +import logging +import lzma +from simplejson import loads as json_loads +from os import environ + +lgr = logging.getLogger(__name__) + + +# map the various guesses to the cannonical labels +modality_label_map = { + 't1': 'T1w', + 't1w': 'T1w', + 't2': 'T2w', + 't2w': 'T2w', + 't1rho': 'T1rho', + 't1map': 'T1map', + 't2map': 'T2map', + 't2star': 'T2star', + 'flair': 'FLAIR', + 'flash': 'FLASH', + 'pd': 'PD', + 'pdmap': 'PDmap', + 'pdt2': 'PDT2', + 'inplanet1': 'inplaneT1', + 'inplanet2': 'inplaneT2', +} + +# map the cannonical modality labels to data_type labels +datatype_labels_map = { + 'bold': 'func', + 'sbref': 'func', + + 'T1w': 'anat', + 'T2w': 'anat', + 'T1rho': 'anat', + 'T1map': 'anat', + 'T2map': 'anat', + 'T2star': 'anat', + 'FLAIR': 'anat', + 'FLASH': 'anat', + 'PD': 'anat', + 'PDmap': 'anat', + 'PDT2': 'anat', + 'inplaneT1': 'anat', + 'inplaneT2': 'anat', + 'angio': 'anat', + + 'swi': 'swi', + 'dwi': 'dwi', + + 'phasediff': 'fmap', + 'phase1': 'fmap', + 'phase2': 'fmap', + 'magnitude1': 'fmap', + 'magnitude2': 'fmap', + 'fieldmap': 'fmap', + + 'epi': 'fmap', # TODO? +} + +# map specification keys to BIDS abbreviation used in paths +spec2bids_map = { + 'subject': "sub", + 'anon-subject': "sub", + 'bids-session': "ses", + 'bids-task': "task", + 'bids-run': "run", + 'bids-modality': "mod", + 'bids-acquisition': "acq", + 'bids-scan': "scan", + 'bids-contrast-enhancement': "ce", + 'bids-reconstruction-algorithm': "rec", + 'bids-echo': "echo", + 'bids-direction': "dir", + + # SWI Extension: + 'bids-part': "part", + 'bids-coil': "coil", + +} + + +def get_specval(spec, key): + return spec[key]['value'] + + +def has_specval(spec, key): + return key in spec and 'value' in spec[key] and spec[key]['value'] + + +# Snippet from https://github.com/datalad/datalad to avoid depending on it for +# just one function: +def LZMAFile(*args, **kwargs): + """A little decorator to overcome a bug in lzma + + A unique to yoh and some others bug with pyliblzma + calling dir() helps to avoid AttributeError __exit__ + see https://bugs.launchpad.net/pyliblzma/+bug/1219296 + """ + lzmafile = lzma.LZMAFile(*args, **kwargs) + dir(lzmafile) + return lzmafile + + +def loads(s, *args, **kwargs): + """Helper to log actual value which failed to be parsed""" + try: + return json_loads(s, *args, **kwargs) + except: + lgr.error( + "Failed to load content from %r with args=%r kwargs=%r" + % (s, args, kwargs) + ) + raise + + +def load_stream(fname, compressed=False): + + _open = LZMAFile if compressed else open + with _open(fname, mode='r') as f: + for line in f: + yield loads(line) + +# END datalad Snippet + + +def create_key(template, outtype=('nii.gz',), annotation_classes=None): + if template is None or not template: + raise ValueError('Template must be a valid format string') + + return template, outtype, annotation_classes + + +class SpecLoader(object): + """ + Persistent object to hold the study specification and not read the JSON on + each invocation of `infotodict`. Module level attribute for the spec itself + doesn't work, since the env variable isn't necessarily available at first + import. + """ + + def __init__(self): + self._spec = None + # get chosen subject id (orig or anon) from env var + self.subject = environ.get('HIRNI_SPEC2BIDS_SUBJECT') + + def get_study_spec(self): + if self._spec is None: + filename = environ.get('HIRNI_STUDY_SPEC') + if filename: + self._spec = [d for d in load_stream(filename) + if d['type'] == 'dicomseries'] + else: + # TODO: Just raise or try a default location first? + raise ValueError("No study specification provided. " + "Set environment variable HIRNI_STUDY_SPEC " + "to do so.") + return self._spec + + +_spec = SpecLoader() + + +def validate_spec(spec): + + if not spec: + raise ValueError("Image series specification is empty.") + + tags = spec.get('tags', None) + if tags and 'hirni-dicom-converter-ignore' in tags: + lgr.debug("Skip series %s (marked 'ignore' in spec)", spec['uid']) + return False + + # mandatory keys for any spec dict (not only dicomseries) + for k in spec.keys(): + # automatically managed keys with no subdict: + # TODO: Where to define this list? + # TODO: Test whether those are actually present! + if k in ['type', 'location', 'uid', 'dataset-id', + 'dataset-refcommit', 'procedures', 'tags']: + continue + if 'value' not in spec[k]: + lgr.warning("DICOM series specification (UID: {uid}) has no value " + "for key '{key}'.".format(uid=spec['uid'], key=k)) + return False + + if spec['type'] != 'dicomseries': + lgr.warning("Specification not of type 'dicomseries'.") + return False + + if 'uid' not in spec.keys() or not spec['uid']: + lgr.warning("Missing image series UID.") + return False + + for var in ('bids-modality',): + if not has_specval(spec, var): + lgr.warning("Missing specification value for key '%s'", var) + return False + + return True + + +# TODO: can be removed, whenever nipy/heudiconv #197 is solved +def infotoids(seqinfos, outdir): + return {'locator': None, + 'session': None, + 'subject': None} + + +def infotodict(seqinfo): # pragma: no cover + """Heuristic evaluator for determining which runs belong where + + allowed template fields - follow python string module: + + item: index within category + subject: participant id + seqitem: run number during scanning + subindex: sub index within group + """ + + info = dict() + for idx, s in enumerate(seqinfo): + + # find in spec: + candidates = [series for series in _spec.get_study_spec() + if str(s.series_uid) == series['uid']] + if not candidates: + raise ValueError("Found no match for seqinfo: %s" % str(s)) + if len(candidates) != 1: + raise ValueError("Found %s match(es) for series UID %s" % + (len(candidates), s.uid)) + series_spec = candidates[0] + + if not validate_spec(series_spec): + lgr.debug("Series invalid (%s). Skip.", str(s.series_uid)) + continue + + dirname = filename = "sub-{}".format(_spec.subject) + # session + if has_specval(series_spec, 'bids-session'): + ses = get_specval(series_spec, 'bids-session') + dirname += "/ses-{}".format(ses) + filename += "_ses-{}".format(ses) + + # data type + modality = get_specval(series_spec, 'bids-modality') + # make cannonical if possible + modality = modality_label_map.get(modality, modality) + # apply fixed mapping from modality -> data_type + data_type = datatype_labels_map[modality] + + dirname += "/{}".format(data_type) + + # TODO: Once special cases (like when to use '_mod-' prefix for modality + # are clear, integrate data type selection with spec_key list and + # thereby reduce code duplication further + + if data_type == 'func': + # func/sub-[_ses-] + # _task-[_acq-