Skip to content

Commit 64569f1

Browse files
authored
Merge pull request #569 from dbic/enh-reproin-doc
Minor face-lifts to ReproIn: align doc and code better to BIDS terms, address deprecation warnings etc
2 parents e9bc381 + ce2089c commit 64569f1

File tree

2 files changed

+78
-60
lines changed

2 files changed

+78
-60
lines changed

heudiconv/heuristics/reproin.py

Lines changed: 67 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
Sequence names on the scanner must follow this specification to avoid manual
2929
conversion/handling:
3030
31-
[PREFIX:][WIP ]<seqtype[-label]>[_ses-<SESID>][_task-<TASKID>][_acq-<ACQLABEL>][_run-<RUNID>][_dir-<DIR>][<more BIDS>][__<custom>]
31+
[PREFIX:][WIP ]<datatype[-<suffix>]>[_ses-<SESID>][_task-<TASKID>][_acq-<ACQLABEL>][_run-<RUNID>][_dir-<DIR>][<more BIDS>][__<custom>]
3232
3333
where
3434
[PREFIX:] - leading capital letters followed by : are stripped/ignored
@@ -42,24 +42,33 @@
4242
descriptive ones for e.g. SESID (_ses-movie, _ses-localizer)
4343
4444
45-
<seqtype[-label]>
46-
a known BIDS sequence type which is usually a name of the folder under
47-
subject's directory. And (optional) label is specific per sequence type
48-
(e.g. typical "bold" for func, or "T1w" for "anat"), which could often
49-
(but not always) be deduced from DICOM. Known to BIDS modalities are:
45+
<datatype[-suffix]>
46+
a known BIDS sequence datatype which is usually a name of the folder under
47+
subject's directory. And (optional) suffix is a specific sequence type
48+
(e.g., "bold" for func, or "T1w" for "anat"), which could often
49+
(but not always) be deduced from DICOM. Known to ReproIn BIDS modalities
50+
are:
5051
5152
anat - anatomical data. Might also be collected multiple times across
5253
runs (e.g. if subject is taken out of magnet etc), so could
5354
(optionally) have "_run" definition attached. For "standard anat"
54-
labels, please consult to "8.3 Anatomy imaging data" but most
55-
common are 'T1w', 'T2w', 'angio'
55+
suffixes, please consult "8.3 Anatomy imaging data" but most
56+
common are 'T1w', 'T2w', 'angio'.
57+
beh - behavioral data. Known but not "treated".
5658
func - functional (AKA task, including resting state) data.
5759
Typically contains multiple runs, and might have multiple different
5860
tasks, differing from run to run
5961
(e.g. _task-memory_run-01, _task-oddball_run-02)
6062
fmap - field maps
6163
dwi - diffusion weighted imaging (can have runs as well)
6264
65+
The other BIDS modalities are not known ATM and their data will not be
66+
converted and will be just skipped (with a warning). Full list of datatypes
67+
can be found at
68+
https://github.com/bids-standard/bids-specification/blob/v1.7.0/src/schema/objects/datatypes.yaml
69+
and their corresponding suffixes at
70+
https://github.com/bids-standard/bids-specification/tree/v1.7.0/src/schema/rules/datatypes
71+
6372
_ses-<SESID> (optional)
6473
a session. Having a single sequence within a study would make that study
6574
follow "multi-session" layout. A common practice to have a _ses specifier
@@ -204,6 +213,10 @@
204213
'criterion': 'Closest'
205214
}
206215

216+
217+
KNOWN_DATATYPES = {'anat', 'func', 'dwi', 'behav', 'fmap'}
218+
219+
207220
def _delete_chars(from_str, deletechars):
208221
""" Delete characters from string allowing for Python 2 / 3 difference
209222
"""
@@ -404,9 +417,9 @@ def infotodict(seqinfo):
404417
# 1 - PRIMARY/SECONDARY
405418
# 3 - Image IOD specific specialization (optional)
406419
dcm_image_iod_spec = s.image_type[2]
407-
image_type_seqtype = {
420+
image_type_datatype = {
408421
# Note: P and M are too generic to make a decision here, could be
409-
# for different seqtypes (bold, fmap, etc)
422+
# for different datatypes (bold, fmap, etc)
410423
'FMRI': 'func',
411424
'MPR': 'anat',
412425
'DIFFUSION': 'dwi',
@@ -415,7 +428,7 @@ def infotodict(seqinfo):
415428
'MIP_TRA': 'anat', # angiography
416429
}.get(dcm_image_iod_spec, None)
417430
else:
418-
dcm_image_iod_spec = image_type_seqtype = None
431+
dcm_image_iod_spec = image_type_datatype = None
419432

420433
series_info = {} # To please lintian and its friends
421434
for sfield in series_spec_fields:
@@ -440,19 +453,19 @@ def infotodict(seqinfo):
440453
if dcm_image_iod_spec and dcm_image_iod_spec.startswith('MIP'):
441454
series_info['acq'] = series_info.get('acq', '') + sanitize_str(dcm_image_iod_spec)
442455

443-
seqtype = series_info.pop('seqtype')
444-
seqtype_label = series_info.pop('seqtype_label', None)
456+
datatype = series_info.pop('datatype')
457+
datatype_suffix = series_info.pop('datatype_suffix', None)
445458

446-
if image_type_seqtype and seqtype != image_type_seqtype:
459+
if image_type_datatype and datatype != image_type_datatype:
447460
lgr.warning(
448-
"Deduced seqtype to be %s from DICOM, but got %s out of %s",
449-
image_type_seqtype, seqtype, series_spec)
461+
"Deduced datatype to be %s from DICOM, but got %s out of %s",
462+
image_type_datatype, datatype, series_spec)
450463

451464
# if s.is_derived:
452465
# # Let's for now stash those close to original images
453466
# # TODO: we might want a separate tree for all of this!?
454467
# # so more of a parameter to the create_key
455-
# #seqtype += '/derivative'
468+
# #datatype += '/derivative'
456469
# # just keep it lower case and without special characters
457470
# # XXXX what for???
458471
# #seq.append(s.series_description.lower())
@@ -462,46 +475,46 @@ def infotodict(seqinfo):
462475
prefix = ''
463476

464477
#
465-
# Figure out the seqtype_label (BIDS _suffix)
478+
# Figure out the datatype_suffix (BIDS _suffix)
466479
#
467480
# If none was provided -- let's deduce it from the information we find:
468481
# analyze s.protocol_name (series_id is based on it) for full name mapping etc
469-
if not seqtype_label:
470-
if seqtype == 'func':
482+
if not datatype_suffix:
483+
if datatype == 'func':
471484
if '_pace_' in series_spec:
472-
seqtype_label = 'pace' # or should it be part of seq-
485+
datatype_suffix = 'pace' # or should it be part of seq-
473486
elif 'P' in s.image_type:
474-
seqtype_label = 'phase'
487+
datatype_suffix = 'phase'
475488
elif 'M' in s.image_type:
476-
seqtype_label = 'bold'
489+
datatype_suffix = 'bold'
477490
else:
478491
# assume bold by default
479-
seqtype_label = 'bold'
480-
elif seqtype == 'fmap':
492+
datatype_suffix = 'bold'
493+
elif datatype == 'fmap':
481494
# TODO: support phase1 phase2 like in "Case 2: Two phase images ..."
482495
if not dcm_image_iod_spec:
483496
raise ValueError("Do not know image data type yet to make decision")
484-
seqtype_label = {
497+
datatype_suffix = {
485498
# might want explicit {file_index} ?
486499
# _epi for pepolar fieldmaps, see
487500
# https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/01-magnetic-resonance-imaging-data.html#case-4-multiple-phase-encoded-directions-pepolar
488501
'M': 'epi' if 'dir' in series_info else 'magnitude',
489502
'P': 'phasediff',
490503
'DIFFUSION': 'epi', # according to KODI those DWI are the EPIs we need
491504
}[dcm_image_iod_spec]
492-
elif seqtype == 'dwi':
505+
elif datatype == 'dwi':
493506
# label for dwi as well
494-
seqtype_label = 'dwi'
507+
datatype_suffix = 'dwi'
495508

496509
#
497-
# Even if seqtype_label was provided, for some data we might need to override,
510+
# Even if datatype_suffix was provided, for some data we might need to override,
498511
# since they are complementary files produced along-side with original
499512
# ones.
500513
#
501514
if s.series_description.endswith('_SBRef'):
502-
seqtype_label = 'sbref'
515+
datatype_suffix = 'sbref'
503516

504-
if not seqtype_label:
517+
if not datatype_suffix:
505518
# Might be provided by the bids ending within series_spec, we would
506519
# just want to check that the last element is not a _key-value pair
507520
bids_ending = series_info.get('bids', None)
@@ -559,7 +572,7 @@ def infotodict(seqinfo):
559572
# assert s.is_derived, "Motion corrected images must be 'derived'"
560573

561574
if s.is_motion_corrected and 'rec-' in series_info.get('bids', ''):
562-
raise NotImplementedError("want to add _acq-moco but there is _acq- already")
575+
raise NotImplementedError("want to add _rec-moco but there is _rec- already")
563576

564577
def from_series_info(name):
565578
"""A little helper to provide _name-value if series_info knows it
@@ -571,7 +584,12 @@ def from_series_info(name):
571584
else:
572585
return None
573586

574-
suffix_parts = [
587+
# TODO: get order from schema, do not hardcode. ATM could be checked at
588+
# https://bids-specification.readthedocs.io/en/stable/99-appendices/04-entity-table.html
589+
# https://github.com/bids-standard/bids-specification/blob/HEAD/src/schema/rules/entities.yaml
590+
# ATM we at large rely on possible (re)ordering according to schema to be done
591+
# by heudiconv, not reproin here.
592+
filename_suffix_parts = [
575593
from_series_info('task'),
576594
from_series_info('acq'),
577595
# But we want to add an indicator in case it was motion corrected
@@ -580,10 +598,10 @@ def from_series_info(name):
580598
from_series_info('dir'),
581599
series_info.get('bids'),
582600
run_label,
583-
seqtype_label,
601+
datatype_suffix,
584602
]
585603
# filter those which are None, and join with _
586-
suffix = '_'.join(filter(bool, suffix_parts))
604+
suffix = '_'.join(filter(bool, filename_suffix_parts))
587605

588606
# # .series_description in case of
589607
# sdesc = s.study_description
@@ -602,12 +620,12 @@ def from_series_info(name):
602620
# For scouts -- we want only dicoms
603621
# https://github.com/nipy/heudiconv/issues/145
604622
if "_Scout" in s.series_description or \
605-
(seqtype == 'anat' and seqtype_label and seqtype_label.startswith('scout')):
623+
(datatype == 'anat' and datatype_suffix and datatype_suffix.startswith('scout')):
606624
outtype = ('dicom',)
607625
else:
608626
outtype = ('nii.gz', 'dicom')
609627

610-
template = create_key(seqtype, suffix, prefix=prefix, outtype=outtype)
628+
template = create_key(datatype, suffix, prefix=prefix, outtype=outtype)
611629
# we wanted ordered dict for consistent demarcation of dups
612630
if template not in info:
613631
info[template] = []
@@ -849,17 +867,17 @@ def split2(s):
849867
return s, None
850868

851869
# Let's analyze the first element, which should tell us the datatype
852-
seqtype, seqtype_label = split2(split[0])
853-
if seqtype not in {'anat', 'func', 'dwi', 'behav', 'fmap'}:
870+
datatype, datatype_suffix = split2(split[0])
871+
if datatype not in KNOWN_DATATYPES:
854872
# It is something we don't consume
855873
if bids:
856-
lgr.warning("It was instructed to be BIDS sequence but unknown "
857-
"type %s found", seqtype)
874+
lgr.warning("It was instructed to be BIDS datatype but unknown "
875+
"%s found. Known are: %s", datatype, ', '.join(KNOWN_DATATYPES))
858876
return {}
859877

860-
regd = dict(seqtype=seqtype)
861-
if seqtype_label:
862-
regd['seqtype_label'] = seqtype_label
878+
regd = dict(datatype=datatype)
879+
if datatype_suffix:
880+
regd['datatype_suffix'] = datatype_suffix
863881
# now go through each to see if one which we care
864882
bids_leftovers = []
865883
for s in split[1:]:
@@ -886,12 +904,12 @@ def split2(s):
886904
# TODO: might want to check for all known "standard" BIDS suffixes here
887905
# among bids_leftovers, thus serve some kind of BIDS validator
888906

889-
# if not regd.get('seqtype_label', None):
890-
# # might need to assign a default label for each seqtype if was not
907+
# if not regd.get('datatype_suffix', None):
908+
# # might need to assign a default label for each datatype if was not
891909
# # given
892-
# regd['seqtype_label'] = {
910+
# regd['datatype_suffix'] = {
893911
# 'func': 'bold'
894-
# }.get(regd['seqtype'], None)
912+
# }.get(regd['datatype'], None)
895913

896914
return regd
897915

@@ -900,7 +918,7 @@ def fixup_subjectid(subjectid):
900918
"""Just in case someone managed to miss a zero or added an extra one"""
901919
# make it lowercase
902920
subjectid = subjectid.lower()
903-
reg = re.match("sid0*(\d+)$", subjectid)
921+
reg = re.match(r"sid0*(\d+)$", subjectid)
904922
if not reg:
905923
# some completely other pattern
906924
# just filter out possible _- in it

heudiconv/heuristics/test_reproin.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def test_fix_dbic_protocol():
114114
seqinfos = [seq1, seq2]
115115
protocols2fix = {
116116
md5sum('mystudy'):
117-
[('scout_run\+', 'THESCOUT-runX'),
117+
[(r'scout_run\+', 'THESCOUT-runX'),
118118
('run-life[0-9]', 'run+_task-life')],
119119
re.compile('^my.*'):
120120
[('THESCOUT-runX', 'THESCOUT')],
@@ -169,7 +169,7 @@ def test_parse_series_spec():
169169

170170
assert pdpn("bids_func-bold") == \
171171
pdpn("func-bold") == \
172-
{'seqtype': 'func', 'seqtype_label': 'bold'}
172+
{'datatype': 'func', 'datatype_suffix': 'bold'}
173173

174174
# pdpn("bids_func_ses+_task-boo_run+") == \
175175
# order and PREFIX: should not matter, as well as trailing spaces
@@ -179,8 +179,8 @@ def test_parse_series_spec():
179179
pdpn("WIP func_ses+_task-boo_run+") == \
180180
pdpn("bids_func_ses+_run+_task-boo") == \
181181
{
182-
'seqtype': 'func',
183-
# 'seqtype_label': 'bold',
182+
'datatype': 'func',
183+
# 'datatype_suffix': 'bold',
184184
'session': '+',
185185
'run': '+',
186186
'task': 'boo',
@@ -191,7 +191,7 @@ def test_parse_series_spec():
191191
pdpn("bids_func-pace_ses-1_run-2_task-boo_acq-bu_bids-please__therest") == \
192192
pdpn("func-pace_ses-1_task-boo_acq-bu_bids-please_run-2") == \
193193
{
194-
'seqtype': 'func', 'seqtype_label': 'pace',
194+
'datatype': 'func', 'datatype_suffix': 'pace',
195195
'session': '1',
196196
'run': '2',
197197
'task': 'boo',
@@ -201,24 +201,24 @@ def test_parse_series_spec():
201201

202202
assert pdpn("bids_anat-scout_ses+") == \
203203
{
204-
'seqtype': 'anat',
205-
'seqtype_label': 'scout',
204+
'datatype': 'anat',
205+
'datatype_suffix': 'scout',
206206
'session': '+',
207207
}
208208

209209
assert pdpn("anat_T1w_acq-MPRAGE_run+") == \
210210
{
211-
'seqtype': 'anat',
211+
'datatype': 'anat',
212212
'run': '+',
213213
'acq': 'MPRAGE',
214-
'seqtype_label': 'T1w'
214+
'datatype_suffix': 'T1w'
215215
}
216216

217217
# Check for currently used {date}, which should also get adjusted
218218
# from (date) since Philips does not allow for {}
219219
assert pdpn("func_ses-{date}") == \
220220
pdpn("func_ses-(date)") == \
221-
{'seqtype': 'func', 'session': '{date}'}
221+
{'datatype': 'func', 'session': '{date}'}
222222

223223
assert pdpn("fmap_dir-AP_ses-01") == \
224-
{'seqtype': 'fmap', 'session': '01', 'dir': 'AP'}
224+
{'datatype': 'fmap', 'session': '01', 'dir': 'AP'}

0 commit comments

Comments
 (0)