Skip to content

Commit 2815f07

Browse files
committed
RF+ENH: reproin - harmonized naming a bit, skip protocol_name if not parsable not just empty
1 parent 403620e commit 2815f07

File tree

1 file changed

+53
-38
lines changed

1 file changed

+53
-38
lines changed

heudiconv/heuristics/reproin.py

Lines changed: 53 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,18 @@
114114
import logging
115115
lgr = logging.getLogger('heudiconv')
116116

117+
# Terminology to harmonise and use when naming variables etc.
118+
# experiment
119+
# subject
120+
# [session]
121+
# exam (AKA scanning session) - currently seqinfo, unless brought together from multiple
122+
# series (AKA protocol?)
123+
# - series_spec - the spec (literal value) deduced from the seqinfo fields
124+
# - series_info - the dictionary with fields parsed from series_spec
125+
126+
# Which fields in seqinfo (in this order) to check for the ReproIn spec
127+
series_spec_fields = ('protocol_name', 'series_description')
128+
117129
# dictionary from accession-number to runs that need to be marked as bad
118130
# NOTE: even if filename has number that is 0-padded, internally no padding
119131
# is done
@@ -227,7 +239,6 @@
227239
('_test', ''),
228240
],
229241
}
230-
keys2replace = ['protocol_name', 'series_description']
231242

232243
# list containing StudyInstanceUID to skip -- hopefully doesn't happen too often
233244
dicoms2skip = [
@@ -341,13 +352,13 @@ def fix_canceled_runs(seqinfo, accession2run=fix_accession2run):
341352
if re.match(badruns_pattern, s.series_id):
342353
lgr.info('Fixing bad run {0}'.format(s.series_id))
343354
fixedkwargs = dict()
344-
for key in keys2replace:
355+
for key in series_spec_fields:
345356
fixedkwargs[key] = 'cancelme_' + getattr(s, key)
346357
seqinfo[i] = s._replace(**fixedkwargs)
347358
return seqinfo
348359

349360

350-
def fix_dbic_protocol(seqinfo, keys=keys2replace, subsdict=protocols2fix):
361+
def fix_dbic_protocol(seqinfo, keys=series_spec_fields, subsdict=protocols2fix):
351362
"""Ad-hoc fixup for existing protocols
352363
"""
353364
study_hash = get_study_hash(seqinfo)
@@ -454,33 +465,36 @@ def infotodict(seqinfo):
454465
else:
455466
dcm_image_iod_spec = image_type_seqtype = None
456467

457-
protocol_name_tuned = s.protocol_name
458-
if not protocol_name_tuned:
459-
protocol_name_tuned = s.series_description
460-
if not protocol_name_tuned:
468+
series_info = {} # To please lintian and its friends
469+
for sfield in series_spec_fields:
470+
svalue = getattr(s, sfield)
471+
series_info = parse_series_spec(svalue)
472+
if series_info: # looks like a valid spec - we are done
473+
series_spec = svalue
474+
break
475+
else:
476+
lgr.debug(
477+
"Failed to parse reproin spec in .%s=%r",
478+
sfield, svalue)
479+
480+
if not series_info:
481+
series_spec = None # we cannot know better
461482
lgr.warning(
462483
"Could not determine the series name by looking at "
463-
"protocol_name and series_description - both were empty")
464-
# Few common replacements
465-
if protocol_name_tuned in {'AAHead_Scout'}:
466-
protocol_name_tuned = 'anat-scout'
467-
468-
regd = parse_dbic_protocol_name(protocol_name_tuned)
469-
470-
if dcm_image_iod_spec and dcm_image_iod_spec.startswith('MIP'):
471-
regd['acq'] = regd.get('acq', '') + sanitize_str(dcm_image_iod_spec)
472-
473-
if not regd:
484+
"%s fields", ', '.join(series_spec_fields))
474485
skipped_unknown.append(s.series_id)
475486
continue
476487

477-
seqtype = regd.pop('seqtype')
478-
seqtype_label = regd.pop('seqtype_label', None)
488+
if dcm_image_iod_spec and dcm_image_iod_spec.startswith('MIP'):
489+
series_info['acq'] = series_info.get('acq', '') + sanitize_str(dcm_image_iod_spec)
490+
491+
seqtype = series_info.pop('seqtype')
492+
seqtype_label = series_info.pop('seqtype_label', None)
479493

480494
if image_type_seqtype and seqtype != image_type_seqtype:
481495
lgr.warning(
482496
"Deduced seqtype to be %s from DICOM, but got %s out of %s",
483-
image_type_seqtype, seqtype, protocol_name_tuned)
497+
image_type_seqtype, seqtype, series_spec)
484498

485499
# if s.is_derived:
486500
# # Let's for now stash those close to original images
@@ -497,7 +511,7 @@ def infotodict(seqinfo):
497511

498512
# analyze s.protocol_name (series_id is based on it) for full name mapping etc
499513
if seqtype == 'func' and not seqtype_label:
500-
if '_pace_' in protocol_name_tuned:
514+
if '_pace_' in series_spec:
501515
seqtype_label = 'pace' # or should it be part of seq-
502516
else:
503517
# assume bold by default
@@ -516,7 +530,7 @@ def infotodict(seqinfo):
516530
if seqtype == 'dwi' and not seqtype_label:
517531
seqtype_label = 'dwi'
518532

519-
run = regd.get('run')
533+
run = series_info.get('run')
520534
if run is not None:
521535
# so we have an indicator for a run
522536
if run == '+':
@@ -563,16 +577,16 @@ def infotodict(seqinfo):
563577
# if s.is_motion_corrected:
564578
# assert s.is_derived, "Motion corrected images must be 'derived'"
565579

566-
if s.is_motion_corrected and 'rec-' in regd.get('bids', ''):
580+
if s.is_motion_corrected and 'rec-' in series_info.get('bids', ''):
567581
raise NotImplementedError("want to add _acq-moco but there is _acq- already")
568582

569583
suffix_parts = [
570-
None if not regd.get('task') else "task-%s" % regd['task'],
571-
None if not regd.get('acq') else "acq-%s" % regd['acq'],
584+
None if not series_info.get('task') else "task-%s" % series_info['task'],
585+
None if not series_info.get('acq') else "acq-%s" % series_info['acq'],
572586
# But we want to add an indicator in case it was motion corrected
573587
# in the magnet. ref sample /2017/01/03/qa
574588
None if not s.is_motion_corrected else 'rec-moco',
575-
regd.get('bids'),
589+
series_info.get('bids'),
576590
run_label,
577591
seqtype_label,
578592
]
@@ -680,7 +694,7 @@ def infotoids(seqinfos, outdir):
680694

681695
# So -- use `outdir` and locator etc to see if for a given locator/subject
682696
# and possible ses+ in the sequence names, so we would provide a sequence
683-
# So might need to go through parse_dbic_protocol_name(s.protocol_name)
697+
# So might need to go through parse_series_spec(s.protocol_name)
684698
# to figure out presence of sessions.
685699
ses_markers = []
686700

@@ -691,7 +705,7 @@ def infotoids(seqinfos, outdir):
691705
for s in seqinfos:
692706
if s.is_derived:
693707
continue
694-
session_ = parse_dbic_protocol_name(s.protocol_name).get('session', None)
708+
session_ = parse_series_spec(s.protocol_name).get('session', None)
695709
if session_ and '{' in session_:
696710
# there was a marker for something we could provide from our seqinfo
697711
# e.g. {date}
@@ -761,22 +775,23 @@ def sanitize_str(value):
761775
return _delete_chars(value, '#!@$%^&.,:;_-')
762776

763777

764-
def parse_dbic_protocol_name(protocol_name):
778+
def parse_series_spec(series_spec):
765779
"""Parse protocol name according to our convention with minimal set of fixups
766780
"""
767781
# Since Yarik didn't know better place to put it in, but could migrate outside
768-
# at some point
769-
protocol_name = protocol_name.replace("anat_T1w", "anat-T1w")
770-
protocol_name = protocol_name.replace("hardi_64", "dwi_acq-hardi64")
771-
782+
# at some point. TODO
783+
series_spec = series_spec.replace("anat_T1w", "anat-T1w")
784+
series_spec = series_spec.replace("hardi_64", "dwi_acq-hardi64")
785+
series_spec = series_spec.replace("AAHead_Scout", "anat-scout")
786+
772787
# Parse the name according to our convention
773788
# https://docs.google.com/document/d/1R54cgOe481oygYVZxI7NHrifDyFUZAjOBwCTu7M7y48/edit?usp=sharing
774789
# Remove possible suffix we don't care about after __
775-
protocol_name = protocol_name.split('__', 1)[0]
790+
series_spec = series_spec.split('__', 1)[0]
776791

777792
bids = None # we don't know yet for sure
778793
# We need to figure out if it is a valid bids
779-
split = protocol_name.split('_')
794+
split = series_spec.split('_')
780795
prefix = split[0]
781796

782797
# Fixups
@@ -956,8 +971,8 @@ def test_fixupsubjectid():
956971
assert fixup_subjectid("SID30") == "sid000030"
957972

958973

959-
def test_parse_dbic_protocol_name():
960-
pdpn = parse_dbic_protocol_name
974+
def test_parse_series_spec():
975+
pdpn = parse_series_spec
961976

962977
assert pdpn("nondbic_func-bold") == {}
963978
assert pdpn("cancelme_func-bold") == {}

0 commit comments

Comments
 (0)