28
28
Sequence names on the scanner must follow this specification to avoid manual
29
29
conversion/handling:
30
30
31
- <seqtype[-label]>[_ses-<SESID>][_task-<TASKID>][_acq-<ACQLABEL>][_run-<RUNID>][_dir-<DIR>][<more BIDS>][__<custom>]
31
+ [PREFIX:] <seqtype[-label]>[_ses-<SESID>][_task-<TASKID>][_acq-<ACQLABEL>][_run-<RUNID>][_dir-<DIR>][<more BIDS>][__<custom>]
32
32
33
33
where
34
+ [PREFIX:] - leading capital letters followed by : are stripped/ignored
34
35
<...> - value to be entered
35
36
[...] - optional -- might be nearly mandatory for some modalities (e.g.,
36
37
run for functional) and very optional for others
114
115
import logging
115
116
lgr = logging .getLogger ('heudiconv' )
116
117
118
+ # Terminology to hamornise and use to name variables etc
119
+ # experiment
120
+ # subject
121
+ # [session]
122
+ # exam (AKA scanning session) - currently seqinfo, unless brought together from multiple
123
+ # series (AKA protocol?)
124
+ # - series_spec - deduced from fields the spec (literal value)
125
+ # - series_info - the dictionary with fields parsed from series_spec
126
+
127
+ # Which fields in seqinfo (in this order) to check for the ReproIn spec
128
+ series_spec_fields = ('protocol_name' , 'series_description' )
129
+
117
130
# dictionary from accession-number to runs that need to be marked as bad
118
131
# NOTE: even if filename has number that is 0-padded, internally no padding
119
132
# is done
227
240
('_test' , '' ),
228
241
],
229
242
}
230
- keys2replace = ['protocol_name' , 'series_description' ]
231
243
232
244
# list containing StudyInstanceUID to skip -- hopefully doesn't happen too often
233
245
dicoms2skip = [
@@ -341,13 +353,13 @@ def fix_canceled_runs(seqinfo, accession2run=fix_accession2run):
341
353
if re .match (badruns_pattern , s .series_id ):
342
354
lgr .info ('Fixing bad run {0}' .format (s .series_id ))
343
355
fixedkwargs = dict ()
344
- for key in keys2replace :
356
+ for key in series_spec_fields :
345
357
fixedkwargs [key ] = 'cancelme_' + getattr (s , key )
346
358
seqinfo [i ] = s ._replace (** fixedkwargs )
347
359
return seqinfo
348
360
349
361
350
- def fix_dbic_protocol (seqinfo , keys = keys2replace , subsdict = protocols2fix ):
362
+ def fix_dbic_protocol (seqinfo , keys = series_spec_fields , subsdict = protocols2fix ):
351
363
"""Ad-hoc fixup for existing protocols
352
364
"""
353
365
study_hash = get_study_hash (seqinfo )
@@ -413,12 +425,13 @@ def infotodict(seqinfo):
413
425
skipped , skipped_unknown = [], []
414
426
current_run = 0
415
427
run_label = None # run-
416
- image_data_type = None
428
+ dcm_image_iod_spec = None
429
+ skip_derived = False
417
430
for s in seqinfo :
418
431
# XXX: skip derived sequences, we don't store them to avoid polluting
419
432
# the directory, unless it is the motion corrected ones
420
433
# (will get _rec-moco suffix)
421
- if s .is_derived and not s .is_motion_corrected :
434
+ if skip_derived and s .is_derived and not s .is_motion_corrected :
422
435
skipped .append (s .series_id )
423
436
lgr .debug ("Ignoring derived data %s" , s .series_id )
424
437
continue
@@ -433,40 +446,56 @@ def infotodict(seqinfo):
433
446
434
447
# figure out type of image from s.image_info -- just for checking ATM
435
448
# since we primarily rely on encoded in the protocol name information
436
- prev_image_data_type = image_data_type
437
- image_data_type = s .image_type [2 ]
438
- image_type_seqtype = {
439
- 'P' : 'fmap' , # phase
440
- 'FMRI' : 'func' ,
441
- 'MPR' : 'anat' ,
442
- # 'M': 'func', "magnitude" -- can be for scout, anat, bold, fmap
443
- 'DIFFUSION' : 'dwi' ,
444
- 'MIP_SAG' : 'anat' , # angiography
445
- 'MIP_COR' : 'anat' , # angiography
446
- 'MIP_TRA' : 'anat' , # angiography
447
- }.get (image_data_type , None )
448
-
449
- protocol_name_tuned = s .protocol_name
450
- # Few common replacements
451
- if protocol_name_tuned in {'AAHead_Scout' }:
452
- protocol_name_tuned = 'anat-scout'
453
-
454
- regd = parse_dbic_protocol_name (protocol_name_tuned )
455
-
456
- if image_data_type .startswith ('MIP' ):
457
- regd ['acq' ] = regd .get ('acq' , '' ) + sanitize_str (image_data_type )
458
-
459
- if not regd :
449
+ prev_dcm_image_iod_spec = dcm_image_iod_spec
450
+ if len (s .image_type ) > 2 :
451
+ # https://dicom.innolitics.com/ciods/cr-image/general-image/00080008
452
+ # 0 - ORIGINAL/DERIVED
453
+ # 1 - PRIMARY/SECONDARY
454
+ # 3 - Image IOD specific specialization (optional)
455
+ dcm_image_iod_spec = s .image_type [2 ]
456
+ image_type_seqtype = {
457
+ 'P' : 'fmap' , # phase
458
+ 'FMRI' : 'func' ,
459
+ 'MPR' : 'anat' ,
460
+ # 'M': 'func', "magnitude" -- can be for scout, anat, bold, fmap
461
+ 'DIFFUSION' : 'dwi' ,
462
+ 'MIP_SAG' : 'anat' , # angiography
463
+ 'MIP_COR' : 'anat' , # angiography
464
+ 'MIP_TRA' : 'anat' , # angiography
465
+ }.get (dcm_image_iod_spec , None )
466
+ else :
467
+ dcm_image_iod_spec = image_type_seqtype = None
468
+
469
+ series_info = {} # For please lintian and its friends
470
+ for sfield in series_spec_fields :
471
+ svalue = getattr (s , sfield )
472
+ series_info = parse_series_spec (svalue )
473
+ if series_info : # looks like a valid spec - we are done
474
+ series_spec = svalue
475
+ break
476
+ else :
477
+ lgr .debug (
478
+ "Failed to parse reproin spec in .%s=%r" ,
479
+ sfield , svalue )
480
+
481
+ if not series_info :
482
+ series_spec = None # we cannot know better
483
+ lgr .warning (
484
+ "Could not determine the series name by looking at "
485
+ "%s fields" , ', ' .join (series_spec_fields ))
460
486
skipped_unknown .append (s .series_id )
461
487
continue
462
488
463
- seqtype = regd .pop ('seqtype' )
464
- seqtype_label = regd .pop ('seqtype_label' , None )
489
+ if dcm_image_iod_spec and dcm_image_iod_spec .startswith ('MIP' ):
490
+ series_info ['acq' ] = series_info .get ('acq' , '' ) + sanitize_str (dcm_image_iod_spec )
491
+
492
+ seqtype = series_info .pop ('seqtype' )
493
+ seqtype_label = series_info .pop ('seqtype_label' , None )
465
494
466
495
if image_type_seqtype and seqtype != image_type_seqtype :
467
496
lgr .warning (
468
497
"Deduced seqtype to be %s from DICOM, but got %s out of %s" ,
469
- image_type_seqtype , seqtype , protocol_name_tuned )
498
+ image_type_seqtype , seqtype , series_spec )
470
499
471
500
# if s.is_derived:
472
501
# # Let's for now stash those close to original images
@@ -483,32 +512,34 @@ def infotodict(seqinfo):
483
512
484
513
# analyze s.protocol_name (series_id is based on it) for full name mapping etc
485
514
if seqtype == 'func' and not seqtype_label :
486
- if '_pace_' in protocol_name_tuned :
515
+ if '_pace_' in series_spec :
487
516
seqtype_label = 'pace' # or should it be part of seq-
488
517
else :
489
518
# assume bold by default
490
519
seqtype_label = 'bold'
491
520
492
521
if seqtype == 'fmap' and not seqtype_label :
522
+ if not dcm_image_iod_spec :
523
+ raise ValueError ("Do not know image data type yet to make decision" )
493
524
seqtype_label = {
494
525
'M' : 'magnitude' , # might want explicit {file_index} ?
495
526
'P' : 'phasediff' ,
496
527
'DIFFUSION' : 'epi' , # according to KODI those DWI are the EPIs we need
497
- }[image_data_type ]
528
+ }[dcm_image_iod_spec ]
498
529
499
530
# label for dwi as well
500
531
if seqtype == 'dwi' and not seqtype_label :
501
532
seqtype_label = 'dwi'
502
533
503
- run = regd .get ('run' )
534
+ run = series_info .get ('run' )
504
535
if run is not None :
505
536
# so we have an indicator for a run
506
537
if run == '+' :
507
538
# some sequences, e.g. fmap, would generate two (or more?)
508
539
# sequences -- e.g. one for magnitude(s) and other ones for
509
540
# phases. In those we must not increment run!
510
- if image_data_type == 'P' :
511
- if prev_image_data_type != 'M' :
541
+ if dcm_image_iod_spec and dcm_image_iod_spec == 'P' :
542
+ if prev_dcm_image_iod_spec != 'M' :
512
543
# XXX if we have a known earlier study, we need to always
513
544
# increase the run counter for phasediff because magnitudes
514
545
# were not acquired
@@ -517,7 +548,7 @@ def infotodict(seqinfo):
517
548
else :
518
549
raise RuntimeError (
519
550
"Was expecting phase image to follow magnitude "
520
- "image, but previous one was %r" , prev_image_data_type )
551
+ "image, but previous one was %r" , prev_dcm_image_iod_spec )
521
552
# else we do nothing special
522
553
else : # and otherwise we go to the next run
523
554
current_run += 1
@@ -547,16 +578,16 @@ def infotodict(seqinfo):
547
578
# if s.is_motion_corrected:
548
579
# assert s.is_derived, "Motion corrected images must be 'derived'"
549
580
550
- if s .is_motion_corrected and 'rec-' in regd .get ('bids' , '' ):
581
+ if s .is_motion_corrected and 'rec-' in series_info .get ('bids' , '' ):
551
582
raise NotImplementedError ("want to add _acq-moco but there is _acq- already" )
552
583
553
584
suffix_parts = [
554
- None if not regd .get ('task' ) else "task-%s" % regd ['task' ],
555
- None if not regd .get ('acq' ) else "acq-%s" % regd ['acq' ],
585
+ None if not series_info .get ('task' ) else "task-%s" % series_info ['task' ],
586
+ None if not series_info .get ('acq' ) else "acq-%s" % series_info ['acq' ],
556
587
# But we want to add an indicator in case it was motion corrected
557
588
# in the magnet. ref sample /2017/01/03/qa
558
589
None if not s .is_motion_corrected else 'rec-moco' ,
559
- regd .get ('bids' ),
590
+ series_info .get ('bids' ),
560
591
run_label ,
561
592
seqtype_label ,
562
593
]
@@ -664,7 +695,7 @@ def infotoids(seqinfos, outdir):
664
695
665
696
# So -- use `outdir` and locator etc to see if for a given locator/subject
666
697
# and possible ses+ in the sequence names, so we would provide a sequence
667
- # So might need to go through parse_dbic_protocol_name (s.protocol_name)
698
+ # So might need to go through parse_series_spec (s.protocol_name)
668
699
# to figure out presence of sessions.
669
700
ses_markers = []
670
701
@@ -675,7 +706,7 @@ def infotoids(seqinfos, outdir):
675
706
for s in seqinfos :
676
707
if s .is_derived :
677
708
continue
678
- session_ = parse_dbic_protocol_name (s .protocol_name ).get ('session' , None )
709
+ session_ = parse_series_spec (s .protocol_name ).get ('session' , None )
679
710
if session_ and '{' in session_ :
680
711
# there was a marker for something we could provide from our seqinfo
681
712
# e.g. {date}
@@ -745,22 +776,31 @@ def sanitize_str(value):
745
776
return _delete_chars (value , '#!@$%^&.,:;_-' )
746
777
747
778
748
- def parse_dbic_protocol_name ( protocol_name ):
779
+ def parse_series_spec ( series_spec ):
749
780
"""Parse protocol name according to our convention with minimal set of fixups
750
781
"""
751
782
# Since Yarik didn't know better place to put it in, but could migrate outside
752
- # at some point
753
- protocol_name = protocol_name .replace ("anat_T1w" , "anat-T1w" )
754
- protocol_name = protocol_name .replace ("hardi_64" , "dwi_acq-hardi64" )
783
+ # at some point. TODO
784
+ series_spec = series_spec .replace ("anat_T1w" , "anat-T1w" )
785
+ series_spec = series_spec .replace ("hardi_64" , "dwi_acq-hardi64" )
786
+ series_spec = series_spec .replace ("AAHead_Scout" , "anat-scout" )
787
+
788
+ # Parse the name according to our convention/specification
789
+
790
+ # leading or trailing spaces do not matter
791
+ series_spec = series_spec .strip (' ' )
792
+
793
+ # Strip off leading CAPITALS: prefix to accommodate some reported usecases:
794
+ # https://github.com/ReproNim/reproin/issues/14
795
+ # where PU: prefix is added by the scanner
796
+ series_spec = re .sub ("^[A-Z]*:" , "" , series_spec )
755
797
756
- # Parse the name according to our convention
757
- # https://docs.google.com/document/d/1R54cgOe481oygYVZxI7NHrifDyFUZAjOBwCTu7M7y48/edit?usp=sharing
758
798
# Remove possible suffix we don't care about after __
759
- protocol_name = protocol_name .split ('__' , 1 )[0 ]
799
+ series_spec = series_spec .split ('__' , 1 )[0 ]
760
800
761
801
bids = None # we don't know yet for sure
762
802
# We need to figure out if it is a valid bids
763
- split = protocol_name .split ('_' )
803
+ split = series_spec .split ('_' )
764
804
prefix = split [0 ]
765
805
766
806
# Fixups
@@ -940,8 +980,8 @@ def test_fixupsubjectid():
940
980
assert fixup_subjectid ("SID30" ) == "sid000030"
941
981
942
982
943
- def test_parse_dbic_protocol_name ():
944
- pdpn = parse_dbic_protocol_name
983
+ def test_parse_series_spec ():
984
+ pdpn = parse_series_spec
945
985
946
986
assert pdpn ("nondbic_func-bold" ) == {}
947
987
assert pdpn ("cancelme_func-bold" ) == {}
@@ -951,15 +991,19 @@ def test_parse_dbic_protocol_name():
951
991
{'seqtype' : 'func' , 'seqtype_label' : 'bold' }
952
992
953
993
# pdpn("bids_func_ses+_task-boo_run+") == \
954
- # order should not matter
955
- assert pdpn ("bids_func_ses+_run+_task-boo" ) == \
994
+ # order and PREFIX: should not matter, as well as trailing spaces
995
+ assert \
996
+ pdpn (" PREFIX:bids_func_ses+_task-boo_run+ " ) == \
997
+ pdpn ("PREFIX:bids_func_ses+_task-boo_run+" ) == \
998
+ pdpn ("bids_func_ses+_run+_task-boo" ) == \
956
999
{
957
1000
'seqtype' : 'func' ,
958
1001
# 'seqtype_label': 'bold',
959
1002
'session' : '+' ,
960
1003
'run' : '+' ,
961
1004
'task' : 'boo' ,
962
1005
}
1006
+
963
1007
# TODO: fix for that
964
1008
assert pdpn ("bids_func-pace_ses-1_task-boo_acq-bu_bids-please_run-2__therest" ) == \
965
1009
pdpn ("bids_func-pace_ses-1_run-2_task-boo_acq-bu_bids-please__therest" ) == \
0 commit comments