28
28
Sequence names on the scanner must follow this specification to avoid manual
29
29
conversion/handling:
30
30
31
- [PREFIX:][WIP ]<seqtype[-label ]>[_ses-<SESID>][_task-<TASKID>][_acq-<ACQLABEL>][_run-<RUNID>][_dir-<DIR>][<more BIDS>][__<custom>]
31
+ [PREFIX:][WIP ]<datatype[-<suffix> ]>[_ses-<SESID>][_task-<TASKID>][_acq-<ACQLABEL>][_run-<RUNID>][_dir-<DIR>][<more BIDS>][__<custom>]
32
32
33
33
where
34
34
[PREFIX:] - leading capital letters followed by : are stripped/ignored
42
42
descriptive ones for e.g. SESID (_ses-movie, _ses-localizer)
43
43
44
44
45
- <seqtype[-label]>
46
- a known BIDS sequence type which is usually a name of the folder under
47
- subject's directory. And (optional) label is specific per sequence type
48
- (e.g. typical "bold" for func, or "T1w" for "anat"), which could often
49
- (but not always) be deduced from DICOM. Known to BIDS modalities are:
45
+ <datatype[-suffix]>
46
+ a known BIDS sequence datatype, which is usually the name of the folder under
47
+ subject's directory. The (optional) suffix is a specific sequence type
48
+ (e.g., "bold" for func, or "T1w" for "anat"), which could often
49
+ (but not always) be deduced from DICOM. BIDS modalities known to ReproIn
50
+ are:
50
51
51
52
anat - anatomical data. Might also be collected multiple times across
52
53
runs (e.g. if subject is taken out of magnet etc), so could
53
54
(optionally) have "_run" definition attached. For "standard anat"
54
- labels, please consult to "8.3 Anatomy imaging data" but most
55
- common are 'T1w', 'T2w', 'angio'
55
+ suffixes, please consult "8.3 Anatomy imaging data" but most
56
+ common are 'T1w', 'T2w', 'angio'.
57
+ beh - behavioral data. Known but not "treated".
56
58
func - functional (AKA task, including resting state) data.
57
59
Typically contains multiple runs, and might have multiple different
58
60
tasks, a different one for each run
59
61
(e.g. _task-memory_run-01, _task-oddball_run-02)
60
62
fmap - field maps
61
63
dwi - diffusion weighted imaging (can also have runs)
62
64
65
+ The other BIDS modalities are not known ATM and their data will not be
66
+ converted and will be just skipped (with a warning). Full list of datatypes
67
+ can be found at
68
+ https://github.com/bids-standard/bids-specification/blob/v1.7.0/src/schema/objects/datatypes.yaml
69
+ and their corresponding suffixes at
70
+ https://github.com/bids-standard/bids-specification/tree/v1.7.0/src/schema/rules/datatypes
71
+
63
72
_ses-<SESID> (optional)
64
73
a session. Having a single sequence within a study would make that study
65
74
follow "multi-session" layout. A common practice to have a _ses specifier
204
213
'criterion' : 'Closest'
205
214
}
206
215
216
+
217
+ KNOWN_DATATYPES = {'anat' , 'func' , 'dwi' , 'behav' , 'fmap' }
218
+
219
+
207
220
def _delete_chars (from_str , deletechars ):
208
221
""" Delete characters from string allowing for Python 2 / 3 difference
209
222
"""
@@ -404,9 +417,9 @@ def infotodict(seqinfo):
404
417
# 1 - PRIMARY/SECONDARY
405
418
# 3 - Image IOD specific specialization (optional)
406
419
dcm_image_iod_spec = s .image_type [2 ]
407
- image_type_seqtype = {
420
+ image_type_datatype = {
408
421
# Note: P and M are too generic to make a decision here, could be
409
- # for different seqtypes (bold, fmap, etc)
422
+ # for different datatypes (bold, fmap, etc)
410
423
'FMRI' : 'func' ,
411
424
'MPR' : 'anat' ,
412
425
'DIFFUSION' : 'dwi' ,
@@ -415,7 +428,7 @@ def infotodict(seqinfo):
415
428
'MIP_TRA' : 'anat' , # angiography
416
429
}.get (dcm_image_iod_spec , None )
417
430
else :
418
- dcm_image_iod_spec = image_type_seqtype = None
431
+ dcm_image_iod_spec = image_type_datatype = None
419
432
420
433
series_info = {} # For please lintian and its friends
421
434
for sfield in series_spec_fields :
@@ -440,19 +453,19 @@ def infotodict(seqinfo):
440
453
if dcm_image_iod_spec and dcm_image_iod_spec .startswith ('MIP' ):
441
454
series_info ['acq' ] = series_info .get ('acq' , '' ) + sanitize_str (dcm_image_iod_spec )
442
455
443
- seqtype = series_info .pop ('seqtype ' )
444
- seqtype_label = series_info .pop ('seqtype_label ' , None )
456
+ datatype = series_info .pop ('datatype ' )
457
+ datatype_suffix = series_info .pop ('datatype_suffix ' , None )
445
458
446
- if image_type_seqtype and seqtype != image_type_seqtype :
459
+ if image_type_datatype and datatype != image_type_datatype :
447
460
lgr .warning (
448
- "Deduced seqtype to be %s from DICOM, but got %s out of %s" ,
449
- image_type_seqtype , seqtype , series_spec )
461
+ "Deduced datatype to be %s from DICOM, but got %s out of %s" ,
462
+ image_type_datatype , datatype , series_spec )
450
463
451
464
# if s.is_derived:
452
465
# # Let's for now stash those close to original images
453
466
# # TODO: we might want a separate tree for all of this!?
454
467
# # so more of a parameter to the create_key
455
- # #seqtype += '/derivative'
468
+ # #datatype += '/derivative'
456
469
# # just keep it lower case and without special characters
457
470
# # XXXX what for???
458
471
# #seq.append(s.series_description.lower())
@@ -462,46 +475,46 @@ def infotodict(seqinfo):
462
475
prefix = ''
463
476
464
477
#
465
- # Figure out the seqtype_label (BIDS _suffix)
478
+ # Figure out the datatype_suffix (BIDS _suffix)
466
479
#
467
480
# If none was provided -- let's deduce it from the information we find:
468
481
# analyze s.protocol_name (series_id is based on it) for full name mapping etc
469
- if not seqtype_label :
470
- if seqtype == 'func' :
482
+ if not datatype_suffix :
483
+ if datatype == 'func' :
471
484
if '_pace_' in series_spec :
472
- seqtype_label = 'pace' # or should it be part of seq-
485
+ datatype_suffix = 'pace' # or should it be part of seq-
473
486
elif 'P' in s .image_type :
474
- seqtype_label = 'phase'
487
+ datatype_suffix = 'phase'
475
488
elif 'M' in s .image_type :
476
- seqtype_label = 'bold'
489
+ datatype_suffix = 'bold'
477
490
else :
478
491
# assume bold by default
479
- seqtype_label = 'bold'
480
- elif seqtype == 'fmap' :
492
+ datatype_suffix = 'bold'
493
+ elif datatype == 'fmap' :
481
494
# TODO: support phase1 phase2 like in "Case 2: Two phase images ..."
482
495
if not dcm_image_iod_spec :
483
496
raise ValueError ("Do not know image data type yet to make decision" )
484
- seqtype_label = {
497
+ datatype_suffix = {
485
498
# might want explicit {file_index} ?
486
499
# _epi for pepolar fieldmaps, see
487
500
# https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/01-magnetic-resonance-imaging-data.html#case-4-multiple-phase-encoded-directions-pepolar
488
501
'M' : 'epi' if 'dir' in series_info else 'magnitude' ,
489
502
'P' : 'phasediff' ,
490
503
'DIFFUSION' : 'epi' , # according to KODI those DWI are the EPIs we need
491
504
}[dcm_image_iod_spec ]
492
- elif seqtype == 'dwi' :
505
+ elif datatype == 'dwi' :
493
506
# label for dwi as well
494
- seqtype_label = 'dwi'
507
+ datatype_suffix = 'dwi'
495
508
496
509
#
497
- # Even if seqtype_label was provided, for some data we might need to override,
510
+ # Even if datatype_suffix was provided, for some data we might need to override,
498
511
# since they are complementary files produced along-side with original
499
512
# ones.
500
513
#
501
514
if s .series_description .endswith ('_SBRef' ):
502
- seqtype_label = 'sbref'
515
+ datatype_suffix = 'sbref'
503
516
504
- if not seqtype_label :
517
+ if not datatype_suffix :
505
518
# Might be provided by the bids ending within series_spec, we would
506
519
# just want to check if that the last element is not _key-value pair
507
520
bids_ending = series_info .get ('bids' , None )
@@ -559,7 +572,7 @@ def infotodict(seqinfo):
559
572
# assert s.is_derived, "Motion corrected images must be 'derived'"
560
573
561
574
if s .is_motion_corrected and 'rec-' in series_info .get ('bids' , '' ):
562
- raise NotImplementedError ("want to add _acq -moco but there is _acq - already" )
575
+ raise NotImplementedError ("want to add _rec -moco but there is _rec - already" )
563
576
564
577
def from_series_info (name ):
565
578
"""A little helper to provide _name-value if series_info knows it
@@ -571,7 +584,12 @@ def from_series_info(name):
571
584
else :
572
585
return None
573
586
574
- suffix_parts = [
587
+ # TODO: get order from schema, do not hardcode. ATM could be checked at
588
+ # https://bids-specification.readthedocs.io/en/stable/99-appendices/04-entity-table.html
589
+ # https://github.com/bids-standard/bids-specification/blob/HEAD/src/schema/rules/entities.yaml
590
+ # ATM we at large rely on possible (re)ordering according to schema to be done
591
+ # by heudiconv, not reproin here.
592
+ filename_suffix_parts = [
575
593
from_series_info ('task' ),
576
594
from_series_info ('acq' ),
577
595
# But we want to add an indicator in case it was motion corrected
@@ -580,10 +598,10 @@ def from_series_info(name):
580
598
from_series_info ('dir' ),
581
599
series_info .get ('bids' ),
582
600
run_label ,
583
- seqtype_label ,
601
+ datatype_suffix ,
584
602
]
585
603
# filter those which are None, and join with _
586
- suffix = '_' .join (filter (bool , suffix_parts ))
604
+ suffix = '_' .join (filter (bool , filename_suffix_parts ))
587
605
588
606
# # .series_description in case of
589
607
# sdesc = s.study_description
@@ -602,12 +620,12 @@ def from_series_info(name):
602
620
# For scouts -- we want only dicoms
603
621
# https://github.com/nipy/heudiconv/issues/145
604
622
if "_Scout" in s .series_description or \
605
- (seqtype == 'anat' and seqtype_label and seqtype_label .startswith ('scout' )):
623
+ (datatype == 'anat' and datatype_suffix and datatype_suffix .startswith ('scout' )):
606
624
outtype = ('dicom' ,)
607
625
else :
608
626
outtype = ('nii.gz' , 'dicom' )
609
627
610
- template = create_key (seqtype , suffix , prefix = prefix , outtype = outtype )
628
+ template = create_key (datatype , suffix , prefix = prefix , outtype = outtype )
611
629
# we wanted ordered dict for consistent demarcation of dups
612
630
if template not in info :
613
631
info [template ] = []
@@ -849,17 +867,17 @@ def split2(s):
849
867
return s , None
850
868
851
869
# Let's analyze first element which should tell us sequence type
852
- seqtype , seqtype_label = split2 (split [0 ])
853
- if seqtype not in { 'anat' , 'func' , 'dwi' , 'behav' , 'fmap' } :
870
+ datatype , datatype_suffix = split2 (split [0 ])
871
+ if datatype not in KNOWN_DATATYPES :
854
872
# It is not something we don't consume
855
873
if bids :
856
- lgr .warning ("It was instructed to be BIDS sequence but unknown "
857
- "type %s found" , seqtype )
874
+ lgr .warning ("It was instructed to be BIDS datatype but unknown "
875
+ "%s found. Known are: %s " , datatype , ', ' . join ( KNOWN_DATATYPES ) )
858
876
return {}
859
877
860
- regd = dict (seqtype = seqtype )
861
- if seqtype_label :
862
- regd ['seqtype_label ' ] = seqtype_label
878
+ regd = dict (datatype = datatype )
879
+ if datatype_suffix :
880
+ regd ['datatype_suffix ' ] = datatype_suffix
863
881
# now go through each to see if one which we care
864
882
bids_leftovers = []
865
883
for s in split [1 :]:
@@ -886,12 +904,12 @@ def split2(s):
886
904
# TODO: might want to check for all known "standard" BIDS suffixes here
887
905
# among bids_leftovers, thus serve some kind of BIDS validator
888
906
889
- # if not regd.get('seqtype_label ', None):
890
- # # might need to assign a default label for each seqtype if was not
907
+ # if not regd.get('datatype_suffix ', None):
908
+ # # might need to assign a default label for each datatype if was not
891
909
# # given
892
- # regd['seqtype_label '] = {
910
+ # regd['datatype_suffix '] = {
893
911
# 'func': 'bold'
894
- # }.get(regd['seqtype '], None)
912
+ # }.get(regd['datatype '], None)
895
913
896
914
return regd
897
915
@@ -900,7 +918,7 @@ def fixup_subjectid(subjectid):
900
918
"""Just in case someone managed to miss a zero or added an extra one"""
901
919
# make it lowercase
902
920
subjectid = subjectid .lower ()
903
- reg = re .match ("sid0*(\d+)$" , subjectid )
921
+ reg = re .match (r "sid0*(\d+)$" , subjectid )
904
922
if not reg :
905
923
# some completely other pattern
906
924
# just filter out possible _- in it
0 commit comments