v2022.8.25 Major refactor and recoding. DICOM header based multiecho and recon-type handling. Improved auto run-numbering.

jmtyszka · jmtyszka · commit 6551e85a17e9 · 2022-08-25T12:14:48.000-07:00
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # BIDSKIT
 
-### Version 2022.8.24
+### Version 2022.8.25
 Python utilities for converting from DICOM to BIDS neuroimaging formats.
 
 The *bidskit* console command takes a directory tree containing imaging series from one or more subjects (eg T1w MPRAGE, BOLD EPI, Fieldmaps), converts the imaging data to Nifti-1 format with JSON metadata files (sidecars) and populates a directory tree according to the latest BIDS specification.
diff --git a/bidskit/__init__.py b/bidskit/__init__.py
@@ -0,0 +1,12 @@
+from .io import (
+    read_json,
+    write_json,
+    dcm_info,
+    parse_dcm2niix_fname,
+    parse_bids_fname_keyvals,
+    safe_copy,
+    safe_mkdir,
+    create_file_if_missing,
+    strip_extensions,
+    nii_to_json
+)
diff --git a/bidskit/dcm2niix.py b/bidskit/dcm2niix.py
@@ -122,7 +122,8 @@ def organize_series(
             # Load JSON sidecar metadata
             src_meta = bio.read_json(src_json_fname)
 
-            # dcm2niix replaces ' ' with '_' for series description in filenames. We must do the same
+            # DICOM series description string from BIDS sidecar
+            # For consistency with dcm2niix, replace spaces in DICOM SerDesc (eg ' RMS') with underscores
             ser_desc = src_meta['SeriesDescription'].replace(' ', '_')
 
             # Check if we're creating a new protocol dictionary
@@ -233,20 +234,18 @@ def handle_multiecho(work_json_fname, bids_json_fname, echo_flag, nii_ext):
         flag to add echo- key to filename (if necessary)
     """
 
-    # Isolate echo/part suffix (e*[_ph])
-    work_info = bio.parse_dcm2niix_fname(work_json_fname)
-    suffix = work_info['Suffix']
+    # Load BIDS sidecar metadata
+    bids_info = bio.read_json(work_json_fname)
 
-    # Default BIDS Nifti filename from JSON filename
-    bids_nii_fname = bids_json_fname.replace('.json', nii_ext)
+    # Init Nifti image fname
+    bids_nii_fname = bids_json_fname.replace('.json', '.nii.gz')
 
-    if suffix.startswith('e'):
+    # DICOM EchoNumber tag only present for multiecho sequences
+    if 'EchoNumber' in bids_info.keys():
 
-        print('    Multiple echoes detected')
+        echo_num = bids_info['EchoNumber']
 
-        # Split at '_' if present
-        chunks = suffix.split('_')
-        echo_num = int(chunks[0][1:])
+        print(f'    Multiple echoes detected')
         print(f'    Echo number {echo_num:d}')
 
         # Add an "echo-{echo_num}" key to the BIDS Nifti and JSON filenames
diff --git a/bidskit/io.py b/bidskit/io.py
@@ -207,7 +207,7 @@ def parse_bids_fname_keyvals(fname):
 
     # Split fname in containing directory and base name
     dname = os.path.dirname(fname)
-    bname = os.path.basename(fname)
+    bids_stub = os.path.basename(fname)
 
     # Init return dictionary with BIDS 1.1.1 valid key strings
     bids_keys = {
@@ -228,37 +228,56 @@ def parse_bids_fname_keyvals(fname):
 
     # Extract base filename and strip up to two extensions
     # Accounts for both '.nii' and '.nii.gz' variants
-    bname, ext1 = os.path.splitext(bname)
-    bname, ext2 = os.path.splitext(bname)
+    bids_stub, ext1 = os.path.splitext(bids_stub)
+    bids_stub, ext2 = os.path.splitext(bids_stub)
 
     # Remember full extension
     bids_keys['extension'] = ext2 + ext1
 
-    # Locate, record and remove final contrast suffix
+    #
+    # Logic for identifying, saving and trimming sequence type suffix (_bold, _T1w, etc)
+    #
+
+    # Check for recon variant keys at end of basename
+    # These can have leading '_' or ' ' eg 'acq-mez_T1w RMS' and 'task-rest_bold_SBRef'
+
+    recon_list = ['SBRef', 'RMS']
+    recon_key = ''
+
+    # DEBUG
+    if bids_stub.endswith('RMS'):
+        pass
+
+    for recon_str in recon_list:
+        if bids_stub.endswith(recon_str):
+            recon_start = bids_stub.rfind(recon_str)
+            recon_key = recon_str
+            bids_stub = bids_stub[:(recon_start-1)]
 
-    # Find position of first underscore from right of basename
-    suffix_start = bname.rfind('_') + 1
-    draft_suffix = bname[suffix_start:]
+    # Find position of last underscore in basename
+    last_underscore = bids_stub.rfind('_')
 
-    # Handle special case of no suffix, only key-value pairs
-    if '-' in draft_suffix or len(draft_suffix) < 1:
+    if last_underscore < 0:
 
-        # Leave suffix empty in dict
+        # No underscores found in bids_stub - set empty suffix
         bids_keys['suffix'] = ''
 
     else:
 
-        # Handle double suffices introduced by some Siemens research sequences
-        # eg *_bold_SBRef and *_T1w_RMS
-        # This code is only relevant when parsing ReproIn style series descriptions through this function
-        if bname.endswith('SBRef') or bname.endswith('RMS'):
-            # Find the second underscore in from the right
-            tmp = bname[:(suffix_start-1)]
-            suffix_start = tmp.rfind('_') + 1
+        suffix_start = bids_stub.rfind('_') + 1
+        draft_suffix = bids_stub[suffix_start:]
+
+        # Handle special case of no suffix, only key-value pairs
+        if '-' in draft_suffix or len(draft_suffix) < 1:
+
+            # Leave suffix empty in dict
+            bids_keys['suffix'] = ''
+
+        else:
 
-        # Split basename into prefix and suffix
-        bids_keys['suffix'] = bname[suffix_start:]
-        bname = bname[:suffix_start]
+            # Split basename into prefix and suffix
+            bids_keys['suffix'] = bids_stub[suffix_start:]
+            bids_stub = bids_stub[:(suffix_start-1)]
 
     # Divide filename into keys and values
     # Value segments are delimited by '<key>-' strings
@@ -273,7 +292,7 @@ def parse_bids_fname_keyvals(fname):
 
         key_str = key + '-'
 
-        i0 = bname.find(key_str)
+        i0 = bids_stub.find(key_str)
         if i0 > -1:
             i1 = i0 + len(key_str)
             key_name.append(key)
@@ -300,19 +319,16 @@ def parse_bids_fname_keyvals(fname):
 
         # Catch negative vend (only happens for final key-value without suffix)
         if vend < 0:
-            bids_keys[kname] = bname[vstart:]
+            bids_keys[kname] = bids_stub[vstart:]
         else:
-            bids_keys[kname] = bname[vstart:vend]
+            bids_keys[kname] = bids_stub[vstart:vend]
 
     # Tidy up Siemens recon extensions
-    # Only relevant when using this function to parse ReproIn-style series descriptions
-    if bids_keys['suffix'].endswith('SBRef'):
+    if 'SBRef' in recon_key:
         # Replace entire double suffix with 'sbref'
         bids_keys['suffix'] = 'sbref'
 
-    if bids_keys['suffix'].endswith('RMS'):
-        # Retain left part of double suffix ('T1w', etc)
-        bids_keys['suffix'] = bids_keys['suffix'].split('_')[0]
+    if 'RMS' in recon_key:
         # Add 'rms' to acq key
         bids_keys['acq'] = bids_keys['acq'] + 'rms'
 
diff --git a/bidskit/translate.py b/bidskit/translate.py
@@ -29,10 +29,9 @@
 from .io import (read_json,
                  write_json,
                  parse_bids_fname_keyvals,
-                 parse_dcm2niix_fname,
                  safe_copy,
                  create_file_if_missing,
-                 strip_extensions)
+                 nii_to_json)
 
 
 def add_participant_record(studydir, subject, age, sex):
@@ -141,15 +140,15 @@ def purpose_handling(bids_meta,
 
         print('    Identifying fieldmap image type')
 
-        if scan_seq == 'GR':
+        if 'GR' in scan_seq:
 
             print('    Gradient echo fieldmap detected')
             print('    Identifying magnitude and phase images')
 
             # Update BIDS filenames according to BIDS Fieldmap Case (1 or 2 - see specification)
             bids_nii_fname, bids_json_fname = fmaps.handle_fmap_case(work_json_fname, bids_nii_fname, bids_json_fname)
 
-        elif scan_seq == 'EP':
+        elif 'EP' in scan_seq:
 
             print('    EPI fieldmap detected')
 
@@ -164,7 +163,7 @@ def purpose_handling(bids_meta,
 
     elif bids_purpose == 'anat':
 
-        if scan_seq == 'GR_IR':
+        if 'GR' in scan_seq and 'IR' in scan_seq:
 
             print('    IR-prepared GRE detected - likely T1w MPRAGE or MEMPRAGE')
 
@@ -180,13 +179,13 @@ def purpose_handling(bids_meta,
             bids_nii_fname, bids_json_fname = d2n.handle_bias_recon(
                 work_json_fname, bids_json_fname, key_flags['Recon'], nii_ext)
 
-        elif scan_seq == 'SE':
+        elif 'SE' in scan_seq:
 
             print('    Spin echo detected - likely T1w or T2w anatomic image')
             bids_nii_fname, bids_json_fname = d2n.handle_bias_recon(
                 work_json_fname, bids_json_fname, key_flags['Recon'], nii_ext)
 
-        elif scan_seq == 'GR':
+        elif 'GR' in scan_seq:
 
             print('    Gradient echo detected')
 
@@ -345,7 +344,7 @@ def bids_legalize_keys(keys):
     return keys
 
 
-def auto_run_no(file_list, prot_dict):
+def auto_run_no(d2n_nii_list, prot_dict):
     """
     Search for duplicate series names in dcm2niix output file list
     Return inferred run numbers accounting for duplication and multiple recons from single acquisition
@@ -358,8 +357,8 @@ def auto_run_no(file_list, prot_dict):
     - If no duplicates of a given series are found, drop the run- key from the BIDS filename
     - Current dcm2niix version: 1.0.20211006
 
-    :param file_list: list of str
-        Nifti file name list
+    :param d2n_nii_list: list of str
+        dcm2niix output Nifti filename list
     :param prot_dict: dictionary
         Protocol translation dictionary
     :return: run_num, array of int
@@ -368,17 +367,22 @@ def auto_run_no(file_list, prot_dict):
     # Construct list of series descriptions and original numbers from file names
     series_id_list = []
 
-    for fname in file_list:
+    # Loop over all dcm2niix output Nifti filenames
+    for nii_fname in d2n_nii_list:
 
-        # Parse dcm2niix filename into relevant keys, including suffix
-        info = parse_dcm2niix_fname(fname)
+        # Load JSON sidecar for this Nifti image
+        json_fname = nii_to_json(nii_fname, '.nii.gz')
+        bids_info = read_json(json_fname)
 
-        ser_desc = info['SerDesc']
-        echo_no = info['EchoNo']
-        suffix = info['Suffix']
+        ser_desc = bids_info['SeriesDescription'].replace(' ', '_')
+        if 'EchoNumber' in bids_info.keys():
+            echo_no = bids_info['EchoNumber']
+        else:
+            echo_no = 1
+        recon_type = '-'.join(bids_info['ImageType'])
 
         if ser_desc in prot_dict:
-            _, bids_stub, _ = prot_dict[info['SerDesc']]
+            _, bids_stub, _ = prot_dict[ser_desc]
         else:
             print('')
             print('* Series description {} missing from code/Protocol_Translator.json'.format(ser_desc))
@@ -387,7 +391,7 @@ def auto_run_no(file_list, prot_dict):
             sys.exit(1)
 
         # Construct a unique series identifier including echo number and suffix
-        series_id = f"{bids_stub}_{echo_no}_{suffix}"
+        series_id = f"{bids_stub}_ECHO{echo_no}_{recon_type}"
 
         # Add to list
         series_id_list.append(series_id)
@@ -396,7 +400,7 @@ def auto_run_no(file_list, prot_dict):
     unique_series_ids = set(series_id_list)
 
     # Init vector of run numbers and max run numbers for each series
-    run_no = np.zeros(len(file_list)).astype(int)
+    run_no = np.zeros(len(d2n_nii_list)).astype(int)
 
     # Loop over unique series descriptions
     for unique_series_id in unique_series_ids:
diff --git a/setup.py b/setup.py
@@ -45,7 +45,7 @@
     # For a discussion on single-sourcing the version across setup.py and the
     # project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
-    version='2022.8.24',  # Required
+    version='2022.8.25',  # Required
 
     # This is a one-line description or tagline of what your project does. This
     # corresponds to the "Summary" metadata field: