Merge pull request #461 from tsalo/fix/updater-lists

yarikoptic · web-flow · commit 92eecfe23706 · 2022-02-17T21:14:28.000-05:00
FIX: Convert sets to lists for filename updaters
diff --git a/heudiconv/convert.py b/heudiconv/convert.py
@@ -2,9 +2,7 @@
 import os
 import os.path as op
 import logging
-from math import nan
 import shutil
-import sys
 import random
 import re
 
@@ -239,7 +237,7 @@ def prep_conversion(sid, dicoms, outdir, heuristic, converter, anon_sid,
                                     getattr(heuristic, 'DEFAULT_FIELDS', {}))
 
 
-def update_complex_name(metadata, filename, suffix):
+def update_complex_name(metadata, filename):
     """
     Insert `_part-<mag|phase>` entity into filename if data are from a
     sequence with magnitude/phase part.
@@ -250,14 +248,11 @@ def update_complex_name(metadata, filename, suffix):
         Scan metadata dictionary from BIDS sidecar file.
     filename : str
         Incoming filename
-    suffix : str
-        An index used for cases where a single scan produces multiple files,
-        but the differences between those files are unknown.
 
     Returns
     -------
     filename : str
-        Updated filename with rec entity added in appropriate position.
+        Updated filename with part entity added in appropriate position.
     """
     # Some scans separate magnitude/phase differently
     # A small note: _phase is deprecated, but this may add part-mag to
@@ -275,12 +270,12 @@ def update_complex_name(metadata, filename, suffix):
     elif 'P' in metadata.get('ImageType'):
         mag_or_phase = 'phase'
     else:
-        mag_or_phase = suffix
+        raise RuntimeError("Data type could not be inferred from the metadata.")
 
     # Determine scan suffix
     filetype = '_' + filename.split('_')[-1]
 
-    # Insert rec label
+    # Insert part label
     if not ('_part-%s' % mag_or_phase) in filename:
         # If "_part-" is specified, prepend the 'mag_or_phase' value.
         if '_part-' in filename:
@@ -290,7 +285,21 @@ def update_complex_name(metadata, filename, suffix):
             )
 
         # Insert it **before** the following string(s), whichever appears first.
-        for label in ['_recording', '_proc', '_space', filetype]:
+        # https://bids-specification.readthedocs.io/en/stable/99-appendices/09-entities.html
+        entities_after_part = [
+            "_proc",
+            "_hemi",
+            "_space",
+            "_split",
+            "_recording",
+            "_chunk",
+            "_res",
+            "_den",
+            "_label",
+            "_desc",
+            filetype,
+        ]
+        for label in entities_after_part:
             if (label == filetype) or (label in filename):
                 filename = filename.replace(
                     label, "_part-%s%s" % (mag_or_phase, label)
@@ -320,26 +329,52 @@ def update_multiecho_name(metadata, filename, echo_times):
     filename : str
         Updated filename with echo entity added, if appropriate.
     """
-    # Field maps separate echoes differently
+    # Field maps separate echoes differently, so do not attempt to update any filenames with these
+    # suffixes
     unsupported_types = [
         '_magnitude', '_magnitude1', '_magnitude2',
         '_phasediff', '_phase1', '_phase2', '_fieldmap'
     ]
     if any(ut in filename for ut in unsupported_types):
         return filename
 
-    # Get the EchoNumber from json file info.  If not present, use EchoTime
+    if not isinstance(echo_times, list):
+        raise TypeError(f'Argument "echo_times" must be a list, not a {type(echo_times)}')
+
+    # Get the EchoNumber from json file info.  If not present, use EchoTime.
     if 'EchoNumber' in metadata.keys():
         echo_number = metadata['EchoNumber']
-    else:
+    elif 'EchoTime' in metadata.keys():
         echo_number = echo_times.index(metadata['EchoTime']) + 1
+    else:
+        raise KeyError(
+            'Either "EchoNumber" or "EchoTime" must be in metadata keys. '
+            f'Keys detected: {metadata.keys()}'
+        )
 
     # Determine scan suffix
     filetype = '_' + filename.split('_')[-1]
 
     # Insert it **before** the following string(s), whichever appears first.
-    # https://bids-specification.readthedocs.io/en/stable/99-appendices/04-entity-table.html
-    for label in ['_flip', '_inv', '_mt', '_part', '_recording', '_proc', '_space', filetype]:
+    # https://bids-specification.readthedocs.io/en/stable/99-appendices/09-entities.html
+    entities_after_echo = [
+        "_flip",
+        "_inv",
+        "_mt",
+        "_part",
+        "_proc",
+        "_hemi",
+        "_space",
+        "_split",
+        "_recording",
+        "_chunk",
+        "_res",
+        "_den",
+        "_label",
+        "_desc",
+        filetype,
+    ]
+    for label in entities_after_echo:
         if (label == filetype) or (label in filename):
             filename = filename.replace(
                 label, "_echo-%s%s" % (echo_number, label)
@@ -374,6 +409,9 @@ def update_uncombined_name(metadata, filename, channel_names):
     if any(ut in filename for ut in unsupported_types):
         return filename
 
+    if not isinstance(channel_names, list):
+        raise TypeError(f'Argument "channel_names" must be a list, not a {type(channel_names)}')
+
     # Determine the channel number
     channel_number = ''.join([c for c in metadata['CoilString'] if c.isdigit()])
     if not channel_number:
@@ -385,7 +423,21 @@ def update_uncombined_name(metadata, filename, channel_names):
 
     # Insert it **before** the following string(s), whichever appears first.
     # Choosing to put channel near the end since it's not in the specification yet.
-    for label in ['_recording', '_proc', '_space', filetype]:
+    # See https://bids-specification.readthedocs.io/en/stable/99-appendices/09-entities.html
+    entities_after_ch = [
+        "_proc",
+        "_hemi",
+        "_space",
+        "_split",
+        "_recording",
+        "_chunk",
+        "_res",
+        "_den",
+        "_label",
+        "_desc",
+        filetype,
+    ]
+    for label in entities_after_ch:
         if (label == filetype) or (label in filename):
             filename = filename.replace(
                 label, "_ch-%s%s" % (channel_number, label)
@@ -731,12 +783,17 @@ def save_converted_files(res, item_dicoms, bids_options, outtype, prefix, outnam
         for metadata in bids_metas:
             if not metadata:
                 continue
-            echo_times.add(metadata.get('EchoTime', nan))
-            channel_names.add(metadata.get('CoilString', nan))
-            image_types.update(metadata.get('ImageType', [nan]))
+
+            # If the field is not available, fill that entry in the set with a False.
+            echo_times.add(metadata.get('EchoTime', False))
+            channel_names.add(metadata.get('CoilString', False))
+            image_types.update(metadata.get('ImageType', [False]))
+
         is_multiecho = len(set(filter(bool, echo_times))) > 1  # Check for varying echo times
         is_uncombined = len(set(filter(bool, channel_names))) > 1  # Check for uncombined data
         is_complex = 'M' in image_types and 'P' in image_types  # Determine if data are complex (magnitude + phase)
+        echo_times = sorted(echo_times)  # also converts to list
+        channel_names = sorted(channel_names)  # also converts to list
 
         ### Loop through the bids_files, set the output name and save files
         for fl, suffix, bids_file, bids_meta in zip(res_files, suffixes, bids_files, bids_metas):
@@ -756,7 +813,7 @@ def save_converted_files(res, item_dicoms, bids_options, outtype, prefix, outnam
 
                 if is_complex:
                     this_prefix_basename = update_complex_name(
-                        bids_meta, this_prefix_basename, suffix
+                        bids_meta, this_prefix_basename
                     )
 
                 if is_uncombined:
diff --git a/heudiconv/tests/test_convert.py b/heudiconv/tests/test_convert.py
@@ -4,86 +4,137 @@
 from glob import glob
 
 import pytest
-from .utils import TESTS_DATA_PATH
-
-from heudiconv.convert import (update_complex_name,
-                               update_multiecho_name,
-                               update_uncombined_name,
-                               DW_IMAGE_IN_FMAP_FOLDER_WARNING,
-                               )
 from heudiconv.bids import BIDSError
 from heudiconv.cli.run import main as runner
+from heudiconv.convert import (
+    DW_IMAGE_IN_FMAP_FOLDER_WARNING,
+    update_complex_name,
+    update_multiecho_name,
+    update_uncombined_name,
+)
+
+from .utils import TESTS_DATA_PATH
 
 
 def test_update_complex_name():
     """Unit testing for heudiconv.convert.update_complex_name(), which updates
     filenames with the part field if appropriate.
     """
     # Standard name update
-    fn = 'sub-X_ses-Y_task-Z_run-01_sbref'
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_sbref'
     metadata = {'ImageType': ['ORIGINAL', 'PRIMARY', 'P', 'MB', 'TE3', 'ND', 'MOSAIC']}
-    suffix = 3
     out_fn_true = 'sub-X_ses-Y_task-Z_run-01_part-phase_sbref'
-    out_fn_test = update_complex_name(metadata, fn, suffix)
+    out_fn_test = update_complex_name(metadata, base_fn)
     assert out_fn_test == out_fn_true
+
     # Catch an unsupported type and *do not* update
-    fn = 'sub-X_ses-Y_task-Z_run-01_phase'
-    out_fn_test = update_complex_name(metadata, fn, suffix)
-    assert out_fn_test == fn
-    # Data type is missing from metadata so use suffix
-    fn = 'sub-X_ses-Y_task-Z_run-01_sbref'
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_phase'
+    out_fn_test = update_complex_name(metadata, base_fn)
+    assert out_fn_test == base_fn
+
+    # Data type is missing from metadata so raise a RuntimeError
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_sbref'
     metadata = {'ImageType': ['ORIGINAL', 'PRIMARY', 'MB', 'TE3', 'ND', 'MOSAIC']}
-    out_fn_true = 'sub-X_ses-Y_task-Z_run-01_part-3_sbref'
-    out_fn_test = update_complex_name(metadata, fn, suffix)
-    assert out_fn_test == out_fn_true
-    # Catch existing field with value that *does not match* metadata
-    # and raise Exception
-    fn = 'sub-X_ses-Y_task-Z_run-01_part-mag_sbref'
+    with pytest.raises(RuntimeError):
+        update_complex_name(metadata, base_fn)
+
+    # Catch existing field with value (part is already in the filename)
+    # that *does not match* metadata and raise Exception
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_part-mag_sbref'
     metadata = {'ImageType': ['ORIGINAL', 'PRIMARY', 'P', 'MB', 'TE3', 'ND', 'MOSAIC']}
-    suffix = 3
     with pytest.raises(BIDSError):
-        assert update_complex_name(metadata, fn, suffix)
+        update_complex_name(metadata, base_fn)
+
+    # Catch existing field with value (part is already in the filename)
+    # that *does match* metadata and do not update
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_part-phase_sbref'
+    metadata = {'ImageType': ['ORIGINAL', 'PRIMARY', 'P', 'MB', 'TE3', 'ND', 'MOSAIC']}
+    out_fn_test = update_complex_name(metadata, base_fn)
+    assert out_fn_test == base_fn
 
 
 def test_update_multiecho_name():
     """Unit testing for heudiconv.convert.update_multiecho_name(), which updates
     filenames with the echo field if appropriate.
     """
     # Standard name update
-    fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
     metadata = {'EchoTime': 0.01,
                 'EchoNumber': 1}
     echo_times = [0.01, 0.02, 0.03]
     out_fn_true = 'sub-X_ses-Y_task-Z_run-01_echo-1_bold'
-    out_fn_test = update_multiecho_name(metadata, fn, echo_times)
+    out_fn_test = update_multiecho_name(metadata, base_fn, echo_times)
     assert out_fn_test == out_fn_true
+
     # EchoNumber field is missing from metadata, so use echo_times
     metadata = {'EchoTime': 0.01}
-    out_fn_test = update_multiecho_name(metadata, fn, echo_times)
+    out_fn_test = update_multiecho_name(metadata, base_fn, echo_times)
     assert out_fn_test == out_fn_true
+
     # Catch an unsupported type and *do not* update
-    fn = 'sub-X_ses-Y_task-Z_run-01_phasediff'
-    out_fn_test = update_multiecho_name(metadata, fn, echo_times)
-    assert out_fn_test == fn
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_phasediff'
+    out_fn_test = update_multiecho_name(metadata, base_fn, echo_times)
+    assert out_fn_test == base_fn
+
+    # EchoTime is missing, but EchoNumber is present (and is checked first), so it is used
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    out_fn_true = 'sub-X_ses-Y_task-Z_run-01_echo-1_bold'
+    metadata = {'EchoNumber': 1}
+    echo_times = [False, 0.02, 0.03]
+    out_fn_test = update_multiecho_name(metadata, base_fn, echo_times)
+    assert out_fn_test == out_fn_true
+
+    # Both EchoTime and EchoNumber are missing, which raises a KeyError
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    metadata = {}
+    echo_times = [False, 0.02, 0.03]
+    with pytest.raises(KeyError):
+        update_multiecho_name(metadata, base_fn, echo_times)
+
+    # Providing echo times as something other than a list should raise a TypeError
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    with pytest.raises(TypeError):
+        update_multiecho_name(metadata, base_fn, set(echo_times))
 
 
 def test_update_uncombined_name():
     """Unit testing for heudiconv.convert.update_uncombined_name(), which updates
     filenames with the ch field if appropriate.
     """
     # Standard name update
-    fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
     metadata = {'CoilString': 'H1'}
     channel_names = ['H1', 'H2', 'H3', 'HEA;HEP']
     out_fn_true = 'sub-X_ses-Y_task-Z_run-01_ch-01_bold'
-    out_fn_test = update_uncombined_name(metadata, fn, channel_names)
+    out_fn_test = update_uncombined_name(metadata, base_fn, channel_names)
     assert out_fn_test == out_fn_true
-    # CoilString field has no number in it
+
+    # CoilString field has no number in it, so we index the channel_names list
     metadata = {'CoilString': 'HEA;HEP'}
     out_fn_true = 'sub-X_ses-Y_task-Z_run-01_ch-04_bold'
-    out_fn_test = update_uncombined_name(metadata, fn, channel_names)
+    out_fn_test = update_uncombined_name(metadata, base_fn, channel_names)
     assert out_fn_test == out_fn_true
 
+    # Extract the number from the CoilString and use that
+    channel_names = ['H1', 'B1', 'H3', 'HEA;HEP']
+    metadata = {'CoilString': 'H1'}
+    out_fn_true = 'sub-X_ses-Y_task-Z_run-01_ch-01_bold'
+    out_fn_test = update_uncombined_name(metadata, base_fn, channel_names)
+    assert out_fn_test == out_fn_true
+
+    # NOTE: Extracting the number does not protect against multiple coils with the same number
+    # (but, say, different letters)
+    # Note that this is still "ch-01"
+    metadata = {'CoilString': 'B1'}
+    out_fn_true = 'sub-X_ses-Y_task-Z_run-01_ch-01_bold'
+    out_fn_test = update_uncombined_name(metadata, base_fn, channel_names)
+    assert out_fn_test == out_fn_true
+
+    # Providing channel names as something other than a list should raise a TypeError
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    with pytest.raises(TypeError):
+        update_uncombined_name(metadata, base_fn, set(channel_names))
+
 
 def test_b0dwi_for_fmap(tmpdir, caplog):
     """Make sure we raise a warning when .bvec and .bval files