Merge remote-tracking branch 'origin/master' into adds_populate_intended_for

yarikoptic · yarikoptic · commit 97b76f3b31a8 · 2022-02-23T13:17:07.000-05:00
* origin/master:
  Update test to match new behavior.
  Drop suffix parameter from update_complex_name().
  Remove unused imports.
  Enhance lists of entities in filename updaters.
  Use "base_fn" instead of "fn" in tests.
  Rename suffix to suffix_counter in update_complex_name.
  Work on fixing issues with multi-file converters.
  BF: Fix the order of the 'echo' entity in the filename
  try a simple fix for wrongly ordered files in tar file
  Use False instead of NaN bc filter wasn't catching NaNs.
  Convert variables to lists and check for lists in functions.

Conflicts:
	heudiconv/tests/test_convert.py -- just in imports
diff --git a/heudiconv/convert.py b/heudiconv/convert.py
@@ -2,9 +2,7 @@
 import os
 import os.path as op
 import logging
-from math import nan
 import shutil
-import sys
 import random
 import re
 
@@ -241,7 +239,7 @@ def prep_conversion(sid, dicoms, outdir, heuristic, converter, anon_sid,
                                     getattr(heuristic, 'DEFAULT_FIELDS', {}))
 
 
-def update_complex_name(metadata, filename, suffix):
+def update_complex_name(metadata, filename):
     """
     Insert `_part-<mag|phase>` entity into filename if data are from a
     sequence with magnitude/phase part.
@@ -252,14 +250,11 @@ def update_complex_name(metadata, filename, suffix):
         Scan metadata dictionary from BIDS sidecar file.
     filename : str
         Incoming filename
-    suffix : str
-        An index used for cases where a single scan produces multiple files,
-        but the differences between those files are unknown.
 
     Returns
     -------
     filename : str
-        Updated filename with rec entity added in appropriate position.
+        Updated filename with part entity added in appropriate position.
     """
     # Some scans separate magnitude/phase differently
     # A small note: _phase is deprecated, but this may add part-mag to
@@ -277,12 +272,12 @@ def update_complex_name(metadata, filename, suffix):
     elif 'P' in metadata.get('ImageType'):
         mag_or_phase = 'phase'
     else:
-        mag_or_phase = suffix
+        raise RuntimeError("Data type could not be inferred from the metadata.")
 
     # Determine scan suffix
     filetype = '_' + filename.split('_')[-1]
 
-    # Insert rec label
+    # Insert part label
     if not ('_part-%s' % mag_or_phase) in filename:
         # If "_part-" is specified, prepend the 'mag_or_phase' value.
         if '_part-' in filename:
@@ -292,7 +287,21 @@ def update_complex_name(metadata, filename, suffix):
             )
 
         # Insert it **before** the following string(s), whichever appears first.
-        for label in ['_recording', '_proc', '_space', filetype]:
+        # https://bids-specification.readthedocs.io/en/stable/99-appendices/09-entities.html
+        entities_after_part = [
+            "_proc",
+            "_hemi",
+            "_space",
+            "_split",
+            "_recording",
+            "_chunk",
+            "_res",
+            "_den",
+            "_label",
+            "_desc",
+            filetype,
+        ]
+        for label in entities_after_part:
             if (label == filetype) or (label in filename):
                 filename = filename.replace(
                     label, "_part-%s%s" % (mag_or_phase, label)
@@ -322,25 +331,52 @@ def update_multiecho_name(metadata, filename, echo_times):
     filename : str
         Updated filename with echo entity added, if appropriate.
     """
-    # Field maps separate echoes differently
+    # Field maps separate echoes differently, so do not attempt to update any filenames with these
+    # suffixes
     unsupported_types = [
         '_magnitude', '_magnitude1', '_magnitude2',
         '_phasediff', '_phase1', '_phase2', '_fieldmap'
     ]
     if any(ut in filename for ut in unsupported_types):
         return filename
 
-    # Get the EchoNumber from json file info.  If not present, use EchoTime
+    if not isinstance(echo_times, list):
+        raise TypeError(f'Argument "echo_times" must be a list, not a {type(echo_times)}')
+
+    # Get the EchoNumber from json file info.  If not present, use EchoTime.
     if 'EchoNumber' in metadata.keys():
         echo_number = metadata['EchoNumber']
-    else:
+    elif 'EchoTime' in metadata.keys():
         echo_number = echo_times.index(metadata['EchoTime']) + 1
+    else:
+        raise KeyError(
+            'Either "EchoNumber" or "EchoTime" must be in metadata keys. '
+            f'Keys detected: {metadata.keys()}'
+        )
 
     # Determine scan suffix
     filetype = '_' + filename.split('_')[-1]
 
     # Insert it **before** the following string(s), whichever appears first.
-    for label in ['_recording', '_proc', '_space', filetype]:
+    # https://bids-specification.readthedocs.io/en/stable/99-appendices/09-entities.html
+    entities_after_echo = [
+        "_flip",
+        "_inv",
+        "_mt",
+        "_part",
+        "_proc",
+        "_hemi",
+        "_space",
+        "_split",
+        "_recording",
+        "_chunk",
+        "_res",
+        "_den",
+        "_label",
+        "_desc",
+        filetype,
+    ]
+    for label in entities_after_echo:
         if (label == filetype) or (label in filename):
             filename = filename.replace(
                 label, "_echo-%s%s" % (echo_number, label)
@@ -375,6 +411,9 @@ def update_uncombined_name(metadata, filename, channel_names):
     if any(ut in filename for ut in unsupported_types):
         return filename
 
+    if not isinstance(channel_names, list):
+        raise TypeError(f'Argument "channel_names" must be a list, not a {type(channel_names)}')
+
     # Determine the channel number
     channel_number = ''.join([c for c in metadata['CoilString'] if c.isdigit()])
     if not channel_number:
@@ -386,7 +425,21 @@ def update_uncombined_name(metadata, filename, channel_names):
 
     # Insert it **before** the following string(s), whichever appears first.
     # Choosing to put channel near the end since it's not in the specification yet.
-    for label in ['_recording', '_proc', '_space', filetype]:
+    # See https://bids-specification.readthedocs.io/en/stable/99-appendices/09-entities.html
+    entities_after_ch = [
+        "_proc",
+        "_hemi",
+        "_space",
+        "_split",
+        "_recording",
+        "_chunk",
+        "_res",
+        "_den",
+        "_label",
+        "_desc",
+        filetype,
+    ]
+    for label in entities_after_ch:
         if (label == filetype) or (label in filename):
             filename = filename.replace(
                 label, "_ch-%s%s" % (channel_number, label)
@@ -756,12 +809,17 @@ def save_converted_files(res, item_dicoms, bids_options, outtype, prefix, outnam
         for metadata in bids_metas:
             if not metadata:
                 continue
-            echo_times.add(metadata.get('EchoTime', nan))
-            channel_names.add(metadata.get('CoilString', nan))
-            image_types.update(metadata.get('ImageType', [nan]))
+
+            # If the field is not available, fill that entry in the set with a False.
+            echo_times.add(metadata.get('EchoTime', False))
+            channel_names.add(metadata.get('CoilString', False))
+            image_types.update(metadata.get('ImageType', [False]))
+
         is_multiecho = len(set(filter(bool, echo_times))) > 1  # Check for varying echo times
         is_uncombined = len(set(filter(bool, channel_names))) > 1  # Check for uncombined data
         is_complex = 'M' in image_types and 'P' in image_types  # Determine if data are complex (magnitude + phase)
+        echo_times = sorted(echo_times)  # also converts to list
+        channel_names = sorted(channel_names)  # also converts to list
 
         ### Loop through the bids_files, set the output name and save files
         for fl, suffix, bids_file, bids_meta in zip(res_files, suffixes, bids_files, bids_metas):
@@ -781,7 +839,7 @@ def save_converted_files(res, item_dicoms, bids_options, outtype, prefix, outnam
 
                 if is_complex:
                     this_prefix_basename = update_complex_name(
-                        bids_meta, this_prefix_basename, suffix
+                        bids_meta, this_prefix_basename
                     )
 
                 if is_uncombined:
diff --git a/heudiconv/parser.py b/heudiconv/parser.py
@@ -97,7 +97,7 @@ def get_extracted_dicoms(fl):
         for tm in tmembers:
             tm.mode = 0o700
         # get all files, assemble full path in tmp dir
-        tf_content = [m.name for m in tmembers if m.isfile()]
+        tf_content = sorted([m.name for m in tmembers if m.isfile()])
         # store full paths to each file, so we don't need to drag along
         # tmpdir as some basedir
         sessions[session] = [op.join(tmpdir, f) for f in tf_content]
diff --git a/heudiconv/tests/test_convert.py b/heudiconv/tests/test_convert.py
@@ -4,88 +4,139 @@
 from glob import glob
 
 import pytest
-from .utils import TESTS_DATA_PATH
-
-from heudiconv.convert import (update_complex_name,
-                               update_multiecho_name,
-                               update_uncombined_name,
-                               DW_IMAGE_IN_FMAP_FOLDER_WARNING,
-                               )
 import heudiconv.convert
 from heudiconv.bids import BIDSError
 from heudiconv.utils import load_heuristic
 from heudiconv.cli.run import main as runner
+from heudiconv.convert import (
+    DW_IMAGE_IN_FMAP_FOLDER_WARNING,
+    update_complex_name,
+    update_multiecho_name,
+    update_uncombined_name,
+)
+
+from .utils import TESTS_DATA_PATH
 
 
 def test_update_complex_name():
     """Unit testing for heudiconv.convert.update_complex_name(), which updates
     filenames with the part field if appropriate.
     """
     # Standard name update
-    fn = 'sub-X_ses-Y_task-Z_run-01_sbref'
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_sbref'
     metadata = {'ImageType': ['ORIGINAL', 'PRIMARY', 'P', 'MB', 'TE3', 'ND', 'MOSAIC']}
-    suffix = 3
     out_fn_true = 'sub-X_ses-Y_task-Z_run-01_part-phase_sbref'
-    out_fn_test = update_complex_name(metadata, fn, suffix)
+    out_fn_test = update_complex_name(metadata, base_fn)
     assert out_fn_test == out_fn_true
+
     # Catch an unsupported type and *do not* update
-    fn = 'sub-X_ses-Y_task-Z_run-01_phase'
-    out_fn_test = update_complex_name(metadata, fn, suffix)
-    assert out_fn_test == fn
-    # Data type is missing from metadata so use suffix
-    fn = 'sub-X_ses-Y_task-Z_run-01_sbref'
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_phase'
+    out_fn_test = update_complex_name(metadata, base_fn)
+    assert out_fn_test == base_fn
+
+    # Data type is missing from metadata so raise a RuntimeError
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_sbref'
     metadata = {'ImageType': ['ORIGINAL', 'PRIMARY', 'MB', 'TE3', 'ND', 'MOSAIC']}
-    out_fn_true = 'sub-X_ses-Y_task-Z_run-01_part-3_sbref'
-    out_fn_test = update_complex_name(metadata, fn, suffix)
-    assert out_fn_test == out_fn_true
-    # Catch existing field with value that *does not match* metadata
-    # and raise Exception
-    fn = 'sub-X_ses-Y_task-Z_run-01_part-mag_sbref'
+    with pytest.raises(RuntimeError):
+        update_complex_name(metadata, base_fn)
+
+    # Catch existing field with value (part is already in the filename)
+    # that *does not match* metadata and raise Exception
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_part-mag_sbref'
     metadata = {'ImageType': ['ORIGINAL', 'PRIMARY', 'P', 'MB', 'TE3', 'ND', 'MOSAIC']}
-    suffix = 3
     with pytest.raises(BIDSError):
-        assert update_complex_name(metadata, fn, suffix)
+        update_complex_name(metadata, base_fn)
+
+    # Catch existing field with value (part is already in the filename)
+    # that *does match* metadata and do not update
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_part-phase_sbref'
+    metadata = {'ImageType': ['ORIGINAL', 'PRIMARY', 'P', 'MB', 'TE3', 'ND', 'MOSAIC']}
+    out_fn_test = update_complex_name(metadata, base_fn)
+    assert out_fn_test == base_fn
 
 
 def test_update_multiecho_name():
     """Unit testing for heudiconv.convert.update_multiecho_name(), which updates
     filenames with the echo field if appropriate.
     """
     # Standard name update
-    fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
     metadata = {'EchoTime': 0.01,
                 'EchoNumber': 1}
     echo_times = [0.01, 0.02, 0.03]
     out_fn_true = 'sub-X_ses-Y_task-Z_run-01_echo-1_bold'
-    out_fn_test = update_multiecho_name(metadata, fn, echo_times)
+    out_fn_test = update_multiecho_name(metadata, base_fn, echo_times)
     assert out_fn_test == out_fn_true
+
     # EchoNumber field is missing from metadata, so use echo_times
     metadata = {'EchoTime': 0.01}
-    out_fn_test = update_multiecho_name(metadata, fn, echo_times)
+    out_fn_test = update_multiecho_name(metadata, base_fn, echo_times)
     assert out_fn_test == out_fn_true
+
     # Catch an unsupported type and *do not* update
-    fn = 'sub-X_ses-Y_task-Z_run-01_phasediff'
-    out_fn_test = update_multiecho_name(metadata, fn, echo_times)
-    assert out_fn_test == fn
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_phasediff'
+    out_fn_test = update_multiecho_name(metadata, base_fn, echo_times)
+    assert out_fn_test == base_fn
+
+    # EchoTime is missing, but use EchoNumber (which is the first thing it checks)
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    out_fn_true = 'sub-X_ses-Y_task-Z_run-01_echo-1_bold'
+    metadata = {'EchoNumber': 1}
+    echo_times = [False, 0.02, 0.03]
+    out_fn_test = update_multiecho_name(metadata, base_fn, echo_times)
+    assert out_fn_test == out_fn_true
+
+    # Both EchoTime and EchoNumber are missing, which raises a KeyError
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    metadata = {}
+    echo_times = [False, 0.02, 0.03]
+    with pytest.raises(KeyError):
+        update_multiecho_name(metadata, base_fn, echo_times)
+
+    # Providing echo times as something other than a list should raise a TypeError
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    with pytest.raises(TypeError):
+        update_multiecho_name(metadata, base_fn, set(echo_times))
 
 
 def test_update_uncombined_name():
     """Unit testing for heudiconv.convert.update_uncombined_name(), which updates
     filenames with the ch field if appropriate.
     """
     # Standard name update
-    fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
     metadata = {'CoilString': 'H1'}
     channel_names = ['H1', 'H2', 'H3', 'HEA;HEP']
     out_fn_true = 'sub-X_ses-Y_task-Z_run-01_ch-01_bold'
-    out_fn_test = update_uncombined_name(metadata, fn, channel_names)
+    out_fn_test = update_uncombined_name(metadata, base_fn, channel_names)
     assert out_fn_test == out_fn_true
-    # CoilString field has no number in it
+
+    # CoilString field has no number in it, so we index the channel_names list
     metadata = {'CoilString': 'HEA;HEP'}
     out_fn_true = 'sub-X_ses-Y_task-Z_run-01_ch-04_bold'
-    out_fn_test = update_uncombined_name(metadata, fn, channel_names)
+    out_fn_test = update_uncombined_name(metadata, base_fn, channel_names)
     assert out_fn_test == out_fn_true
 
+    # Extract the number from the CoilString and use that
+    channel_names = ['H1', 'B1', 'H3', 'HEA;HEP']
+    metadata = {'CoilString': 'H1'}
+    out_fn_true = 'sub-X_ses-Y_task-Z_run-01_ch-01_bold'
+    out_fn_test = update_uncombined_name(metadata, base_fn, channel_names)
+    assert out_fn_test == out_fn_true
+
+    # NOTE: Extracting the number does not protect against multiple coils with the same number
+    # (but, say, different letters)
+    # Note that this is still "ch-01"
+    metadata = {'CoilString': 'B1'}
+    out_fn_true = 'sub-X_ses-Y_task-Z_run-01_ch-01_bold'
+    out_fn_test = update_uncombined_name(metadata, base_fn, channel_names)
+    assert out_fn_test == out_fn_true
+
+    # Providing channel names as something other than a list should raise a TypeError
+    base_fn = 'sub-X_ses-Y_task-Z_run-01_bold'
+    with pytest.raises(TypeError):
+        update_uncombined_name(metadata, base_fn, set(channel_names))
+
 
 def test_b0dwi_for_fmap(tmpdir, caplog):
     """Make sure we raise a warning when .bvec and .bval files