new structure, names, tests

chrispycheng · chrispycheng · commit f476c48062af · 2018-07-04T00:24:48.000-04:00
diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
@@ -44,95 +44,175 @@ def get_opt_parser():
     return p
 
 
-def diff_values(compare1, compare2):
+def diff_values(first_item, second_item):
     """Generically compares two values, returns true if different"""
-    if np.any(compare1 != compare2):
+    if np.any(first_item != second_item):  # comparing items that are instances of class np.ndarray
         return True
-    elif type(compare1) != type(compare2):
+
+    elif type(first_item) != type(second_item):  # comparing items that differ in data type
         return True
-    else:
-        return compare1 != compare2
 
+    else:  # all other use cases
+        return first_item != second_item
+
+
+def diff_headers(files, fields):
+    """Iterates over all header fields of all files to find those that differ
+
+        Parameters
+        ----------
+        files: a given list of files to be compared
+        fields: the fields to be compared
+
+        Returns
+        -------
+        list
+          header fields whose values differ across files
+        """
+
+    headers = []
+
+    for f in range(len(files)):  # for each file
+        for h in fields:  # for each header
+
+            # each step is wrapped in a try block because these operations have raised exceptions before
+            # get the particular header field within the particular file
+
+            try:
+                field = files[f][h]
+
+            except ValueError:
+                continue
+
+            # skip fields whose values are all NaN
+            try:
+                if np.all(np.isnan(field)):
+                    continue
+
+            except TypeError:
+                pass
+
+            # compare current file with other files
+            for i in files[f+1:]:
+                other_field = i[h]
+
+                # field.item is not always available; AttributeError is caught below
+                try:
+                    # converting bytes to be compared as strings
+                    if isinstance(field.item(0), bytes):
+                        field = field.item(0).decode("utf-8")
+
+                    # converting np.ndarray to lists to remove ambiguity
+                    if isinstance(field, np.ndarray):
+                        field = field.tolist()
+
+                    if isinstance(other_field.item(0), bytes):
+                        other_field = other_field.item(0).decode("utf-8")
+                    if isinstance(other_field, np.ndarray):
+                        other_field = other_field.tolist()
 
-def diff_header_fields(key, inputs):
-    """Iterates over a single header field of multiple files"""
+                except AttributeError:
+                    continue
+
+                # if the header values of the two files are different, append
+                if diff_values(field, other_field):
+                    headers.append(h)
+
+    if headers:  # return a list of headers for the files whose values differ
+        return headers
+
+
+def diff_header_fields(header_field, files):
+    """Iterates over a single header field of multiple files
+
+    Parameters
+    ----------
+    header_field: a given header field
+    files: the files to be compared
+
+    Returns
+    -------
+    list
+      str for each value corresponding to each file's given header field
+    """
 
     keyed_inputs = []
 
-    for i in inputs:  # stores each file's respective header files
+    for i in files:
+
+        # each step is wrapped in a try block because these operations have raised exceptions before
+        # get the particular header field within the particular file
+
         try:
-            field_value = i[key]
+            field_value = i[header_field]
         except ValueError:
             continue
 
-        try:  # filter numpy arrays
-            if np.all(np.isnan(field_value)):
-                continue
-        except TypeError:
-            pass
-
-        for x in inputs[1:]:  # compare different values, print all as soon as diff is found
+        # compare different data types, return all values as soon as diff is found
+        for x in files[1:]:
             try:
-                data_diff = diff_values(str(x[key].dtype), str(field_value.dtype))
+                data_diff = diff_values(str(x[header_field].dtype), str(field_value.dtype))
 
                 if data_diff:
                     break
             except ValueError:
                 continue
 
+        # string formatting of responses
         try:
-            if data_diff:  # prints data types if they're different and not if they're not
+
+            # if differences are found among data types
+            if data_diff:
+                # accounting for how to arrange arrays
                 if field_value.ndim < 1:
                     keyed_inputs.append("{}@{}".format(field_value, field_value.dtype))
                 elif field_value.ndim == 1:
                     keyed_inputs.append("{}@{}".format(list(field_value), field_value.dtype))
+
+            # if no differences are found among data types
             else:
                 if field_value.ndim < 1:
-                    keyed_inputs.append("{}".format(field_value))
+                    keyed_inputs.append(field_value)
                 elif field_value.ndim == 1:
-                    keyed_inputs.append("{}".format(list(field_value)))
+                    keyed_inputs.append(list(field_value))
+
         except UnboundLocalError:
             continue
 
-    if keyed_inputs:  # sometimes keyed_inputs is empty lol
-        comparison_input = keyed_inputs[0]
+    for i in range(len(keyed_inputs)):
+        keyed_inputs[i] = str(keyed_inputs[i])
 
-        for i in keyed_inputs[1:]:
-            if diff_values(comparison_input, i):
-                return keyed_inputs
+    return keyed_inputs
 
 
-def get_headers_diff(files, opts):
+def get_headers_diff(file_headers, headers):
     """Get difference between headers
 
     Parameters
     ----------
-    files: list of files
-    opts: any options included from the command line
+    file_headers: list of actual headers from files
+    headers: list of header fields that differ
 
     Returns
     -------
     dict
-      str: list  for each header field which differs, return list of
+      str: list for each header field which differs, return list of
       values per each file
     """
-
-    header_list = [nib.load(f).header for f in files]
     output = OrderedDict()
 
-    if opts.header_fields:  # will almost always have a header field
-        # signals "all fields"
-        if opts.header_fields == 'all':
-            # TODO: header fields might vary across file types, thus prior sensing would be needed
-            header_fields = header_list[0].keys()
-        else:
-            header_fields = opts.header_fields.split(',')
+    # if there are headers that differ
+    if headers:
 
-        for f in header_fields:
-            val = diff_header_fields(f, header_list)
+        # for each header
+        for header in headers:
 
+            # find the values corresponding to the files that differ
+            val = diff_header_fields(header, file_headers)
+
+            # store these values in a dictionary
             if val:
-                output[f] = val
+                output[header] = val
 
     return output
 
@@ -164,8 +244,19 @@ def main():
         # suppress nibabel format-compliance warnings
         nib.imageglobals.logger.level = 50
 
-    diff = get_headers_diff(files, opts)
+    file_headers = [nib.load(f).header for f in files]
+
+    if opts.header_fields:  # will almost always have a header field
+        # signals "all fields"
+        if opts.header_fields == 'all':
+            # TODO: header fields might vary across file types, thus prior sensing would be needed
+            header_fields = file_headers[0].keys()
+        else:
+            header_fields = opts.header_fields.split(',')
+    headers = diff_headers(file_headers, header_fields)
+    diff = get_headers_diff(file_headers, headers)
     data_diff = get_data_md5sums(files)
+
     if data_diff:
         diff['DATA(md5)'] = data_diff
 
diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py
@@ -11,7 +11,14 @@
 from nose.tools import (assert_true, assert_false, assert_raises,
                         assert_equal, assert_not_equal)
 
+import nibabel as nib
 from nibabel.cmdline.utils import *
+from nibabel.cmdline.diff import diff_header_fields, diff_headers
+from os.path import (dirname, join as pjoin, abspath, splitext, basename,
+                     exists)
+
+
+DATA_PATH = abspath(pjoin(dirname(__file__), '../../tests/data'))
 
 
 def test_table2string():
@@ -42,3 +49,26 @@ def get_test(self):
 
     assert_equal(safe_get(test, "test"), 2)
     assert_equal(safe_get(test, "failtest"), "-")
+
+
+def test_diff_headers():
+    fnames = [pjoin(DATA_PATH, f)
+              for f in ('standard.nii.gz', 'example4d.nii.gz')]
+    file_headers = [nib.load(f).header for f in fnames]
+    headers = ['sizeof_hdr', 'data_type', 'db_name', 'extents', 'session_error', 'regular', 'dim_info', 'dim', 'intent_p1',
+     'intent_p2', 'intent_p3', 'intent_code', 'datatype', 'bitpix', 'slice_start', 'pixdim', 'vox_offset', 'scl_slope',
+     'scl_inter', 'slice_end', 'slice_code', 'xyzt_units', 'cal_max', 'cal_min', 'slice_duration', 'toffset', 'glmax',
+     'glmin', 'descrip', 'aux_file', 'qform_code', 'sform_code', 'quatern_b', 'quatern_c', 'quatern_d', 'qoffset_x',
+     'qoffset_y', 'qoffset_z', 'srow_x', 'srow_y', 'srow_z', 'intent_name', 'magic']
+
+    assert_equal(diff_headers(file_headers, headers), ['regular', 'dim_info', 'dim', 'datatype', 'bitpix', 'pixdim',
+                                                 'slice_end', 'xyzt_units', 'cal_max', 'descrip', 'qform_code',
+                                                 'sform_code', 'quatern_b', 'quatern_c', 'quatern_d', 'qoffset_x',
+                                                 'qoffset_y', 'qoffset_z', 'srow_x', 'srow_y', 'srow_z'])
+
+
+def test_diff_header_fields():
+    fnames = [pjoin(DATA_PATH, f)
+              for f in ('standard.nii.gz', 'example4d.nii.gz')]
+    file_headers = [nib.load(f).header for f in fnames]
+    assert_equal(diff_header_fields("dim_info", file_headers), ['0', '57'])
diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py
@@ -18,6 +18,7 @@
 import difflib
 
 import nibabel as nib
+from nibabel.cmdline.diff import diff_headers
 from ..tmpdirs import InTemporaryDirectory
 from ..loadsave import load
 from ..orientations import flip_axis, aff2axcodes, inv_ornt_aff
@@ -69,35 +70,35 @@ def check_nib_ls_example4d(opts=[], hdrs_str="", other_str=""):
     assert_re_in(expected_re, stdout[len(fname):])
 
 
-def check_nib_diff_examples(opts=[], hdrs_str="", other_str=""):
+def check_nib_diff_examples():
     # test nib-diff script
     fnames = [pjoin(DATA_PATH, f)
-              for f in ('example4d.nii.gz', 'standard.nii.gz')]
+              for f in ('standard.nii.gz', 'example4d.nii.gz')]
     target_output = """\
 These files are different.
-Field      example4d.nii.gz                             standard.nii.gz                              
-regular    r                                                                                         
-dim_info   57                                           0                                            
-dim        [4, 128, 96, 24, 2, 1, 1, 1]                 [3, 4, 5, 7, 1, 1, 1, 1]                     
-datatype   4                                            2                                            
-bitpix     16                                           8                                            
-pixdim     [-1.0, 2.0, 2.0, 2.199999, 2000.0, 1.0, 1.0, 1.0][1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0]     
-slice_end  23                                           0                                            
-xyzt_units 10                                           0                                            
-cal_max    1162.0                                       0.0                                          
-descrip    FSL3.3? v2.25 NIfTI-1 Single file format                                                  
-qform_code 1                                            0                                            
-sform_code 1                                            2                                            
-quatern_b  -1.94510681403e-26                           0.0                                          
-quatern_c  -0.996708512306                              0.0                                          
-quatern_d  -0.081068739295                              0.0                                          
-qoffset_x  117.855102539                                0.0                                          
-qoffset_y  -35.7229423523                               0.0                                          
-qoffset_z  -7.24879837036                               0.0                                          
-srow_x     [-2.0, 6.7147157e-19, 9.0810245e-18, 117.8551][1.0, 0.0, 0.0, 0.0]                         
-srow_y     [-6.7147157e-19, 1.9737115, -0.35552824, -35.722942][0.0, 3.0, 0.0, 0.0]                         
-srow_z     [8.255481e-18, 0.32320762, 2.1710818, -7.2487984][0.0, 0.0, 2.0, 0.0]                         
-DATA(md5)  b0abbc492b4fd533b2c80d82570062cf             0a2576dd6badbb25bfb3b12076df986b"""
+Field      standard.nii.gz                              example4d.nii.gz                             
+regular    b''                                          b'r'                                         
+dim_info   0                                            57                                           
+dim        [3, 4, 5, 7, 1, 1, 1, 1]                     [4, 128, 96, 24, 2, 1, 1, 1]                 
+datatype   2                                            4                                            
+bitpix     8                                            16                                           
+pixdim     [1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0]     [-1.0, 2.0, 2.0, 2.1999991, 2000.0, 1.0, 1.0, 1.0]
+slice_end  0                                            23                                           
+xyzt_units 0                                            10                                           
+cal_max    0.0                                          1162.0                                       
+descrip    b''                                          b'FSL3.3? v2.25 NIfTI-1 Single file format'
+qform_code 0                                            1                                            
+sform_code 2                                            1                                            
+quatern_b  0.0                                          -1.9451068140294884e-26                      
+quatern_c  0.0                                          -0.9967085123062134                          
+quatern_d  0.0                                          -0.0810687392950058                          
+qoffset_x  0.0                                          117.8551025390625                            
+qoffset_y  0.0                                          -35.72294235229492                           
+qoffset_z  0.0                                          -7.248798370361328                           
+srow_x     [1.0, 0.0, 0.0, 0.0]                         [-2.0, 6.7147157e-19, 9.0810245e-18, 117.8551]
+srow_y     [0.0, 3.0, 0.0, 0.0]                         [-6.7147157e-19, 1.9737115, -0.35552824, -35.722942]
+srow_z     [0.0, 0.0, 2.0, 0.0]                         [8.2554809e-18, 0.32320762, 2.1710818, -7.2487984]
+DATA(md5)  0a2576dd6badbb25bfb3b12076df986b             b0abbc492b4fd533b2c80d82570062cf"""
     fnames2 = [pjoin(DATA_PATH, f)
               for f in ('example4d.nii.gz', 'example4d.nii.gz')]
     code, stdout, stderr = run_command(['nib-diff'] + fnames, check_code=False)