i think im going to cry. code cleaned and made more pythonic

chrispycheng · chrispycheng · commit ae7433929ca5 · 2018-07-11T03:54:38.000-04:00
diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
@@ -18,6 +18,7 @@
 from optparse import OptionParser, Option
 
 import numpy as np
+import functools.partial
 
 import nibabel as nib
 import nibabel.cmdline.utils
@@ -43,177 +44,54 @@ def get_opt_parser():
     return p
 
 
-def diff_values(first_item, second_item):
-    """Generically compares two values, returns true if different"""
-    if np.any(first_item != second_item):  # comparing items that are instances of class np.ndarray
-        return True
+def are_values_different(*values):
+    """Generically compares values, returns true if different"""
+    value0 = values[0]
+    values = values[1:]  # to ensure that the first value isn't compared with itself
 
-    elif type(first_item) != type(second_item):  # comparing items that differ in data type
-        return True
+    for value in values:
+        try:  # we don't want NaN values
+            if np.any(np.isnan(value0)) or np.any(np.isnan(value)):
+                break
 
-    else:  # all other use cases
-        return first_item != second_item
+        except TypeError:
+            pass
 
+        if type(value0) != type(value):  # if types are different, then we consider them different
+            return True
+        elif isinstance(value0, np.ndarray) and np.any(value0 != value):  # if they're a numpy array, special test
+            return True
+        elif value0 != value:
+            return True
 
-def diff_headers(files, fields):
-    """Iterates over all header fields of all files to find those that differ
+    return False
 
-        Parameters
-        ----------
-        files: a given list of files to be compared
-        fields: the fields to be compared
 
-        Returns
-        -------
-        list
-          header fields whose values differ across files
-        """
-
-    headers = []
-
-    for f in range(len(files)):  # for each file
-        for h in fields:  # for each header
-
-            # each maneuver is encased in a try block after exceptions have previously occurred
-            # get the particular header field within the particular file
-
-            try:
-                field = files[f][h]
-
-            except ValueError:
-                continue
-
-            # filter numpy arrays with a NaN value
-            try:
-                if np.all(np.isnan(field)):
-                    continue
-
-            except TypeError:
-                pass
-
-            # compare current file with other files
-            for i in files[f + 1:]:
-                other_field = i[h]
-
-                # sometimes field.item doesn't work
-                try:
-                    # converting bytes to be compared as strings
-                    if isinstance(field.item(0), bytes):
-                        field = field.item(0).decode("utf-8")
-
-                    # converting np.ndarray to lists to remove ambiguity
-                    if isinstance(field, np.ndarray):
-                        field = field.tolist()
-
-                    if isinstance(other_field.item(0), bytes):
-                        other_field = other_field.item(0).decode("utf-8")
-                    if isinstance(other_field, np.ndarray):
-                        other_field = other_field.tolist()
-
-                except AttributeError:
-                    continue
-
-                # if the header values of the two files are different, append
-                if diff_values(field, other_field):
-                    headers.append(h)
-
-    if headers:  # return a list of headers for the files whose values differ
-        return headers
-
-
-def diff_header_fields(header_field, files):
-    """Iterates over a single header field of multiple files
-
-    Parameters
-    ----------
-    header_field: a given header field
-    files: the files to be compared
-
-    Returns
-    -------
-    list
-      str for each value corresponding to each file's given header field
-    """
-
-    keyed_inputs = []
-
-    for i in files:
-
-        # each maneuver is encased in a try block after exceptions have previously occurred
-        # get the particular header field within the particular file
-
-        try:
-            field_value = i[header_field]
-        except ValueError:
-            continue
-
-        # compare different data types, return all values as soon as diff is found
-        for x in files[1:]:
-            try:
-                data_diff = diff_values(str(x[header_field].dtype), str(field_value.dtype))
-
-                if data_diff:
-                    break
-            except ValueError:
-                continue
-
-        # string formatting of responses
-        try:
-
-            # if differences are found among data types
-            if data_diff:
-                # accounting for how to arrange arrays
-                if field_value.ndim < 1:
-                    keyed_inputs.append("{}@{}".format(field_value, field_value.dtype))
-                elif field_value.ndim == 1:
-                    keyed_inputs.append("{}@{}".format(list(field_value), field_value.dtype))
-
-            # if no differences are found among data types
-            else:
-                if field_value.ndim < 1:
-                    keyed_inputs.append(field_value)
-                elif field_value.ndim == 1:
-                    keyed_inputs.append(list(field_value))
-
-        except UnboundLocalError:
-            continue
-
-    for i in range(len(keyed_inputs)):
-        keyed_inputs[i] = str(keyed_inputs[i])
-
-    return keyed_inputs
-
-
-def get_headers_diff(file_headers, headers):
+def get_headers_diff(file_headers, names=None):
     """Get difference between headers
 
     Parameters
     ----------
-    file_headers: list of actual headers from files
-    headers: list of header fields that differ
+    file_headers: list of actual headers (dicts) from files
+    names: list of header fields to test
 
     Returns
     -------
     dict
       str: list for each header field which differs, return list of
       values per each file
     """
-    output = OrderedDict()
-
-    # if there are headers that differ
-    if headers:
+    difference = OrderedDict()
 
-        # for each header
-        for header in headers:
+    # for each header field
+    for name in names:
+        values = [header.get(name) for header in file_headers]  # get corresponding value
 
-            # find the values corresponding to the files that differ
-            val = diff_header_fields(header, file_headers)
+        # if these values are different, store them in a dictionary
+        if are_values_different(*values):
+            difference[name] = values
 
-            # store these values in a dictionary
-            if val:
-                output[header] = val
-
-    return output
+    return difference
 
 
 def get_data_md5sums(files):
@@ -252,16 +130,16 @@ def main():
             header_fields = file_headers[0].keys()
         else:
             header_fields = opts.header_fields.split(',')
-    headers = diff_headers(file_headers, header_fields)
-    diff = get_headers_diff(file_headers, headers)
+
+    diff = get_headers_diff(file_headers, header_fields)
     data_diff = get_data_md5sums(files)
 
     if data_diff:
         diff['DATA(md5)'] = data_diff
 
     if diff:
         print("These files are different.")
-        print("{:<11}".format('Field'), end="")
+        print("{:<15}".format('Field'), end="")
 
         for f in files:
             output = ""
@@ -273,12 +151,12 @@ def main():
                     output += f[i]
                 i += 1
 
-            print("{:<45}".format(output), end="")
+            print("{:<55}".format(output), end="")
 
         print()
 
         for key, value in diff.items():
-            print("{:<11}".format(key), end="")
+            print("{:<15}".format(key), end="")
 
             for item in value:
                 item_str = str(item)
@@ -289,7 +167,7 @@ def main():
                 # and also replace some other invisible symbols with a question
                 # mark
                 item_str = re.sub('[\x00]', '?', item_str)
-                print("{:<45}".format(item_str), end="")
+                print("{:<55}".format(item_str), end="")
 
             print()
 
diff --git a/nibabel/tests/test_diff.py b/nibabel/tests/test_diff.py
@@ -5,29 +5,29 @@
 from __future__ import division, print_function, absolute_import
 
 from os.path import (dirname, join as pjoin, abspath)
+import numpy as np
 
 from hypothesis import given
 import hypothesis.strategies as st
 
 
 DATA_PATH = abspath(pjoin(dirname(__file__), 'data'))
 
-from nibabel.cmdline.diff import diff_values
+from nibabel.cmdline.diff import are_values_different
 
-# TODO: MAJOR TO DO IS TO FIGURE OUT HOW TO USE HYPOTHESIS FOR LONGER LIST
-# LENGTHS WHILE STILL CONTROLLING FOR OUTCOMES
+# TODO: MAJOR TO DO IS TO FIGURE OUT HOW TO USE HYPOTHESIS FOR LONGER LIST LENGTHS WHILE STILL CONTROLLING FOR OUTCOMES
 
 
 @given(st.data())
 def test_diff_values_int(data):
     x = data.draw(st.integers(), label='x')
-    y = data.draw(st.integers(min_value = x + 1), label='x+1')
-    z = data.draw(st.integers(max_value = x - 1), label='x-1')
+    y = data.draw(st.integers(min_value=x + 1), label='x+1')
+    z = data.draw(st.integers(max_value=x - 1), label='x-1')
 
-    assert not diff_values(x, x)
-    assert diff_values(x, y)
-    assert diff_values(x, z)
-    assert diff_values(y, z)
+    assert not are_values_different(x, x)
+    assert are_values_different(x, y)
+    assert are_values_different(x, z)
+    assert are_values_different(y, z)
 
 
 @given(st.data())
@@ -36,10 +36,10 @@ def test_diff_values_float(data):
     y = data.draw(st.floats(min_value=1e8), label='y')
     z = data.draw(st.floats(max_value=-1e8), label='z')
 
-    assert not diff_values(x, x)
-    assert diff_values(x, y)
-    assert diff_values(x, z)
-    assert diff_values(y, z)
+    assert not are_values_different(x, x)
+    assert are_values_different(x, y)
+    assert are_values_different(x, z)
+    assert are_values_different(y, z)
 
 
 @given(st.data())
@@ -48,10 +48,11 @@ def test_diff_values_mixed(data):
     type_int = data.draw(st.integers(), label='int')
     type_none = data.draw(st.none(), label='none')
 
-    assert diff_values(type_float, type_int)
-    assert diff_values(type_float, type_none)
-    assert diff_values(type_int, type_none)
-    assert not diff_values(type_none, type_none)
+    assert are_values_different(type_float, type_int)
+    assert are_values_different(type_float, type_none)
+    assert are_values_different(type_int, type_none)
+    assert are_values_different(np.ndarray([0]), 'hey')
+    assert not are_values_different(type_none, type_none)
 
 
 @given(st.data())
@@ -62,6 +63,6 @@ def test_diff_values_array(data):
     d = data.draw(st.lists(elements=st.floats(max_value=-1e8), min_size=1))
     # TODO: Figure out a way to include 0 in lists (arrays)
 
-    assert diff_values(a, b)
-    assert diff_values(c, d)
-    assert not diff_values(a, a)
+    assert are_values_different(a, b)
+    assert are_values_different(c, d)
+    assert not are_values_different(a, a)