18
18
from optparse import OptionParser , Option
19
19
20
20
import numpy as np
21
import functools
from functools import partial
21
22
22
23
import nibabel as nib
23
24
import nibabel .cmdline .utils
@@ -43,177 +44,54 @@ def get_opt_parser():
43
44
return p
44
45
45
46
46
def _pair_differs(first, second):
    """Return True when two header values should be reported as different."""
    # A change of type alone (e.g. ``1`` vs ``1.0``) counts as a difference.
    if type(first) is not type(second):
        return True

    if isinstance(first, np.ndarray):
        # Never rely on broadcasting: differently shaped arrays are different.
        if first.shape != second.shape:
            return True
        mismatch = np.asarray(first != second)
        try:
            nan_in_both = np.logical_and(np.isnan(first), np.isnan(second))
        except TypeError:
            # np.isnan is undefined for this dtype (bytes, str, object, ...),
            # so there are no NaNs to special-case.
            return bool(np.any(mismatch))
        # NaN != NaN by definition; positions that are NaN in both arrays are
        # treated as equal so that unset fields do not show up as differences.
        return bool(np.any(np.logical_and(mismatch,
                                          np.logical_not(nan_in_both))))

    try:
        nan_in_both = bool(np.isnan(first)) and bool(np.isnan(second))
    except (TypeError, ValueError):
        # Not a numeric scalar (str, bytes, None, sequences, ...)
        nan_in_both = False
    if nan_in_both:
        return False
    return bool(first != second)


def are_values_different(*values):
    """Generically compare values, return True if any of them differs

    Every value is compared against the first one.  The comparison is meant
    for reporting differences between headers, hence:

    - a difference in type is a difference, i.e. ``1`` differs from ``1.0``
    - arrays of different shapes are different
    - NaNs found at the same position in both values are considered equal,
      whereas a NaN against a non-NaN is a difference

    Parameters
    ----------
    *values : values to compare

    Returns
    -------
    bool
      True if at least one value differs from the first one.  False if all
      are the same or if fewer than two values were given.
    """
    if len(values) < 2:
        return False

    reference = values[0]
    return any(_pair_differs(reference, other) for other in values[1:])


def get_headers_diff(file_headers, names=None):
    """Get difference between headers

    Parameters
    ----------
    file_headers: list of actual headers (dict-like, providing ``get`` and
      ``keys``) from files
    names: list of header fields to test.  If None, the fields of the first
      header are tested

    Returns
    -------
    dict
      str: list for each header field which differs, return list of
      values per each file
    """
    difference = OrderedDict()

    if names is None:
        # Nothing to compare against when there are no headers at all
        if not file_headers:
            return difference
        names = file_headers[0].keys()

    # for each header field
    for name in names:
        # collect the corresponding value from every file, in file order
        values = [header.get(name) for header in file_headers]

        # if these values are different, store them in the dictionary
        if are_values_different(*values):
            difference[name] = values

    return difference
217
95
218
96
219
97
def get_data_md5sums (files ):
@@ -252,16 +130,16 @@ def main():
252
130
header_fields = file_headers [0 ].keys ()
253
131
else :
254
132
header_fields = opts .header_fields .split (',' )
255
- headers = diff_headers ( file_headers , header_fields )
256
- diff = get_headers_diff (file_headers , headers )
133
+
134
+ diff = get_headers_diff (file_headers , header_fields )
257
135
data_diff = get_data_md5sums (files )
258
136
259
137
if data_diff :
260
138
diff ['DATA(md5)' ] = data_diff
261
139
262
140
if diff :
263
141
print ("These files are different." )
264
- print ("{:<11 }" .format ('Field' ), end = "" )
142
+ print ("{:<15 }" .format ('Field' ), end = "" )
265
143
266
144
for f in files :
267
145
output = ""
@@ -273,12 +151,12 @@ def main():
273
151
output += f [i ]
274
152
i += 1
275
153
276
- print ("{:<45 }" .format (output ), end = "" )
154
+ print ("{:<55 }" .format (output ), end = "" )
277
155
278
156
print ()
279
157
280
158
for key , value in diff .items ():
281
- print ("{:<11 }" .format (key ), end = "" )
159
+ print ("{:<15 }" .format (key ), end = "" )
282
160
283
161
for item in value :
284
162
item_str = str (item )
@@ -289,7 +167,7 @@ def main():
289
167
# and also replace some other invisible symbols with a question
290
168
# mark
291
169
item_str = re .sub ('[\x00 ]' , '?' , item_str )
292
- print ("{:<45 }" .format (item_str ), end = "" )
170
+ print ("{:<55 }" .format (item_str ), end = "" )
293
171
294
172
print ()
295
173
0 commit comments