@@ -44,95 +44,175 @@ def get_opt_parser():
44
44
return p
45
45
46
46
47
- def diff_values (compare1 , compare2 ):
47
+ def diff_values (first_item , second_item ):
48
48
"""Generically compares two values, returns true if different"""
49
- if np .any (compare1 != compare2 ):
49
+ if np .any (first_item != second_item ): # comparing items that are instances of class np.ndarray
50
50
return True
51
- elif type (compare1 ) != type (compare2 ):
51
+
52
+ elif type (first_item ) != type (second_item ): # comparing items that differ in data type
52
53
return True
53
- else :
54
- return compare1 != compare2
55
54
55
+ else : # all other use cases
56
+ return first_item != second_item
57
+
58
+
59
+ def diff_headers (files , fields ):
60
+ """Iterates over all header fields of all files to find those that differ
61
+
62
+ Parameters
63
+ ----------
64
+ files: list of file headers to be compared
65
+ fields: names of the header fields to compare
66
+
67
+ Returns
68
+ -------
69
+ list
70
+ header fields whose values differ across files; None when no field differs
71
+ """
72
+
73
+ headers = []
74
+
75
+ for f in range (len (files )): # for each file
76
+ for h in fields : # for each header
77
+
78
+ # each step is wrapped in a try block because these operations have raised exceptions before
79
+ # get the particular header field within the particular file
80
+
81
+ try :
82
+ field = files [f ][h ]
83
+
84
+ except ValueError :
85
+ continue
86
+
87
+ # skip header fields whose values are all NaN
88
+ try :
89
+ if np .all (np .isnan (field )):
90
+ continue
91
+
92
+ except TypeError :
93
+ pass
94
+
95
+ # compare current file with other files
96
+ for i in files [f + 1 :]:
97
+ other_field = i [h ]
98
+
99
+ # field.item() is not available for every value; skip the comparison when it raises AttributeError
100
+ try :
101
+ # converting bytes to be compared as strings
102
+ if isinstance (field .item (0 ), bytes ):
103
+ field = field .item (0 ).decode ("utf-8" )
104
+
105
+ # converting np.ndarray to lists to remove ambiguity
106
+ if isinstance (field , np .ndarray ):
107
+ field = field .tolist ()
108
+
109
+ if isinstance (other_field .item (0 ), bytes ):
110
+ other_field = other_field .item (0 ).decode ("utf-8" )
111
+ if isinstance (other_field , np .ndarray ):
112
+ other_field = other_field .tolist ()
56
113
57
- def diff_header_fields (key , inputs ):
58
- """Iterates over a single header field of multiple files"""
114
+ except AttributeError :
115
+ continue
116
+
117
+ # if the header values of the two files are different, append
118
+ if diff_values (field , other_field ):
119
+ headers .append (h )
120
+
121
+ if headers : # return a list of headers for the files whose values differ
122
+ return headers
123
+
124
+
125
+ def diff_header_fields (header_field , files ):
126
+ """Iterates over a single header field of multiple files
127
+
128
+ Parameters
129
+ ----------
130
+ header_field: a given header field
131
+ files: the file headers to be compared
132
+
133
+ Returns
134
+ -------
135
+ list
136
+ string form of each file's value for the given header field, with "@dtype" appended when dtypes differ
137
+ """
59
138
60
139
keyed_inputs = []
61
140
62
- for i in inputs : # stores each file's respective header files
141
+ for i in files :
142
+
143
+ # each step is wrapped in a try block because these operations have raised exceptions before
144
+ # get the particular header field within the particular file
145
+
63
146
try :
64
- field_value = i [key ]
147
+ field_value = i [header_field ]
65
148
except ValueError :
66
149
continue
67
150
68
- try : # filter numpy arrays
69
- if np .all (np .isnan (field_value )):
70
- continue
71
- except TypeError :
72
- pass
73
-
74
- for x in inputs [1 :]: # compare different values, print all as soon as diff is found
151
+ # compare dtypes with the other files, stop at the first difference found
152
+ for x in files [1 :]:
75
153
try :
76
- data_diff = diff_values (str (x [key ].dtype ), str (field_value .dtype ))
154
+ data_diff = diff_values (str (x [header_field ].dtype ), str (field_value .dtype ))
77
155
78
156
if data_diff :
79
157
break
80
158
except ValueError :
81
159
continue
82
160
161
+ # string formatting of responses
83
162
try :
84
- if data_diff : # prints data types if they're different and not if they're not
163
+
164
+ # if differences are found among data types
165
+ if data_diff :
166
+ # accounting for how to arrange arrays
85
167
if field_value .ndim < 1 :
86
168
keyed_inputs .append ("{}@{}" .format (field_value , field_value .dtype ))
87
169
elif field_value .ndim == 1 :
88
170
keyed_inputs .append ("{}@{}" .format (list (field_value ), field_value .dtype ))
171
+
172
+ # if no differences are found among data types
89
173
else :
90
174
if field_value .ndim < 1 :
91
- keyed_inputs .append ("{}" . format ( field_value ) )
175
+ keyed_inputs .append (field_value )
92
176
elif field_value .ndim == 1 :
93
- keyed_inputs .append ("{}" .format (list (field_value )))
177
+ keyed_inputs .append (list (field_value ))
178
+
94
179
except UnboundLocalError :
95
180
continue
96
181
97
- if keyed_inputs : # sometimes keyed_inputs is empty lol
98
- comparison_input = keyed_inputs [0 ]
182
+ for i in range ( len ( keyed_inputs )):
183
+ keyed_inputs [ i ] = str ( keyed_inputs [i ])
99
184
100
- for i in keyed_inputs [1 :]:
101
- if diff_values (comparison_input , i ):
102
- return keyed_inputs
185
+ return keyed_inputs
103
186
104
187
105
- def get_headers_diff (files , opts ):
188
+ def get_headers_diff (file_headers , headers ):
106
189
"""Get difference between headers
107
190
108
191
Parameters
109
192
----------
110
- files : list of files
111
- opts: any options included from the command line
193
+ file_headers : list of actual headers from files
194
+ headers: list of header fields that differ
112
195
113
196
Returns
114
197
-------
115
198
dict
116
- str: list for each header field which differs, return list of
199
+ str: list for each header field which differs, return list of
117
200
values per each file
118
201
"""
119
-
120
- header_list = [nib .load (f ).header for f in files ]
121
202
output = OrderedDict ()
122
203
123
- if opts .header_fields : # will almost always have a header field
124
- # signals "all fields"
125
- if opts .header_fields == 'all' :
126
- # TODO: header fields might vary across file types, thus prior sensing would be needed
127
- header_fields = header_list [0 ].keys ()
128
- else :
129
- header_fields = opts .header_fields .split (',' )
204
+ # if there are headers that differ
205
+ if headers :
130
206
131
- for f in header_fields :
132
- val = diff_header_fields ( f , header_list )
207
+ # for each header
208
+ for header in headers :
133
209
210
+ # find the values corresponding to the files that differ
211
+ val = diff_header_fields (header , file_headers )
212
+
213
+ # store these values in a dictionary
134
214
if val :
135
- output [f ] = val
215
+ output [header ] = val
136
216
137
217
return output
138
218
@@ -164,8 +244,19 @@ def main():
164
244
# suppress nibabel format-compliance warnings
165
245
nib .imageglobals .logger .level = 50
166
246
167
- diff = get_headers_diff (files , opts )
247
+ file_headers = [nib .load (f ).header for f in files ]
248
+
249
+ if opts .header_fields : # will almost always have a header field
250
+ # signals "all fields"
251
+ if opts .header_fields == 'all' :
252
+ # TODO: header fields might vary across file types, thus prior sensing would be needed
253
+ header_fields = file_headers [0 ].keys ()
254
+ else :
255
+ header_fields = opts .header_fields .split (',' )
256
+ headers = diff_headers (file_headers , header_fields )
257
+ diff = get_headers_diff (file_headers , headers )
168
258
data_diff = get_data_md5sums (files )
259
+
169
260
if data_diff :
170
261
diff ['DATA(md5)' ] = data_diff
171
262
0 commit comments