@@ -39,6 +39,21 @@ def get_opt_parser():
39
39
Option ("-H" , "--header-fields" ,
40
40
dest = "header_fields" , default = 'all' ,
41
41
help = "Header fields (comma separated) to be printed as well (if present)" ),
42
+
43
+ Option ("--ma" , "--data-max-abs-diff" ,
44
+ dest = "data_max_abs_diff" ,
45
+ type = float ,
46
+ default = 0.0 ,
47
+ help = "Maximal absolute difference in data between files to tolerate." ),
48
+
49
+ Option ("--mr" , "--data-max-rel-diff" ,
50
+ dest = "data_max_rel_diff" ,
51
+ type = float ,
52
+ default = 0.0 ,
53
+ help = "Maximal relative difference in data between files to tolerate."
54
+ " If also --data-max-abs-diff specified, only the data points "
55
+ " with absolute difference greater than that value would be "
56
+ " considered for relative difference check." ),
42
57
])
43
58
44
59
return p
@@ -101,8 +116,8 @@ def get_headers_diff(file_headers, names=None):
101
116
return difference
102
117
103
118
104
- def get_data_diff (files ):
105
- """Get difference between md5 values
119
+ def get_data_md5_diff (files ):
120
+ """Get difference between md5 values of data
106
121
107
122
Parameters
108
123
----------
@@ -125,6 +140,65 @@ def get_data_diff(files):
125
140
return md5sums
126
141
127
142
143
def get_data_diff(files, max_abs=0, max_rel=0):
    """Get pairwise differences between the data of the given files

    Parameters
    ----------
    files: list of (str or ndarray)
      Names of the files to load the data from.  Already loaded arrays are
      accepted as well and are compared as they are.
    max_abs: float, optional
      Maximal absolute difference to tolerate.
    max_rel: float, optional
      Maximal relative difference to tolerate, where relative difference is
      `abs(d1 - d2) / mean(abs(d1), abs(d2))`.
      If `max_abs` is specified, then those data points with lesser than that
      absolute difference, are not considered for relative difference testing

    Returns
    -------
    diffs: OrderedDict
      Maps ``'DATA(diff <i>:)'`` (``i`` is the 1-based index of a file) to a
      list with one item per file.  Items for the files up to and including
      ``i`` are None since they are not compared.  Every later item is None
      if the data of that file matches the data of file ``i`` within the
      tolerances, a record with the maximal ``'abs'`` and ``'rel'``
      differences if it does not, or ``{'CMP': 'incompat'}`` if the shapes
      differ.  Files which match all later files get no entry, so the result
      is empty if no differences were found.
    """
    # we are doomed to keep them in RAM now
    data = []
    for f in files:
        arr = np.asarray(
            f if isinstance(f, np.ndarray) else nib.load(f).get_data())
        # Unsigned integers would wrap around and booleans would fail upon
        # subtraction, so such data is compared in floating point
        if arr.dtype.kind in 'biu':
            arr = arr.astype(np.float64)
        data.append(arr)

    diffs = OrderedDict()
    for i, d1 in enumerate(data[:-1]):
        # populate empty entries for non-compared
        diffs1 = [None] * (i + 1)
        diffs1.extend(
            _get_arrays_diff(d1, d2, max_abs, max_rel) for d2 in data[i + 1:])

        if any(diffs1):
            diffs['DATA(diff %d:)' % (i + 1)] = diffs1

    return diffs


def _get_arrays_diff(d1, d2, max_abs=0, max_rel=0):
    """Compare two arrays, return a difference record or None if they match"""
    if d1.shape != d2.shape:
        # element-wise comparison is not defined (and must not be broadcast)
        return OrderedDict([('CMP', 'incompat')])
    if not d1.size:
        # nothing to compare, and np.max would fail on empty input
        return None

    abs_diff = np.abs(d1 - d2)
    # Only the points which actually differ could make the data different.
    # A non-zero difference also guarantees a non-zero mean of absolute
    # values, so relative difference is always defined for the candidates
    candidates = abs_diff != 0
    if max_abs:
        candidates[abs_diff <= max_abs] = False
    if not np.any(candidates):
        return None

    mean_abs = (np.abs(d1[candidates]) + np.abs(d2[candidates])) * 0.5
    rel_diff = abs_diff[candidates] / mean_abs
    if max_rel and np.all(rel_diff <= max_rel):
        # every remaining candidate is within the relative tolerance
        return None

    diff_rec = OrderedDict()  # so that abs goes before relative
    diff_rec['abs'] = np.max(abs_diff)
    diff_rec['rel'] = np.max(rel_diff)
    return diff_rec
200
+
201
+
128
202
def display_diff (files , diff ):
129
203
"""Format header differences into a nice string
130
204
@@ -143,18 +217,23 @@ def display_diff(files, diff):
143
217
value_width = "{:<55}"
144
218
145
219
output += "These files are different.\n "
146
- output += field_width .format ('Field' )
220
+ output += field_width .format ('Field/File ' )
147
221
148
- for f in files :
149
- output += value_width .format (os .path .basename (f ))
222
+ for i , f in enumerate ( files , 1 ) :
223
+ output += "%d:%s" % ( i , value_width .format (os .path .basename (f ) ))
150
224
151
225
output += "\n "
152
226
153
227
for key , value in diff .items ():
154
228
output += field_width .format (key )
155
229
156
230
for item in value :
157
- item_str = str (item )
231
+ if isinstance (item , dict ):
232
+ item_str = ', ' .join ('%s: %s' % i for i in item .items ())
233
+ elif item is None :
234
+ item_str = '-'
235
+ else :
236
+ item_str = str (item )
158
237
# Value might start/end with some invisible spacing characters so we
159
238
# would "condition" it on both ends a bit
160
239
item_str = re .sub ('^[ \t ]+' , '<' , item_str )
@@ -169,8 +248,37 @@ def display_diff(files, diff):
169
248
return output
170
249
171
250
251
def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None):
    """Collect differences in headers and data between the given files

    Parameters
    ----------
    files: list of str
      Names of the files to compare, at least two.
    header_fields: str, optional
      Comma separated names of the header fields to compare.  The default
      'all' stands for all the fields found in the header of the first file.
    data_max_abs_diff: float, optional
      Passed as `max_abs` into `get_data_diff`.
    data_max_rel_diff: float, optional
      Passed as `max_rel` into `get_data_diff`.

    Returns
    -------
    The result of `get_headers_diff`, extended with a 'DATA(md5)' entry and
    the entries from `get_data_diff` if the data differs as well.
    """
    assert len(files) >= 2, "Please enter at least two files"

    headers = [nib.load(fname).header for fname in files]

    if header_fields == 'all':
        # TODO: header fields might vary across file types, thus prior sensing would be needed
        field_names = headers[0].keys()
    else:
        field_names = header_fields.split(',')

    differences = get_headers_diff(headers, field_names)

    md5_diffs = get_data_md5_diff(files)
    if not md5_diffs:
        return differences

    # Checksums differ, so look at the actual values.  Data differences are
    # reported only if value comparison finds some too, which lets the
    # tolerances void the difference in checksums
    value_diffs = get_data_diff(
        files, max_abs=data_max_abs_diff, max_rel=data_max_rel_diff)
    if value_diffs:
        differences['DATA(md5)'] = md5_diffs
        differences.update(value_diffs)

    return differences
277
+
278
+
172
279
def main (args = None , out = None ):
173
280
"""Getting the show on the road"""
281
+
174
282
out = out or sys .stdout
175
283
parser = get_opt_parser ()
176
284
(opts , files ) = parser .parse_args (args )
@@ -181,27 +289,16 @@ def main(args=None, out=None):
181
289
# suppress nibabel format-compliance warnings
182
290
nib .imageglobals .logger .level = 50
183
291
184
- assert len (files ) >= 2 , "Please enter at least two files"
185
-
186
- file_headers = [nib .load (f ).header for f in files ]
187
-
188
- # signals "all fields"
189
- if opts .header_fields == 'all' :
190
- # TODO: header fields might vary across file types, thus prior sensing would be needed
191
- header_fields = file_headers [0 ].keys ()
192
- else :
193
- header_fields = opts .header_fields .split (',' )
292
+ files_diff = diff (
293
+ files ,
294
+ header_fields = opts .header_fields ,
295
+ data_max_abs_diff = opts .data_max_abs_diff ,
296
+ data_max_rel_diff = opts .data_max_rel_diff
297
+ )
194
298
195
- diff = get_headers_diff (file_headers , header_fields )
196
- data_diff = get_data_diff (files )
197
-
198
- if data_diff :
199
- diff ['DATA(md5)' ] = data_diff
200
-
201
- if diff :
202
- out .write (display_diff (files , diff ))
299
+ if files_diff :
300
+ out .write (display_diff (files , files_diff ))
203
301
raise SystemExit (1 )
204
-
205
302
else :
206
303
out .write ("These files are identical.\n " )
207
304
raise SystemExit (0 )
0 commit comments