Skip to content

Commit 833b4df

Browse files
committed
Changed as commented on in the pull request
1 parent be35aca commit 833b4df

File tree

3 files changed

+127
-30
lines changed

3 files changed

+127
-30
lines changed

nibabel/cmdline/diff.py

Lines changed: 122 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,21 @@ def get_opt_parser():
3939
Option("-H", "--header-fields",
4040
dest="header_fields", default='all',
4141
help="Header fields (comma separated) to be printed as well (if present)"),
42+
43+
Option("--ma", "--data-max-abs-diff",
44+
dest="data_max_abs_diff",
45+
type=float,
46+
default=0.0,
47+
help="Maximal absolute difference in data between files to tolerate."),
48+
49+
Option("--mr", "--data-max-rel-diff",
50+
dest="data_max_rel_diff",
51+
type=float,
52+
default=0.0,
53+
help="Maximal relative difference in data between files to tolerate."
54+
" If also --data-max-abs-diff specified, only the data points "
55+
" with absolute difference greater than that value would be "
56+
" considered for relative difference check."),
4257
])
4358

4459
return p
@@ -101,8 +116,8 @@ def get_headers_diff(file_headers, names=None):
101116
return difference
102117

103118

104-
def get_data_diff(files):
105-
"""Get difference between md5 values
119+
def get_data_md5_diff(files):
120+
"""Get difference between md5 values of data
106121
107122
Parameters
108123
----------
@@ -125,6 +140,65 @@ def get_data_diff(files):
125140
return md5sums
126141

127142

143+
def _as_inexact_data(arr):
    """Return `arr` as an array of at least float64 precision

    Integer and boolean data are promoted so that subtraction cannot wrap
    around (unsigned types) or fail (booleans); complex data stays complex.
    """
    arr = np.asarray(arr)
    return arr.astype(np.result_type(arr.dtype, np.float64), copy=False)


def _get_pair_diff(d1, d2, max_abs, max_rel):
    """Summarize the difference between two arrays; None if it is tolerated"""
    if d1.shape != d2.shape:
        # element-wise comparison is meaningless (and could silently
        # broadcast or raise), so just flag the pair as incomparable
        return {'CMP': "incompat"}

    abs_diff = np.abs(d1 - d2)
    # Only the points which actually differ can violate a tolerance.
    # Wherever abs_diff != 0 the mean below is also non-zero, so the
    # division is safe.
    candidates = abs_diff != 0
    if max_abs:
        candidates[abs_diff <= max_abs] = False
    if not np.any(candidates):
        return None

    mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5
    rel_diff = abs_diff[candidates] / mean_abs[candidates]
    if max_rel and np.all(rel_diff <= max_rel):
        return None

    diff_rec = OrderedDict()  # so that abs goes before relative
    diff_rec['abs'] = np.max(abs_diff)
    diff_rec['rel'] = np.max(rel_diff)
    return diff_rec


def get_data_diff(files, max_abs=0, max_rel=0):
    """Get pairwise absolute and relative differences between data

    Every file is compared against each of the files following it.

    Parameters
    ----------
    files: list of (str or ndarray)
      File names whose data is loaded with `nib.load`.  Items which are
      already numpy arrays are compared as they are, without loading.
    max_abs: float, optional
      Maximal absolute difference to tolerate.
    max_rel: float, optional
      Maximal relative difference to tolerate, where the relative difference
      of two values is `abs(d1 - d2) / (0.5 * (abs(d1) + abs(d2)))`.
      If `max_abs` is specified, then those data points with an absolute
      difference not exceeding it are not considered for relative
      difference testing.

    Returns
    -------
    diffs: OrderedDict
      Maps `'DATA(diff N:)'` (N is the 1-based index of the reference file)
      to a list with one entry per file.  An entry is None for the files not
      compared against file N (itself and the preceding ones) and for files
      which do not differ beyond the tolerances; `{'CMP': "incompat"}` if the
      shapes differ; otherwise an OrderedDict with the maximal absolute
      difference over all points (`'abs'`) and the maximal relative
      difference over the differing points exceeding `max_abs` (`'rel'`).
      Reference files without any reported difference get no key at all.
    """
    # we are doomed to keep them in RAM now
    data = [
        _as_inexact_data(f if isinstance(f, np.ndarray) else nib.load(f).get_data())
        for f in files
    ]
    diffs = OrderedDict()
    for i, d1 in enumerate(data[:-1]):
        # populate empty entries for non-compared
        diffs1 = [None] * (i + 1)
        diffs1.extend(
            _get_pair_diff(d1, d2, max_abs, max_rel) for d2 in data[i + 1:]
        )

        if any(diffs1):
            diffs['DATA(diff %d:)' % (i + 1)] = diffs1

    return diffs
200+
201+
128202
def display_diff(files, diff):
129203
"""Format header differences into a nice string
130204
@@ -143,18 +217,23 @@ def display_diff(files, diff):
143217
value_width = "{:<55}"
144218

145219
output += "These files are different.\n"
146-
output += field_width.format('Field')
220+
output += field_width.format('Field/File')
147221

148-
for f in files:
149-
output += value_width.format(os.path.basename(f))
222+
for i, f in enumerate(files, 1):
223+
output += "%d:%s" % (i, value_width.format(os.path.basename(f)))
150224

151225
output += "\n"
152226

153227
for key, value in diff.items():
154228
output += field_width.format(key)
155229

156230
for item in value:
157-
item_str = str(item)
231+
if isinstance(item, dict):
232+
item_str = ', '.join('%s: %s' % i for i in item.items())
233+
elif item is None:
234+
item_str = '-'
235+
else:
236+
item_str = str(item)
158237
# Value might start/end with some invisible spacing characters so we
159238
# would "condition" it on both ends a bit
160239
item_str = re.sub('^[ \t]+', '<', item_str)
@@ -169,8 +248,37 @@ def display_diff(files, diff):
169248
return output
170249

171250

251+
def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None):
    """Collect header and data differences among the given files

    Parameters
    ----------
    files: list of str
      Names of the files to compare; at least two are required.
    header_fields: str, optional
      Comma separated names of the header fields to compare, or 'all' to
      use the fields of the first file's header.
    data_max_abs_diff: float, optional
      Passed to `get_data_diff` as `max_abs`.
    data_max_rel_diff: float, optional
      Passed to `get_data_diff` as `max_rel`.

    Returns
    -------
    The result of `get_headers_diff`, extended with a 'DATA(md5)' entry and
    the entries from `get_data_diff` if both the md5 and the detailed data
    comparison report differences.
    """
    assert len(files) >= 2, "Please enter at least two files"

    headers = [nib.load(fname).header for fname in files]

    if header_fields == 'all':
        # signals "all fields"
        # TODO: header fields might vary across file types, thus prior sensing would be needed
        field_names = headers[0].keys()
    else:
        field_names = header_fields.split(',')

    differences = get_headers_diff(headers, field_names)

    md5_diffs = get_data_md5_diff(files)
    if not md5_diffs:
        return differences

    # provide details, possibly triggering the ignore of the difference
    # in data
    detailed_diffs = get_data_diff(
        files, max_abs=data_max_abs_diff, max_rel=data_max_rel_diff)
    if detailed_diffs:
        differences['DATA(md5)'] = md5_diffs
        differences.update(detailed_diffs)

    return differences
277+
278+
172279
def main(args=None, out=None):
173280
"""Getting the show on the road"""
281+
174282
out = out or sys.stdout
175283
parser = get_opt_parser()
176284
(opts, files) = parser.parse_args(args)
@@ -181,27 +289,16 @@ def main(args=None, out=None):
181289
# suppress nibabel format-compliance warnings
182290
nib.imageglobals.logger.level = 50
183291

184-
assert len(files) >= 2, "Please enter at least two files"
185-
186-
file_headers = [nib.load(f).header for f in files]
187-
188-
# signals "all fields"
189-
if opts.header_fields == 'all':
190-
# TODO: header fields might vary across file types, thus prior sensing would be needed
191-
header_fields = file_headers[0].keys()
192-
else:
193-
header_fields = opts.header_fields.split(',')
292+
files_diff = diff(
293+
files,
294+
header_fields=opts.header_fields,
295+
data_max_abs_diff=opts.data_max_abs_diff,
296+
data_max_rel_diff=opts.data_max_rel_diff
297+
)
194298

195-
diff = get_headers_diff(file_headers, header_fields)
196-
data_diff = get_data_diff(files)
197-
198-
if data_diff:
199-
diff['DATA(md5)'] = data_diff
200-
201-
if diff:
202-
out.write(display_diff(files, diff))
299+
if files_diff:
300+
out.write(display_diff(files, files_diff))
203301
raise SystemExit(1)
204-
205302
else:
206303
out.write("These files are identical.\n")
207304
raise SystemExit(0)

nibabel/cmdline/tests/test_utils.py

100644100755
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import nibabel as nib
1212
import numpy as np
1313
from nibabel.cmdline.utils import *
14-
from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_diff
14+
from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff
1515
from os.path import (join as pjoin)
1616
from nibabel.testing import data_path
1717
from collections import OrderedDict
@@ -96,8 +96,8 @@ def test_display_diff():
9696
("bitpix", [np.array(8).astype(dtype="uint8"), np.array(16).astype(dtype="uint8")])
9797
])
9898

99-
expected_output = "These files are different.\n" + "Field hellokitty.nii.gz" \
100-
" " \
99+
expected_output = "These files are different.\n" + "Field/File hellokitty.nii.gz" \
100+
" " \
101101
"privettovarish.nii.gz \n" \
102102
"datatype " \
103103
"2 " \
@@ -114,7 +114,7 @@ def test_get_data_diff():
114114
# testing for identical files specifically as md5 may vary by computer
115115
test_names = [pjoin(data_path, f)
116116
for f in ('standard.nii.gz', 'standard.nii.gz')]
117-
assert_equal(get_data_diff(test_names), [])
117+
assert_equal(get_data_md5_diff(test_names), [])
118118

119119

120120
def test_main():

nibabel/tests/test_scripts.py

100644100755
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def check_nib_diff_examples():
7272
fnames = [pjoin(DATA_PATH, f)
7373
for f in ('standard.nii.gz', 'example4d.nii.gz')]
7474
code, stdout, stderr = run_command(['nib-diff'] + fnames, check_code=False)
75-
checked_fields = ["Field", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end",
75+
checked_fields = ["Field/File", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end",
7676
"xyzt_units", "cal_max", "descrip", "qform_code", "sform_code", "quatern_b",
7777
"quatern_c", "quatern_d", "qoffset_x", "qoffset_y", "qoffset_z", "srow_x",
7878
"srow_y", "srow_z", "DATA(md5)"]

0 commit comments

Comments
 (0)