Skip to content

Commit dd0b8ef

Browse files
committed
Merge remote-tracking branch 'upstream/master' into rel/2.3.1
2 parents 65cb2b9 + 679aa5b commit dd0b8ef

File tree

7 files changed

+307
-100
lines changed

7 files changed

+307
-100
lines changed

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ cache:
1313
- $HOME/.cache/pip
1414
env:
1515
global:
16-
- DEPENDS="six numpy scipy matplotlib h5py pillow pydicom hypothesis"
16+
- DEPENDS="six numpy scipy matplotlib h5py pillow pydicom"
1717
- OPTIONAL_DEPENDS=""
1818
- INSTALL_TYPE="setup"
1919
- EXTRA_WHEELS="https://5cf40426d9f06eb7461d-6fe47d9331aba7cd62fc36c7196769e4.ssl.cf2.rackcdn.com"
@@ -97,7 +97,7 @@ before_install:
9797
- source venv/bin/activate
9898
- python --version # just to check
9999
- pip install -U pip wheel # needed at one point
100-
- retry pip install nose flake8 mock hypothesis # always
100+
- retry pip install nose flake8 mock # always
101101
- pip install $EXTRA_PIP_FLAGS $DEPENDS $OPTIONAL_DEPENDS
102102
- if [ "${COVERAGE}" == "1" ]; then
103103
pip install coverage;

appveyor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ install:
2222
- SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH%
2323

2424
# Install the dependencies of the project.
25-
- pip install numpy scipy matplotlib nose h5py mock hypothesis pydicom
25+
- pip install numpy scipy matplotlib nose h5py mock pydicom
2626
- pip install .
2727
- SET NIBABEL_DATA_DIR=%CD%\nibabel-data
2828

dev-requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,3 @@
22
-r requirements.txt
33
nose
44
mock
5-
hypothesis

nibabel/cmdline/diff.py

Lines changed: 199 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -38,32 +38,82 @@ def get_opt_parser():
3838

3939
Option("-H", "--header-fields",
4040
dest="header_fields", default='all',
41-
help="Header fields (comma separated) to be printed as well (if present)"),
41+
help="Header fields (comma separated) to be printed as well"
42+
" (if present)"),
43+
44+
Option("--ma", "--data-max-abs-diff",
45+
dest="data_max_abs_diff",
46+
type=float,
47+
default=0.0,
48+
help="Maximal absolute difference in data between files"
49+
" to tolerate."),
50+
51+
Option("--mr", "--data-max-rel-diff",
52+
dest="data_max_rel_diff",
53+
type=float,
54+
default=0.0,
55+
help="Maximal relative difference in data between files to"
56+
" tolerate. If --data-max-abs-diff is also specified,"
57+
" only the data points with absolute difference greater"
58+
" than that value would be considered for relative"
59+
" difference check."),
60+
Option("--dt", "--datatype",
61+
dest="dtype",
62+
default=np.float64,
63+
help="Enter a numpy datatype such as 'float32'.")
4264
])
4365

4466
return p
4567

4668

4769
def are_values_different(*values):
48-
"""Generically compares values, returns true if different"""
49-
value0 = values[0]
50-
values = values[1:] # to ensure that the first value isn't compared with itself
51-
52-
for value in values:
53-
try: # we sometimes don't want NaN values
54-
if np.any(np.isnan(value0)) and np.any(np.isnan(value)): # if they're both NaN
55-
break
56-
elif np.any(np.isnan(value0)) or np.any(np.isnan(value)): # if only 1 is NaN
57-
return True
70+
"""Generically compare values, return True if different
5871
59-
except TypeError:
60-
pass
72+
Note that comparison is targeting reporting of comparison of the headers
73+
so has following specifics:
74+
- even a difference in data types is considered a difference, i.e. 1 != 1.0
75+
- nans are considered to be the "same", although generally nan != nan
76+
"""
77+
value0 = values[0]
6178

79+
# to not recompute over again
80+
if isinstance(value0, np.ndarray):
81+
try:
82+
# np.asanyarray for elderly numpys, e.g. 1.7.1 where for
83+
# degenerate arrays (shape ()) it would return a pure scalar
84+
value0_nans = np.asanyarray(np.isnan(value0))
85+
value0_nonnans = np.asanyarray(np.logical_not(value0_nans))
86+
# if value0_nans.size == 1:
87+
# import pdb; pdb.set_trace()
88+
if not np.any(value0_nans):
89+
value0_nans = None
90+
except TypeError as exc:
91+
str_exc = str(exc)
92+
# Not implemented in numpy 1.7.1
93+
if "not supported" in str_exc or "ot implemented" in str_exc:
94+
value0_nans = None
95+
else:
96+
raise
97+
98+
for value in values[1:]:
6299
if type(value0) != type(value): # if types are different, then we consider them different
63100
return True
64101
elif isinstance(value0, np.ndarray):
65-
return np.any(value0 != value)
66-
102+
if value0.dtype != value.dtype or \
103+
value0.shape != value.shape:
104+
return True
105+
# there might be nans and they need special treatment
106+
if value0_nans is not None:
107+
value_nans = np.isnan(value)
108+
if np.any(value0_nans != value_nans):
109+
return True
110+
if np.any(value0[value0_nonnans] != value[value0_nonnans]):
111+
return True
112+
elif np.any(value0 != value):
113+
return True
114+
elif value0 is np.nan:
115+
if value is not np.nan:
116+
return True
67117
elif value0 != value:
68118
return True
69119

@@ -101,8 +151,8 @@ def get_headers_diff(file_headers, names=None):
101151
return difference
102152

103153

104-
def get_data_diff(files):
105-
"""Get difference between md5 values
154+
def get_data_hash_diff(files, dtype=np.float64):
155+
"""Get difference between md5 values of data
106156
107157
Parameters
108158
----------
@@ -115,7 +165,7 @@ def get_data_diff(files):
115165
"""
116166

117167
md5sums = [
118-
hashlib.md5(np.ascontiguousarray(nib.load(f).get_data(), dtype=np.float32)).hexdigest()
168+
hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata(dtype=dtype))).hexdigest()
119169
for f in files
120170
]
121171

@@ -125,6 +175,86 @@ def get_data_diff(files):
125175
return md5sums
126176

127177

178+
def get_data_diff(files, max_abs=0, max_rel=0, dtype=np.float64):
    """Get difference between data

    Parameters
    ----------
    files: list of (str or ndarray)
        If list of strings is provided -- they must be existing file names
    max_abs: float, optional
        Maximal absolute difference to tolerate.
    max_rel: float, optional
        Maximal relative difference to tolerate, where the relative
        difference of a data point is
        `abs(d1 - d2) / (0.5 * (abs(d1) + abs(d2)))`.
        If `max_abs` is specified, then those data points with an absolute
        difference not exceeding it are not considered for relative
        difference testing.
    dtype: np, optional
        Datatype to be used when extracting data from files

    Returns
    -------
    diffs: OrderedDict
        An ordered dict with a record per each file which has differences
        with the files following it. Each record is a list with one entry
        per file: None for files which were not compared or do not differ,
        an OrderedDict with keys 'abs' and 'rel' (maximal absolute and
        relative differences) for pairs which differ, or the record
        {'CMP': 'incompat'} if the shapes of the pair are incompatible.
    """
    # we are doomed to keep them in RAM now
    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=dtype)
            for f in files]
    diffs = OrderedDict()
    for i, d1 in enumerate(data[:-1]):
        # populate empty entries for non-compared
        diffs1 = [None] * (i + 1)

        for d2 in data[i + 1:]:
            if d1.shape == d2.shape:
                diffs1.append(
                    _get_data_pair_diff(d1, d2, max_abs, max_rel, dtype))
            else:
                diffs1.append({'CMP': "incompat"})

        if any(diffs1):
            diffs['DATA(diff %d:)' % (i + 1)] = diffs1

    return diffs


def _get_data_pair_diff(d1, d2, max_abs, max_rel, dtype):
    """Return an 'abs'/'rel' record for two same-shaped arrays, or None"""
    abs_diff = np.abs(d1 - d2)
    if abs_diff.size == 0:
        # nothing to compare (and np.max would raise on an empty array)
        return None

    mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5
    # Only points which actually differ can count as differences; requiring
    # a non-zero mean as well keeps the division below well defined.
    # (Seeding with `mean_abs != 0` alone flagged every non-zero point, so
    # identical arrays were reported as different under zero tolerances.)
    candidates = np.logical_and(abs_diff != 0, mean_abs != 0)

    if max_abs:
        candidates[abs_diff <= max_abs] = False

    max_abs_diff = np.max(abs_diff)
    if not np.any(candidates):
        return None

    rel_diff = abs_diff[candidates] / mean_abs[candidates]
    max_rel_diff = np.max(rel_diff)
    if max_rel:
        sub_thr = rel_diff <= max_rel
        # Since we operated on sub-selected values already, we need
        # to plug them back in
        candidates[
            tuple(indexes[sub_thr] for indexes in np.where(candidates))
        ] = False
        if not np.any(candidates):
            return None

    diff_rec = OrderedDict()  # so that abs goes before relative
    diff_rec['abs'] = max_abs_diff.astype(dtype)
    diff_rec['rel'] = max_rel_diff.astype(dtype)
    return diff_rec
256+
257+
128258
def display_diff(files, diff):
129259
"""Format header differences into a nice string
130260
@@ -140,21 +270,27 @@ def display_diff(files, diff):
140270
"""
141271
output = ""
142272
field_width = "{:<15}"
273+
filename_width = "{:<53}"
143274
value_width = "{:<55}"
144275

145276
output += "These files are different.\n"
146-
output += field_width.format('Field')
277+
output += field_width.format('Field/File')
147278

148-
for f in files:
149-
output += value_width.format(os.path.basename(f))
279+
for i, f in enumerate(files, 1):
280+
output += "%d:%s" % (i, filename_width.format(os.path.basename(f)))
150281

151282
output += "\n"
152283

153284
for key, value in diff.items():
154285
output += field_width.format(key)
155286

156287
for item in value:
157-
item_str = str(item)
288+
if isinstance(item, dict):
289+
item_str = ', '.join('%s: %s' % i for i in item.items())
290+
elif item is None:
291+
item_str = '-'
292+
else:
293+
item_str = str(item)
158294
# Value might start/end with some invisible spacing characters so we
159295
# would "condition" it on both ends a bit
160296
item_str = re.sub('^[ \t]+', '<', item_str)
@@ -169,8 +305,40 @@ def display_diff(files, diff):
169305
return output
170306

171307

308+
def diff(files, header_fields='all', data_max_abs_diff=None,
         data_max_rel_diff=None, dtype=np.float64):
    """Collect header and data differences between files

    Parameters
    ----------
    files: list of str
        Names of the files to compare; at least two are required.
    header_fields: str, optional
        Comma separated header field names to compare, or 'all' to use
        the fields of the first file's header.
    data_max_abs_diff: float, optional
        Passed to `get_data_diff` as `max_abs`.
    data_max_rel_diff: float, optional
        Passed to `get_data_diff` as `max_rel`.
    dtype: np, optional
        Datatype handed to both the hashing and the detailed data
        comparison.

    Returns
    -------
    The result of `get_headers_diff`, extended with a 'DATA(md5)' entry and
    the per-file data difference records whenever the data hashes differ
    and the detailed comparison still reports differences.
    """
    assert len(files) >= 2, "Please enter at least two files"

    headers = [nib.load(fname).header for fname in files]

    # 'all' signals "all fields"
    # TODO: header fields might vary across file types,
    #       thus prior sensing would be needed
    fields = (headers[0].keys() if header_fields == 'all'
              else header_fields.split(','))

    differences = get_headers_diff(headers, fields)

    md5_diffs = get_data_hash_diff(files, dtype)
    if not md5_diffs:
        return differences

    # provide details, possibly triggering the ignore of the difference
    # in data
    detailed = get_data_diff(files,
                             max_abs=data_max_abs_diff,
                             max_rel=data_max_rel_diff,
                             dtype=dtype)
    if detailed:
        differences['DATA(md5)'] = md5_diffs
        differences.update(detailed)

    return differences
337+
338+
172339
def main(args=None, out=None):
173340
"""Getting the show on the road"""
341+
174342
out = out or sys.stdout
175343
parser = get_opt_parser()
176344
(opts, files) = parser.parse_args(args)
@@ -181,27 +349,17 @@ def main(args=None, out=None):
181349
# suppress nibabel format-compliance warnings
182350
nib.imageglobals.logger.level = 50
183351

184-
assert len(files) >= 2, "Please enter at least two files"
352+
files_diff = diff(
353+
files,
354+
header_fields=opts.header_fields,
355+
data_max_abs_diff=opts.data_max_abs_diff,
356+
data_max_rel_diff=opts.data_max_rel_diff,
357+
dtype=opts.dtype
358+
)
185359

186-
file_headers = [nib.load(f).header for f in files]
187-
188-
# signals "all fields"
189-
if opts.header_fields == 'all':
190-
# TODO: header fields might vary across file types, thus prior sensing would be needed
191-
header_fields = file_headers[0].keys()
192-
else:
193-
header_fields = opts.header_fields.split(',')
194-
195-
diff = get_headers_diff(file_headers, header_fields)
196-
data_diff = get_data_diff(files)
197-
198-
if data_diff:
199-
diff['DATA(md5)'] = data_diff
200-
201-
if diff:
202-
out.write(display_diff(files, diff))
360+
if files_diff:
361+
out.write(display_diff(files, files_diff))
203362
raise SystemExit(1)
204-
205363
else:
206364
out.write("These files are identical.\n")
207365
raise SystemExit(0)

0 commit comments

Comments
 (0)