Skip to content

Commit 0408783

Browse files
authored
Merge pull request #617 from yarikoptic/nf-nib-diff
NF nib-diff to visualize difference between multiple neuroimaging files
2 parents 0f947a4 + baf6cdc commit 0408783

File tree

9 files changed

+457
-9
lines changed

9 files changed

+457
-9
lines changed

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ cache:
1414
- $HOME/.cache/pip
1515
env:
1616
global:
17-
- DEPENDS="six numpy scipy matplotlib h5py pillow pydicom"
17+
- DEPENDS="six numpy scipy matplotlib h5py pillow pydicom hypothesis"
1818
- OPTIONAL_DEPENDS=""
1919
- INSTALL_TYPE="setup"
2020
- EXTRA_WHEELS="https://5cf40426d9f06eb7461d-6fe47d9331aba7cd62fc36c7196769e4.ssl.cf2.rackcdn.com"
@@ -95,7 +95,7 @@ before_install:
9595
- source venv/bin/activate
9696
- python --version # just to check
9797
- pip install -U pip wheel # needed at one point
98-
- retry pip install nose flake8 mock # always
98+
- retry pip install nose flake8 mock hypothesis # always
9999
- pip install $EXTRA_PIP_FLAGS $DEPENDS $OPTIONAL_DEPENDS
100100
- if [ "${COVERAGE}" == "1" ]; then
101101
pip install coverage;

appveyor.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ install:
2020
- SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH%
2121

2222
# Install the dependencies of the project.
23-
- pip install numpy scipy matplotlib nose h5py mock
24-
- pip install pydicom
23+
- pip install numpy scipy matplotlib nose h5py mock hypothesis pydicom
2524
- pip install .
2625
- SET NIBABEL_DATA_DIR=%CD%\nibabel-data
2726

bin/nib-diff

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!python
2+
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
3+
# vi: set ft=python sts=4 ts=4 sw=4 et:
4+
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
5+
#
6+
# See COPYING file distributed along with the NiBabel package for the
7+
# copyright and license terms.
8+
#
9+
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
10+
"""
11+
Quick diff summary for a set of neuroimaging files
12+
"""
13+
14+
# Thin launcher: the implementation lives in nibabel.cmdline.diff and this
# script only invokes its entry point when executed directly.
from nibabel.cmdline.diff import main as run_diff


if __name__ == '__main__':
    run_diff()

dev-requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
-r requirements.txt
33
nose
44
mock
5+
hypothesis

nibabel/cmdline/diff.py

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
#!python
2+
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
3+
# vi: set ft=python sts=4 ts=4 sw=4 et:
4+
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
5+
#
6+
# See COPYING file distributed along with the NiBabel package for the
7+
# copyright and license terms.
8+
#
9+
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
10+
"""
11+
Quick summary of the differences among a set of neuroimaging files
12+
"""
13+
from __future__ import division, print_function, absolute_import
14+
15+
import re
16+
import sys
17+
from collections import OrderedDict
18+
from optparse import OptionParser, Option
19+
20+
import numpy as np
21+
22+
import nibabel as nib
23+
import nibabel.cmdline.utils
24+
import hashlib
25+
import os
26+
27+
28+
def get_opt_parser():
    """Build the command-line option parser.

    Returns
    -------
    optparse.OptionParser
        Parser whose usage text is the invoked program name followed by the
        module docstring, and which accepts a countable ``-v/--verbose``
        flag and a ``-H/--header-fields`` option (default ``'all'``).
    """
    # the module docstring doubles as the body of the help output
    usage_text = "%s [OPTIONS] [FILE ...]\n\n" % sys.argv[0] + __doc__
    version_text = "%prog " + nib.__version__
    parser = OptionParser(usage=usage_text, version=version_text)

    verbosity = Option(
        "-v", "--verbose",
        action="count", dest="verbose", default=0,
        help="Make more noise. Could be specified multiple times")
    fields = Option(
        "-H", "--header-fields",
        dest="header_fields", default='all',
        help="Header fields (comma separated) to be printed as well (if present)")

    parser.add_options([verbosity, fields])
    return parser
45+
46+
47+
def _is_scalar_nan(value):
    """Return True only if `value` is a single NaN; non-numeric values never are."""
    try:
        flag = np.isnan(value)
    except TypeError:
        # strings, None, ... have no notion of NaN
        return False
    return np.ndim(flag) == 0 and bool(flag)


def _arrays_differ(first, second):
    """Return True if two ndarrays differ in shape or in any element.

    NaNs located at the same position in both arrays count as equal.
    """
    if first.shape != second.shape:
        # never let broadcasting hide a shape difference
        return True

    mismatch = first != second
    try:
        both_nan = np.logical_and(np.isnan(first), np.isnan(second))
    except TypeError:
        # dtype without a NaN concept (e.g. byte strings): plain comparison
        return bool(np.any(mismatch))
    # nan != nan is True, so drop positions where both sides are NaN
    return bool(np.any(np.logical_and(mismatch, np.logical_not(both_nan))))


def are_values_different(*values):
    """Generically compare values, return True if any differs from the first.

    Every value after the first is compared against the first one:

    - a difference in type alone counts as a difference (``1`` vs ``1.0``);
    - NaNs are considered the "same" as each other, although ``nan != nan``;
    - ndarrays are different if their shapes differ or if any element
      differs (NaNs at matching positions are considered equal).

    Parameters
    ----------
    *values
        Values to compare. With fewer than two values there is nothing to
        compare and False is returned.

    Returns
    -------
    bool
        True if at least one value differs from the first, False otherwise.
    """
    if len(values) < 2:
        return False

    value0 = values[0]

    for value in values[1:]:  # the first value isn't compared with itself
        if type(value0) is not type(value):
            return True

        if isinstance(value0, np.ndarray):
            if _arrays_differ(value0, value):
                return True
            # keep scanning: later values may still differ
            continue

        if _is_scalar_nan(value0) and _is_scalar_nan(value):
            # both NaN: same for our purposes, but keep checking the rest
            continue

        if value0 != value:
            return True

    return False
71+
72+
73+
def get_headers_diff(file_headers, names=None):
    """Collect the header fields whose values are not the same in all files.

    Parameters
    ----------
    file_headers : list
        Header objects, one per file; each must provide ``get(field)``, and
        the first must provide ``keys()`` when `names` is None.
    names : iterable of str, optional
        Header fields to test. When None, the fields of the first header
        are used.

    Returns
    -------
    OrderedDict
        Maps each differing field name to the list of its values, one per
        header and in the order of `file_headers`.
    """
    if names is None:
        fields = file_headers[0].keys()
    else:
        fields = names

    # pair every field with the values found for it across all headers
    per_field = (
        (field, [header.get(field) for header in file_headers])
        for field in fields
    )

    # keep only the fields for which at least one file disagrees
    return OrderedDict(
        (field, found)
        for field, found in per_field
        if are_values_different(*found)
    )
102+
103+
104+
def get_data_diff(files):
    """Compare the data of the files through their md5 checksums.

    Parameters
    ----------
    files : list
        File names, each loadable with ``nib.load``.

    Returns
    -------
    list of str
        The md5 hex digest of each file's data, in the order of `files`;
        an empty list if all digests are identical.
    """
    digests = []

    for fname in files:
        data = nib.load(fname).get_data()
        # hash a contiguous float32 view of the data
        buf = np.ascontiguousarray(data, dtype=np.float32)
        digests.append(hashlib.md5(buf).hexdigest())

    all_same = len(set(digests)) == 1
    return [] if all_same else digests
126+
127+
128+
def display_diff(files, diff):
    """Render the differences as a fixed-width text table.

    Parameters
    ----------
    files : list of str
        Paths of the compared files; only their base names are printed, as
        column titles.
    diff : dict
        Maps a field name to the list of its values, one per file.

    Returns
    -------
    str
        A banner line, a title row, then one row per differing field.
    """
    field_fmt = "{:<15}"
    value_fmt = "{:<55}"

    def _visible(item):
        # Values might start/end with invisible spacing characters, so mark
        # such runs on both ends, and show NUL bytes as question marks
        text = str(item)
        text = re.sub('^[ \t]+', '<', text)
        text = re.sub('[ \t]+$', '>', text)
        return re.sub('[\x00]', '?', text)

    chunks = ["These files are different.\n", field_fmt.format('Field')]
    chunks.extend(value_fmt.format(os.path.basename(f)) for f in files)
    chunks.append("\n")

    for key, values in diff.items():
        chunks.append(field_fmt.format(key))
        chunks.extend(value_fmt.format(_visible(item)) for item in values)
        chunks.append("\n")

    return "".join(chunks)
170+
171+
172+
def main(args=None, out=None):
    """Command-line entry point: compare headers and data of the given files.

    Parameters
    ----------
    args : list of str, optional
        Command-line arguments, handed to ``OptionParser.parse_args``.
    out : file-like, optional
        Stream the report is written to; defaults to ``sys.stdout``.

    Raises
    ------
    SystemExit
        Always. Code 0 if the files are identical, 1 if they differ, and
        2 (through ``parser.error``) if fewer than two files were given.
    """
    out = out or sys.stdout
    parser = get_opt_parser()
    (opts, files) = parser.parse_args(args)

    nibabel.cmdline.utils.verbose_level = opts.verbose

    if nibabel.cmdline.utils.verbose_level < 3:
        # suppress nibabel format-compliance warnings
        nib.imageglobals.logger.level = 50

    # Explicit check rather than `assert`: asserts are stripped under -O, and
    # a usage error should not share exit status 1 with "files differ"
    if len(files) < 2:
        parser.error("Please enter at least two files")

    file_headers = [nib.load(f).header for f in files]

    # signals "all fields"
    if opts.header_fields == 'all':
        # TODO: header fields might vary across file types, thus prior sensing would be needed
        header_fields = file_headers[0].keys()
    else:
        header_fields = opts.header_fields.split(',')

    diff = get_headers_diff(file_headers, header_fields)
    data_diff = get_data_diff(files)

    if data_diff:
        diff['DATA(md5)'] = data_diff

    if diff:
        out.write(display_diff(files, diff))
        raise SystemExit(1)

    out.write("These files are identical.\n")
    raise SystemExit(0)

0 commit comments

Comments
 (0)