|
| 1 | +#!python |
| 2 | +# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- |
| 3 | +# vi: set ft=python sts=4 ts=4 sw=4 et: |
| 4 | +### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## |
| 5 | +# |
| 6 | +# See COPYING file distributed along with the NiBabel package for the |
| 7 | +# copyright and license terms. |
| 8 | +# |
| 9 | +### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## |
| 10 | +""" |
| 11 | +Quick summary of the differences among a set of neuroimaging files |
| 12 | +""" |
| 13 | +from __future__ import division, print_function, absolute_import |
| 14 | + |
| 15 | +import re |
| 16 | +import sys |
| 17 | +from collections import OrderedDict |
| 18 | +from optparse import OptionParser, Option |
| 19 | + |
| 20 | +import numpy as np |
| 21 | + |
| 22 | +import nibabel as nib |
| 23 | +import nibabel.cmdline.utils |
| 24 | +import hashlib |
| 25 | +import os |
| 26 | + |
| 27 | + |
def get_opt_parser():
    """Build the command-line option parser for this script.

    The usage text is the invocation name (``sys.argv[0]``) followed by the
    module docstring, and ``--version`` reports ``nib.__version__``.

    Returns
    -------
    OptionParser
        parser that understands ``-v/--verbose`` (countable) and
        ``-H/--header-fields`` (comma separated list, default ``'all'``)
    """
    # module docstring doubles as the body of the help output
    usage_text = "%s [OPTIONS] [FILE ...]\n\n" % sys.argv[0] + __doc__
    version_text = "%prog " + nib.__version__
    parser = OptionParser(usage=usage_text, version=version_text)

    parser.add_option(
        "-v", "--verbose",
        action="count",
        dest="verbose",
        default=0,
        help="Make more noise.  Could be specified multiple times")

    parser.add_option(
        "-H", "--header-fields",
        dest="header_fields",
        default='all',
        help="Header fields (comma separated) to be printed as well (if present)")

    return parser
| 45 | + |
| 46 | + |
def _nan_mask(value):
    """Return a boolean ndarray flagging the NaN entries of `value`, or None.

    None means NaN is not a meaningful notion for `value` (strings, None,
    object arrays, ragged sequences, ...), i.e. ``np.isnan`` rejects it.
    """
    try:
        return np.asarray(np.isnan(value))
    except (TypeError, ValueError):
        return None


def _differs_from(reference, reference_nans, candidate):
    """Tell whether `candidate` differs from `reference` (NaN == NaN here)."""
    # differing types are always reported as a difference (e.g. 1 vs 1.0)
    if type(reference) != type(candidate):
        return True

    candidate_nans = _nan_mask(candidate)
    nans_involved = (
        reference_nans is not None
        and candidate_nans is not None
        and (reference_nans.any() or candidate_nans.any())
    )

    if nans_involved:
        # NaNs must sit at exactly the same positions ...
        if reference_nans.shape != candidate_nans.shape:
            return True
        if np.any(reference_nans != candidate_nans):
            return True
        # ... and everything that is not NaN must match as well
        not_nan = np.logical_not(reference_nans)
        left = np.asarray(reference)[not_nan]
        right = np.asarray(candidate)[not_nan]
        return bool(np.any(left != right))

    if isinstance(reference, np.ndarray):
        # check the shape first so that broadcasting cannot hide (or raise on)
        # a mismatch in the number of elements
        if reference.shape != candidate.shape:
            return True
        return bool(np.any(reference != candidate))

    return bool(reference != candidate)


def are_values_different(*values):
    """Generically compare values, return True if any of them differs

    Every value is compared against the first one.  The comparison is meant
    for reporting header differences, hence:

    - a difference in type counts as a difference, i.e. ``1`` != ``1.0``
    - NaNs are considered equal to each other, although generally
      ``nan != nan``; NaNs at different positions, or differing non-NaN
      entries next to matching NaNs, are still reported as a difference
    - arrays of different shape are different

    Parameters
    ----------
    *values
        values to compare (scalars, strings, numpy arrays, ...)

    Returns
    -------
    bool
        True if at least one value differs from the first one; False
        otherwise (including when fewer than two values are given)
    """
    if not values:
        return False

    reference = values[0]
    # computed once, it is reused for every pairwise comparison
    reference_nans = _nan_mask(reference)

    # the first value is skipped so that it is not compared with itself
    for candidate in values[1:]:
        if _differs_from(reference, reference_nans, candidate):
            return True

    return False
| 71 | + |
| 72 | + |
def get_headers_diff(file_headers, names=None):
    """Collect the header fields whose values are not the same in all files

    Parameters
    ----------
    file_headers: list of header objects (dict-like, must provide ``get``)
    names: list of header fields to test; if None, the fields of the first
      header are used

    Returns
    -------
    OrderedDict
      str: list, mapping each differing field name to the list of its values,
      one per header and in the order of `file_headers`
    """
    if names is None:
        candidate_fields = file_headers[0].keys()
    else:
        candidate_fields = names

    mismatches = OrderedDict()

    for name in candidate_fields:
        # one value per file; a header lacking the field contributes None
        per_file = [hdr.get(name) for hdr in file_headers]

        if are_values_different(*per_file):
            mismatches[name] = per_file

    return mismatches
| 102 | + |
| 103 | + |
def get_data_diff(files):
    """Compare the image data of the files through their md5 digests

    Each file is loaded with nibabel, its data is converted to a contiguous
    float32 array, and the md5 of that buffer is taken.

    Parameters
    ----------
    files: list of file names to load

    Returns
    -------
    list
      str: md5 hex digests in the order of `files`, or an empty list if all
      digests are the same
    """
    digests = []

    for path in files:
        voxels = nib.load(path).get_data()
        # hashing needs a contiguous buffer; everything is cast to float32
        packed = np.ascontiguousarray(voxels, dtype=np.float32)
        digests.append(hashlib.md5(packed).hexdigest())

    if len(set(digests)) == 1:
        return []

    return digests
| 126 | + |
| 127 | + |
def display_diff(files, diff):
    """Render the differences as a plain-text table

    The first line states that the files differ, the second one is a header
    row ('Field' followed by the base name of every file), and each further
    row holds a field name followed by its value in every file.

    Parameters
    ----------
    files: list of files that were compared so we can print their names
    diff: dict mapping a field name to the list of its per-file values

    Returns
    -------
    str
      string-formatted table of differences
    """
    field_fmt = "{:<15}"
    value_fmt = "{:<55}"

    def _render(raw):
        text = str(raw)
        # Leading/trailing blanks are invisible in a table, so make them
        # stand out by turning them into '<' and '>' markers
        text = re.sub('^[ \t]+', '<', text)
        text = re.sub('[ \t]+$', '>', text)
        # NUL characters are not printable either, show a question mark
        text = re.sub('[\x00]', '?', text)
        return value_fmt.format(text)

    pieces = ["These files are different.\n", field_fmt.format('Field')]
    pieces.extend(value_fmt.format(os.path.basename(name)) for name in files)
    pieces.append("\n")

    for field, per_file in diff.items():
        pieces.append(field_fmt.format(field))
        pieces.extend(_render(entry) for entry in per_file)
        pieces.append("\n")

    return "".join(pieces)
| 170 | + |
| 171 | + |
def main(args=None, out=None):
    """Command-line entry point: compare the given files and report

    Parameters
    ----------
    args: list of command-line arguments, passed on to the option parser
    out: object with a ``write`` method receiving the report; ``sys.stdout``
      is used when it is not given

    Raises
    ------
    SystemExit
      always; the code is 1 if a difference was found and 0 otherwise
    AssertionError
      if fewer than two files were given
    """
    if not out:
        out = sys.stdout

    opts, files = get_opt_parser().parse_args(args)

    verbosity = opts.verbose
    nibabel.cmdline.utils.verbose_level = verbosity

    if verbosity < 3:
        # suppress nibabel format-compliance warnings
        nib.imageglobals.logger.level = 50

    # NOTE(review): an assert is skipped under ``python -O``; kept as is
    # because callers may rely on the AssertionError
    assert len(files) >= 2, "Please enter at least two files"

    file_headers = [nib.load(f).header for f in files]

    if opts.header_fields == 'all':
        # 'all' is the marker for "every field"
        # TODO: header fields might vary across file types, thus prior sensing would be needed
        header_fields = file_headers[0].keys()
    else:
        header_fields = opts.header_fields.split(',')

    diff = get_headers_diff(file_headers, header_fields)

    data_diff = get_data_diff(files)
    if data_diff:
        diff['DATA(md5)'] = data_diff

    if not diff:
        out.write("These files are identical.\n")
        raise SystemExit(0)

    out.write(display_diff(files, diff))
    raise SystemExit(1)
0 commit comments