Merge pull request #437 from yarikoptic/enh-ls-counts

matthew-brett · matthew-brett · commit 53a8606aa8b0 · 2016-04-21T12:19:32.000-07:00
MRG: nib-ls -c/--counts to report counts for each value (useful for ROI maps)

Add option to output counts of unique values
diff --git a/bin/nib-ls b/bin/nib-ls
@@ -12,24 +12,34 @@ Output a summary table for neuroimaging files (resolution, dimensionality, etc.)
 """
 from __future__ import division, print_function, absolute_import
 
-__author__ = 'Yaroslav Halchenko'
-__copyright__ = 'Copyright (c) 2011-2015 Yaroslav Halchenko ' \
-                'and NiBabel contributors'
-__license__ = 'MIT'
-
 import re
 import sys
+
+import numpy as np
+import nibabel as nib
+
 from math import ceil
+from collections import defaultdict
 from optparse import OptionParser, Option
 from io import StringIO
+from nibabel.py3k import asunicode
+from nibabel.externals.six.moves import xrange
 
-import numpy as np
+__author__ = 'Yaroslav Halchenko'
+__copyright__ = 'Copyright (c) 2011-2016 Yaroslav Halchenko ' \
+                'and NiBabel contributors'
+__license__ = 'MIT'
 
-import nibabel as nib
-from nibabel.py3k import asunicode
 
 # global verbosity switch
 verbose_level = 0
+MAX_UNIQUE = 1000  # maximal number of unique values to report for --counts
+
+def _err(msg=None):
+    """Return `msg` (default 'error') prefixed with '!' to flag an error in the output table"""
+    if msg is None:
+        msg = 'error'
+    return '!' + msg
 
 def verbose(l, msg):
     """Print `s` if `l` is less than the `verbose_level`
@@ -40,11 +50,10 @@ def verbose(l, msg):
 
 
 def error(msg, exit_code):
-    print  >> sys.stderr, msg
+    print(msg, file=sys.stderr)  # print_function is imported, so py2-style "print >> f" would raise TypeError
     sys.exit(exit_code)
 
 
-
 def table2string(table, out=None):
     """Given list of lists figure out their common widths and print to out
 
@@ -65,18 +74,19 @@ def table2string(table, out=None):
         out = StringIO()
 
     # equalize number of elements in each row
-    Nelements_max = len(table) \
-                    and max(len(x) for x in table)
+    nelements_max = \
+        len(table) and \
+        max(len(x) for x in table)
 
     for i, table_ in enumerate(table):
-        table[i] += [''] * (Nelements_max - len(table_))
+        table[i] += [''] * (nelements_max - len(table_))
 
     # figure out lengths within each column
     atable = np.asarray(table)
     # eat whole entry while computing width for @w (for wide)
     markup_strip = re.compile('^@([lrc]|w.*)')
-    col_width = [ max( [len(markup_strip.sub('', x))
-                        for x in column] ) for column in atable.T ]
+    col_width = [max([len(markup_strip.sub('', x))
+                      for x in column]) for column in atable.T]
     string = ""
     for i, table_ in enumerate(table):
         string_ = ""
@@ -85,26 +95,26 @@ def table2string(table, out=None):
             if item.startswith('@'):
                 align = item[1]
                 item = item[2:]
-                if not align in ['l', 'r', 'c', 'w']:
+                if align not in ['l', 'r', 'c', 'w']:
                     raise ValueError('Unknown alignment %s. Known are l,r,c' %
                                      align)
             else:
                 align = 'c'
 
-            NspacesL = max(ceil((col_width[j] - len(item))/2.0), 0)
-            NspacesR = max(col_width[j] - NspacesL - len(item), 0)
+            nspacesl = max(ceil((col_width[j] - len(item)) / 2.0), 0)
+            nspacesr = max(col_width[j] - nspacesl - len(item), 0)
 
             if align in ['w', 'c']:
                 pass
             elif align == 'l':
-                NspacesL, NspacesR = 0, NspacesL + NspacesR
+                nspacesl, nspacesr = 0, nspacesl + nspacesr
             elif align == 'r':
-                NspacesL, NspacesR = NspacesL + NspacesR, 0
+                nspacesl, nspacesr = nspacesl + nspacesr, 0
             else:
                 raise RuntimeError('Should not get here with align=%s' % align)
 
             string_ += "%%%ds%%s%%%ds " \
-                       % (NspacesL, NspacesR) % ('', item, '')
+                       % (nspacesl, nspacesr) % ('', item, '')
         string += string_.rstrip() + '\n'
     out.write(asunicode(string))
 
@@ -113,15 +123,17 @@ def table2string(table, out=None):
         out.close()
         return value
 
-def ap(l, format, sep=', '):
+
+def ap(l, format_, sep=', '):
     """Little helper to enforce consistency"""
     if l == '-':
         return l
-    ls = [format % x for x in l]
+    ls = [format_ % x for x in l]  # format_ is a %-style template, applied to each element of l
     return sep.join(ls)
 
+
 def safe_get(obj, name):
-    """
+    """A getattr which would return '-' if getattr fails
     """
     try:
         f = getattr(obj, 'get_' + name)
@@ -130,11 +142,12 @@ def safe_get(obj, name):
         verbose(2, "get_%s() failed -- %s" % (name, e))
         return '-'
 
+
 def get_opt_parser():
     # use module docstring for help output
     p = OptionParser(
-                usage="%s [OPTIONS] [FILE ...]\n\n" % sys.argv[0] + __doc__,
-                version="%prog " + nib.__version__)
+        usage="%s [OPTIONS] [FILE ...]\n\n" % sys.argv[0] + __doc__,
+        version="%prog " + nib.__version__)
 
     p.add_options([
         Option("-v", "--verbose", action="count",
@@ -149,13 +162,23 @@ def get_opt_parser():
                action="store_true", dest='stats', default=False,
                help="Output basic data statistics"),
 
+        Option("-c", "--counts",
+               action="store_true", dest='counts', default=False,
+               help="Output counts - number of entries for each numeric value "
+                    "(useful for int ROI maps)"),
+
+        Option("--all-counts",
+               action="store_true", dest='all_counts', default=False,
+               help="Output all counts, even if number of unique values > %d" % MAX_UNIQUE),
+
         Option("-z", "--zeros",
                action="store_true", dest='stats_zeros', default=False,
-               help="Include zeros into output basic data statistics (--stats)"),
-        ])
+               help="Include zeros into output basic data statistics (--stats, --counts)"),
+    ])
 
     return p
 
+
 def proc_file(f, opts):
     verbose(1, "Loading %s" % f)
 
@@ -168,21 +191,21 @@ def proc_file(f, opts):
         verbose(2, "Failed to gather information -- %s" % str(e))
         return row
 
-    row += [ str(safe_get(h, 'data_dtype')),
-             '@l[%s]' %ap(safe_get(h, 'data_shape'), '%3g'),
-             '@l%s' % ap(safe_get(h, 'zooms'), '%.2f', 'x') ]
+    row += [str(safe_get(h, 'data_dtype')),
+            '@l[%s]' % ap(safe_get(h, 'data_shape'), '%3g'),
+            '@l%s' % ap(safe_get(h, 'zooms'), '%.2f', 'x')]
     # Slope
-    if (hasattr(h, 'has_data_slope')
-        and (h.has_data_slope or h.has_data_intercept)) \
-       and not h.get_slope_inter() in [(1.0, 0.0), (None, None)]:
+    if hasattr(h, 'has_data_slope') and \
+            (h.has_data_slope or h.has_data_intercept) and \
+            not h.get_slope_inter() in [(1.0, 0.0), (None, None)]:
         row += ['@l*%.3g+%.3g' % h.get_slope_inter()]
     else:
-        row += [ '' ]
+        row += ['']
 
-    if (hasattr(h, 'extensions') and len(h.extensions)):
+    if hasattr(h, 'extensions') and len(h.extensions):
         row += ['@l#exts: %d' % len(h.extensions)]
     else:
-        row += [ '' ]
+        row += ['']
 
     if opts.header_fields:
         # signals "all fields"
@@ -194,16 +217,16 @@ def proc_file(f, opts):
             header_fields = opts.header_fields.split(',')
 
         for f in header_fields:
-            if not f: # skip empty
+            if not f:  # skip empty
                 continue
             try:
                 row += [str(h[f])]
             except (KeyError, ValueError):
-                row += [ 'error' ]
+                row += [_err()]
 
     try:
-        if (hasattr(h, 'get_qform') and hasattr(h, 'get_sform')
-            and (h.get_qform() != h.get_sform()).any()):
+        if (hasattr(h, 'get_qform') and hasattr(h, 'get_sform') and
+                (h.get_qform() != h.get_sform()).any()):
             row += ['sform']
         else:
             row += ['']
@@ -212,21 +235,34 @@ def proc_file(f, opts):
         if isinstance(h, nib.AnalyzeHeader):
             row += ['']
         else:
-            row += ['error']
+            row += [_err()]
 
-    if opts.stats:
+    if opts.stats or opts.counts:
         # We are doomed to load data
         try:
             d = vol.get_data()
             if not opts.stats_zeros:
                 d = d[np.nonzero(d)]
-            # just # of elements
-            row += ["[%d] " % np.prod(d.shape)]
-            # stats
-            row += [len(d) and '%.2g:%.2g' % (np.min(d), np.max(d)) or '-']
-        except Exception as e:
-            verbose(2, "Failed to obtain stats -- %s" % str(e))
-            row += ['error']
+            else:
+                # at least flatten it -- functionality below doesn't
+                # depend on the original shape, so let's use a flat view
+                d = d.reshape(-1)
+            if opts.stats:
+                # just # of elements
+                row += ["@l[%d]" % np.prod(d.shape)]
+                # stats
+                row += [len(d) and '@l[%.2g, %.2g]' % (np.min(d), np.max(d)) or '-']
+            if opts.counts:
+                items, inv = np.unique(d, return_inverse=True)  # unique values + index of each voxel into them
+                if len(items) > MAX_UNIQUE and not opts.all_counts:  # same cap the --all-counts help quotes
+                    counts = _err("%d uniques. Use --all-counts" % len(items))
+                else:
+                    freq = np.bincount(inv)  # occurrences of each unique value, aligned with `items`
+                    counts = " ".join("%g:%d" % (val, n) for val, n in zip(items, freq))
+                row += ["@l" + counts]
+        except IOError as e:
+            verbose(2, "Failed to obtain stats/counts -- %s" % str(e))
+            row += [_err()]
     return row
 
 
diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py
@@ -53,12 +53,12 @@ def script_test(func):
 DATA_PATH = abspath(pjoin(dirname(__file__), 'data'))
 
 
-def check_nib_ls_example4d(opts=[], hdrs_str=""):
+def check_nib_ls_example4d(opts=[], hdrs_str="", other_str=""):
     # test nib-ls script
     fname = pjoin(DATA_PATH, 'example4d.nii.gz')
     expected_re = (" (int16|[<>]i2) \[128,  96,  24,   2\] "
-                   "2.00x2.00x2.20x2000.00  #exts: 2%s sform$"
-                   % hdrs_str)
+                   "2.00x2.00x2.20x2000.00  #exts: 2%s sform%s$"
+                   % (hdrs_str, other_str))
     cmd = ['nib-ls'] + opts + [fname]
     code, stdout, stderr = run_command(cmd)
     assert_equal(fname, stdout[:len(fname)])
@@ -68,7 +68,16 @@ def check_nib_ls_example4d(opts=[], hdrs_str=""):
 def test_nib_ls():
     yield check_nib_ls_example4d
     yield check_nib_ls_example4d, \
-          ['-H', 'dim,bitpix'], " \[  4 128  96  24   2   1   1   1\] 16"
+        ['-H', 'dim,bitpix'], r" \[  4 128  96  24   2   1   1   1\] 16"
+    yield check_nib_ls_example4d, ['-c'], "", " !1030 uniques. Use --all-counts"
+    yield check_nib_ls_example4d, ['-c', '--all-counts'], "", " 2:3 3:2 4:1 5:1.*"
+    # both stats and counts (patterns are raw strings: "\[" and "\+" are regex escapes, not str escapes)
+    yield check_nib_ls_example4d, \
+        ['-c', '-s', '--all-counts'], "", r" \[229725\] \[2, 1.2e\+03\] 2:3 3:2 4:1 5:1.*"
+    # and must not error out if we allow for zeros
+    yield check_nib_ls_example4d, \
+        ['-c', '-s', '-z', '--all-counts'], "", r" \[589824\] \[0, 1.2e\+03\] 0:360099 2:3 3:2 4:1 5:1.*"
+
 
 @script_test
 def test_nib_ls_multiple():
@@ -109,10 +118,10 @@ def test_nib_ls_multiple():
     assert_equal(
         [l[l.index('['):] for l in stdout_lines],
         [
-            '[128,  96,  24,   2] 2.00x2.00x2.20x2000.00  #exts: 2 sform [229725]   2:1.2e+03',
-            '[ 32,  20,  12,   2] 2.00x2.00x2.20x2000.00  #exts: 2 sform  [15360]  46:7.6e+02',
-            '[ 18,  28,  29]      9.00x8.00x7.00                          [14616]    0.12:93',
-            '[ 91, 109,  91]      2.00x2.00x2.00                           error'
+            '[128,  96,  24,   2] 2.00x2.00x2.20x2000.00  #exts: 2 sform [229725] [2, 1.2e+03]',
+            '[ 32,  20,  12,   2] 2.00x2.00x2.20x2000.00  #exts: 2 sform [15360]  [46, 7.6e+02]',
+            '[ 18,  28,  29]      9.00x8.00x7.00                         [14616]  [0.12, 93]',
+            '[ 91, 109,  91]      2.00x2.00x2.00                          !error'
         ]
     )