@@ -70,7 +70,7 @@ class Recoder(object):
     >>> recodes[2]
     2
     '''
-    def __init__(self, codes, fields=('code',)):
+    def __init__(self, codes, fields=('code',), map_maker=dict):
         ''' Create recoder object

         ``codes`` give a sequence of code, alias sequences
@@ -92,16 +92,20 @@ def __init__(self, codes, fields=('code',)):
         codes : seqence of sequences
             Each sequence defines values (codes) that are equivalent
         fields : {('code',) string sequence}, optional
-            names by which elements in sequences can be accesssed
-
+            names by which elements in sequences can be accessed
+        map_maker: callable, optional
+            constructor for dict-like objects used to store key value pairs.
+            Default is ``dict``. ``map_maker()`` generates an empty mapping.
+            The mapping need only implement ``__getitem__, __setitem__, keys,
+            values``.
         '''
         self.fields = tuple(fields)
         self.field1 = {} # a placeholder for the check below
         for name in fields:
             if name in self.__dict__:
                 raise KeyError('Input name %s already in object dict'
                                % name)
-            self.__dict__[name] = {}
+            self.__dict__[name] = map_maker()
         self.field1 = self.__dict__[fields[0]]
         self.add_codes(codes)

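Not part of the commit: a minimal doctest-style sketch of how the new ``map_maker`` hook could be exercised, assuming it runs where the ``Recoder`` class above is defined (``collections.OrderedDict`` is just one example of a dict-like constructor):

>>> from collections import OrderedDict
>>> codes = ((1, 'one'), (2, 'two'))
>>> rc = Recoder(codes, fields=('code', 'label'), map_maker=OrderedDict)
>>> rc.code['one']      # lookups behave exactly as with a plain dict
1
>>> isinstance(rc.label, OrderedDict)   # but the storage type is the caller's
True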
@@ -153,7 +157,11 @@ def __getitem__(self, key):
     def __contains__(self, key):
         """ True if field1 in recoder contains `key`
         """
-        return key in self.field1
+        try:
+            self.field1[key]
+        except KeyError:
+            return False
+        return True

     def keys(self):
         ''' Return all available code and alias values
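The rewritten ``__contains__`` goes through ``__getitem__`` rather than ``in``, so mappings that only implement ``__getitem__``, ``__setitem__``, ``keys`` and ``values`` (such as the ``DtypeMapper`` added further down) still support membership tests. A small sketch of the visible behavior, not taken from the commit:

>>> rc = Recoder(((1, 'one'), (2, 'two')))
>>> 1 in rc         # hit: __getitem__ on field1 succeeds
True
>>> 'one' in rc     # aliases are keys too
True
>>> 3 in rc         # KeyError from __getitem__ becomes False
False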
@@ -191,7 +199,6 @@ def value_set(self, name=None):
         >>> rc = Recoder(codes, fields=('code', 'label'))
         >>> rc.value_set('label') == set(('one', 'two', 'repeat value'))
         True
-
         '''
         if name is None:
             d = self.field1
@@ -204,6 +211,59 @@ def value_set(self, name=None):
 endian_codes = Recoder(endian_codes)


+class DtypeMapper(object):
+    """ Specialized mapper for numpy dtypes
+
+    We pass this mapper into the Recoder class to deal with numpy dtype hashing.
+
+    The hashing problem is that dtypes that compare equal may not have the same
+    hash. This is true for numpys up to the current at time of writing (1.6.0).
+    For numpy 1.2.1 at least, even dtypes that look exactly the same in terms of
+    fields don't always have the same hash. This makes dtypes difficult to use
+    as keys in a dictionary.
+
+    This class wraps a dictionary in order to implement a __getitem__ to deal
+    with dtype hashing. If the key doesn't appear to be in the mapping, and it
+    is a dtype, we compare (using ==) all known dtype keys to the input key, and
+    return any matching values for the matching key.
+    """
+    def __init__(self):
+        self._dict = {}
+        self._dtype_keys = []
+
+    def keys(self):
+        return self._dict.keys()
+
+    def values(self):
+        return self._dict.values()
+
+    def __setitem__(self, key, value):
+        """ Set item into mapping, checking for dtype keys
+
+        Cache dtype keys for comparison test in __getitem__
+        """
+        self._dict[key] = value
+        if hasattr(key, 'subdtype'):
+            self._dtype_keys.append(key)
+
+    def __getitem__(self, key):
+        """ Get item from mapping, checking for dtype keys
+
+        First do simple hash lookup, then check for a dtype key that has failed
+        the hash lookup. Look then for any known dtype keys that compare equal
+        to `key`.
+        """
+        try:
+            return self._dict[key]
+        except KeyError:
+            pass
+        if hasattr(key, 'subdtype'):
+            for dt in self._dtype_keys:
+                if key == dt:
+                    return self._dict[dt]
+        raise KeyError(key)
+
+
 def pretty_mapping(mapping, getterfunc=None):
     ''' Make pretty string from mapping

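A doctest-style sketch (not in the commit) of what the equality fallback in ``DtypeMapper.__getitem__`` buys, assuming only the ``DtypeMapper`` class added above:

>>> import numpy as np
>>> dtm = DtypeMapper()
>>> dtm[np.dtype(np.int16)] = 4
>>> dtm[np.dtype('int16').newbyteorder('=')]  # equal dtype, possibly different hash
4
>>> len(dtm.keys())     # only the key passed to __setitem__ is stored
1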
@@ -265,7 +325,7 @@ def pretty_mapping(mapping, getterfunc=None):


 def make_dt_codes(codes_seqs):
-    ''' Create full dt codes object from datatype codes
+    ''' Create full dt codes Recoder instance from datatype codes

     Include created numpy dtype (from numpy type) and opposite endian
     numpy dtype
@@ -299,23 +359,10 @@ def make_dt_codes(codes_seqs):
             raise ValueError('Sequences must all have the same length')
         np_type = seq[2]
         this_dt = np.dtype(np_type)
-        code_syns = list(seq)
-        dtypes = [this_dt]
-        # intp type is effectively same as int32 on 32 bit and int64 on 64 bit.
-        # They compare equal, but in some (all?) numpy versions, they may hash
-        # differently. If so we need to add them
-        if this_dt == intp_dt and hash(this_dt) != hash(intp_dt):
-            dtypes.append(intp_dt)
-        # To satisfy an oddness in numpy dtype hashing, we need to add the dtype
-        # with explicit native order as well as the default dtype (=) order
-        for dt in dtypes:
-            code_syns += [dt,
-                          dt.newbyteorder(native_code),
-                          dt.newbyteorder(swapped_code)]
+        # Add swapped dtype to synonyms
+        code_syns = list(seq) + [this_dt, this_dt.newbyteorder(swapped_code)]
         dt_codes.append(code_syns)
-    return Recoder(dt_codes, fields + ['dtype',
-                                       'native_dtype',
-                                       'sw_dtype'])
+    return Recoder(dt_codes, fields + ['dtype', 'sw_dtype'], DtypeMapper)


 def can_cast(in_type, out_type, has_intercept=False, has_slope=False):
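A hedged usage sketch of the simplified ``make_dt_codes`` (not part of the commit): the code/label/type triples below are invented for illustration, and the snippet assumes the patched module's namespace, where ``np`` and ``swapped_code`` (both referenced in the diff) are defined and where length-3 sequences get the base fields ``code``, ``label`` and ``type``:

>>> dt_defs = ((2, 'uint8', np.uint8), (4, 'int16', np.int16))
>>> codes = make_dt_codes(dt_defs)
>>> codes.dtype[2] == np.dtype(np.uint8)
True
>>> codes.code[np.dtype(np.int16).newbyteorder(swapped_code)]  # sw_dtype synonym
4
>>> codes.label[np.dtype(np.int16)]
'int16'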