@@ -70,7 +70,7 @@ class Recoder(object):
     >>> recodes[2]
     2
     '''
-    def __init__(self, codes, fields=('code',)):
+    def __init__(self, codes, fields=('code',), map_maker=dict):
         ''' Create recoder object

         ``codes`` give a sequence of code, alias sequences
@@ -92,16 +92,20 @@ def __init__(self, codes, fields=('code',)):
         codes : seqence of sequences
             Each sequence defines values (codes) that are equivalent
         fields : {('code',) string sequence}, optional
-            names by which elements in sequences can be accesssed
-
+            names by which elements in sequences can be accessed
+        map_maker: callable, optional
+            constructor for dict-like objects used to store key value pairs.
+            Default is ``dict``. ``map_maker()`` generates an empty mapping.
+            The mapping need only implement ``__getitem__, __setitem__, keys,
+            values``.
         '''
         self.fields = tuple(fields)
         self.field1 = {} # a placeholder for the check below
         for name in fields:
             if name in self.__dict__:
                 raise KeyError('Input name %s already in object dict'
                                % name)
-            self.__dict__[name] = {}
+            self.__dict__[name] = map_maker()
         self.field1 = self.__dict__[fields[0]]
         self.add_codes(codes)

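Not part of the commit: a minimal doctest-style sketch of how the new ``map_maker`` hook could be exercised, assuming it runs where the ``Recoder`` class above is defined (``collections.OrderedDict`` is just one example of a dict-like constructor):

>>> from collections import OrderedDict
>>> codes = ((1, 'one'), (2, 'two'))
>>> rc = Recoder(codes, fields=('code', 'label'), map_maker=OrderedDict)
>>> rc.code['one']      # lookups behave exactly as with a plain dict
1
>>> isinstance(rc.label, OrderedDict)   # but the storage type is the caller's
True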
@@ -153,7 +157,11 @@ def __getitem__(self, key):
     def __contains__(self, key):
         """ True if field1 in recoder contains `key`
         """
-        return key in self.field1
+        try:
+            self.field1[key]
+        except KeyError:
+            return False
+        return True

     def keys(self):
         ''' Return all available code and alias values
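The rewritten ``__contains__`` goes through ``__getitem__`` rather than ``in``, so mappings that only implement ``__getitem__``, ``__setitem__``, ``keys`` and ``values`` (such as the ``DtypeMapper`` added further down) still support membership tests. A small sketch of the visible behavior, not taken from the commit:

>>> rc = Recoder(((1, 'one'), (2, 'two')))
>>> 1 in rc         # hit: __getitem__ on field1 succeeds
True
>>> 'one' in rc     # aliases are keys too
True
>>> 3 in rc         # KeyError from __getitem__ becomes False
False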
@@ -191,7 +199,6 @@ def value_set(self, name=None):
         >>> rc = Recoder(codes, fields=('code', 'label'))
         >>> rc.value_set('label') == set(('one', 'two', 'repeat value'))
         True
-
         '''
         if name is None:
             d = self.field1
@@ -204,6 +211,59 @@ def value_set(self, name=None):
 endian_codes = Recoder(endian_codes)


+class DtypeMapper(object):
+    """ Specialized mapper for numpy dtypes
+
+    We pass this mapper into the Recoder class to deal with numpy dtype hashing.
+
+    The hashing problem is that dtypes that compare equal may not have the same
+    hash. This is true for numpys up to the current at time of writing (1.6.0).
+    For numpy 1.2.1 at least, even dtypes that look exactly the same in terms of
+    fields don't always have the same hash. This makes dtypes difficult to use
+    as keys in a dictionary.
+
+    This class wraps a dictionary in order to implement a __getitem__ to deal
+    with dtype hashing. If the key doesn't appear to be in the mapping, and it
+    is a dtype, we compare (using ==) all known dtype keys to the input key, and
+    return any matching values for the matching key.
+    """
+    def __init__(self):
+        self._dict = {}
+        self._dtype_keys = []
+
+    def keys(self):
+        return self._dict.keys()
+
+    def values(self):
+        return self._dict.values()
+
+    def __setitem__(self, key, value):
+        """ Set item into mapping, checking for dtype keys
+
+        Cache dtype keys for comparison test in __getitem__
+        """
+        self._dict[key] = value
+        if hasattr(key, 'subdtype'):
+            self._dtype_keys.append(key)
+
+    def __getitem__(self, key):
+        """ Get item from mapping, checking for dtype keys
+
+        First do simple hash lookup, then check for a dtype key that has failed
+        the hash lookup. Look then for any known dtype keys that compare equal
+        to `key`.
+        """
+        try:
+            return self._dict[key]
+        except KeyError:
+            pass
+        if hasattr(key, 'subdtype'):
+            for dt in self._dtype_keys:
+                if key == dt:
+                    return self._dict[dt]
+        raise KeyError(key)
+
+
 def pretty_mapping(mapping, getterfunc=None):
     ''' Make pretty string from mapping

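A doctest-style sketch (not in the commit) of what the equality fallback in ``DtypeMapper.__getitem__`` buys, assuming only the ``DtypeMapper`` class added above:

>>> import numpy as np
>>> dtm = DtypeMapper()
>>> dtm[np.dtype(np.int16)] = 4
>>> dtm[np.dtype('int16').newbyteorder('=')]  # equal dtype, possibly different hash
4
>>> len(dtm.keys())     # only the key passed to __setitem__ is stored
1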
@@ -265,7 +325,7 @@ def pretty_mapping(mapping, getterfunc=None):


 def make_dt_codes(codes_seqs):
-    ''' Create full dt codes object from datatype codes
+    ''' Create full dt codes Recoder instance from datatype codes

     Include created numpy dtype (from numpy type) and opposite endian
     numpy dtype
@@ -299,23 +359,10 @@ def make_dt_codes(codes_seqs):
             raise ValueError('Sequences must all have the same length')
         np_type = seq[2]
         this_dt = np.dtype(np_type)
-        code_syns = list(seq)
-        dtypes = [this_dt]
-        # intp type is effectively same as int32 on 32 bit and int64 on 64 bit.
-        # They compare equal, but in some (all?) numpy versions, they may hash
-        # differently. If so we need to add them
-        if this_dt == intp_dt and hash(this_dt) != hash(intp_dt):
-            dtypes.append(intp_dt)
-        # To satisfy an oddness in numpy dtype hashing, we need to add the dtype
-        # with explicit native order as well as the default dtype (=) order
-        for dt in dtypes:
-            code_syns += [dt,
-                          dt.newbyteorder(native_code),
-                          dt.newbyteorder(swapped_code)]
+        # Add swapped dtype to synonyms
+        code_syns = list(seq) + [this_dt, this_dt.newbyteorder(swapped_code)]
         dt_codes.append(code_syns)
-    return Recoder(dt_codes, fields + ['dtype',
-                                       'native_dtype',
-                                       'sw_dtype'])
+    return Recoder(dt_codes, fields + ['dtype', 'sw_dtype'], DtypeMapper)


 def can_cast(in_type, out_type, has_intercept=False, has_slope=False):
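A hedged usage sketch of the simplified ``make_dt_codes`` (not part of the commit): the code/label/type triples below are invented for illustration, and the snippet assumes the patched module's namespace, where ``np`` and ``swapped_code`` (both referenced in the diff) are defined and where length-3 sequences get the base fields ``code``, ``label`` and ``type``:

>>> dt_defs = ((2, 'uint8', np.uint8), (4, 'int16', np.int16))
>>> codes = make_dt_codes(dt_defs)
>>> codes.dtype[2] == np.dtype(np.uint8)
True
>>> codes.code[np.dtype(np.int16).newbyteorder(swapped_code)]  # sw_dtype synonym
4
>>> codes.label[np.dtype(np.int16)]
'int16'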