 pd_version = tuple([int(x) for x in re.match(r'^(\d+)\.(\d+)\.(\d+)',
                                              pd.__version__).groups()])

+if pd_version >= (0, 23, 0):
+    concat_sort = dict(sort=False)
+else:
+    concat_sort = dict()
+
 OPERATOR_NAMES = {
     '+': 'add',
     '-': 'sub',
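Note: the version tuple above lets the module gate keyword arguments on the installed pandas release; `pd.concat()` only accepts `sort=` starting with pandas 0.23. A minimal standalone sketch of the same pattern, purely illustrative (only the `concat_sort` name comes from this change):

    import re
    import pandas as pd

    # Parse '0.23.4', '1.5.3', etc. into a comparable tuple such as (0, 23, 4).
    pd_version = tuple(int(x) for x in
                       re.match(r'^(\d+)\.(\d+)\.(\d+)', pd.__version__).groups())

    # Pass sort= only where pd.concat() supports it (pandas >= 0.23).
    concat_sort = dict(sort=False) if pd_version >= (0, 23, 0) else dict()

    frames = [pd.DataFrame({'a': [1]}), pd.DataFrame({'b': [2]})]
    result = pd.concat(frames, **concat_sort)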
@@ -545,8 +550,8 @@ class CASTableRowScalarAccessor(CASTableAccessor):
     def __getitem__(self, pos):
         tbl = self._table()
         if isinstance(tbl, CASColumn):
-            return tbl.get_value(pos, 0)
-        return tbl.get_value(*pos)
+            return tbl.iat[pos, 0]
+        return tbl.iat[slice(*pos)]


 class CASTableLabelScalarAccessor(CASTableAccessor):
@@ -557,10 +562,10 @@ def __getitem__(self, pos):
         if isinstance(tbl, CASColumn):
             if pos < 0 or pos >= tbl._numrows:
                 raise KeyError(pos)
-            return tbl.get_value(pos, 0)
+            return tbl.iat[pos, 0]
         if pos[0] < 0 or pos[0] >= tbl._numrows:
             raise KeyError(pos)
-        return tbl.get_value(*pos)
+        return tbl.iat[slice(*pos)]


 def _get_table_selection(table, args):
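Note: `DataFrame.get_value()` was deprecated in pandas 0.21 (and removed in 1.0) in favor of the `.at`/`.iat` indexers, which is why the accessors above switch to `.iat` for position-based scalar lookups. A hedged illustration with a throwaway frame (not SWAT code):

    import pandas as pd

    df = pd.DataFrame({'x': [10, 20, 30]})

    # Deprecated:   df.get_value(1, 'x')
    # Replacement used here (row position 1, column position 0):
    value = df.iat[1, 0]    # -> 20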
@@ -1240,6 +1245,7 @@ class CASTable(ParamManager, ActionParamManager):
     def __init__(self, name, **table_params):
         ParamManager.__init__(self, name=name, **table_params)
         ActionParamManager.__init__(self)
+        self._pandas_enabled = True
         self._connection = None
         self._contexts = []

@@ -1266,6 +1272,22 @@ def __init__(self, name, **table_params):
             doc = doc.split('Returns')[0].rstrip()
             self.params.set_doc(doc)

+    def _disable_pandas(self):
+        '''
+        Disable selected pandas DataFrame features
+
+        Some versions of pandas perform attribute lookups on CASTables
+        that can interrupt running actions.  These pandas features
+        can be disabled temporarily to bypass the lookups where
+        needed.
+
+        '''
+        self._pandas_enabled = False
+
+    def _enable_pandas(self):
+        ''' Re-enable pandas features '''
+        self._pandas_enabled = True
+
     def append_columns(self, *items, **kwargs):
         '''
         Append variable names to action inputs parameter
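The new `_pandas_enabled` flag lets callers temporarily turn off pandas-style protocol hooks such as `__len__` (see the change below), which otherwise trigger a server round trip whenever pandas or IPython probes the object. A hedged usage sketch, assuming `tbl` is an existing CASTable:

    tbl._disable_pandas()
    try:
        # While disabled, probes of __len__ raise AttributeError instead of
        # fetching the row count from the server.
        run_long_action(tbl)    # hypothetical caller code
    finally:
        tbl._enable_pandas()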
@@ -2287,7 +2309,9 @@ def _numrows(self):
         return self.copy(exclude='groupby')._retrieve('simple.numrows')['numrows']

     def __len__(self):
-        return self._numrows
+        if self._pandas_enabled:
+            return self._numrows
+        raise AttributeError('__len__')

     # NOTE: Workaround to keep the DataFrame text renderer from trying
     # to fetch all the values in the table.
@@ -2401,7 +2425,7 @@ def as_matrix(self, columns=None, n=None):
                           % n, RuntimeWarning)
         tbl = self.copy()
         tbl._intersect_columns(columns, inplace=True)
-        return tbl._fetch(to=n).as_matrix()
+        return tbl._fetch(to=n).values

     @getattr_safe_property
     def dtypes(self):
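Note: `DataFrame.as_matrix()` was deprecated in pandas 0.23 and later removed, so the fetched frame is converted with `.values` instead. An illustrative equivalence (frame made up for the example):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
    arr = df.values    # numpy array; same result the old df.as_matrix() gave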
@@ -3017,7 +3041,7 @@ def get_value(self, index, col, **kwargs):
         if index < 0:
             index = index + numrows
         out = self._fetch(from_=index + 1, to=index + 1)
-        return out.get_value(out.index.values[0], col, **kwargs)
+        return out.at[out.index.values[0], col]

     def lookup(self, row_labels, col_labels):
         ''' Retrieve values indicated by row_labels, col_labels positions '''
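For label-based scalar access, the deprecated `get_value(row_label, col)` becomes `.at[row_label, col]`. A small sketch standing in for the fetched result above (column name and values are illustrative):

    import pandas as pd

    out = pd.DataFrame({'height': [58.0, 59.0]}, index=[5, 6])
    value = out.at[out.index.values[0], 'height']    # -> 58.0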
@@ -3907,8 +3931,8 @@ def _expand_items(into, key, items):
             else:
                 summ.drop(['min', 'max'], inplace=True)

-            out = pd.concat(x for x in [topk_val, pct, summ, topk_freq]
-                            if x is not None)
+            out = pd.concat((x for x in [topk_val, pct, summ, topk_freq]
+                             if x is not None), **concat_sort)

         else:
             if stats is None:
@@ -3917,7 +3941,8 @@ def _expand_items(into, key, items):
                 labels = ['count', 'unique', 'top', 'freq', 'min', 'max']
             else:
                 labels = stats
-            out = pd.concat(x for x in [topk_freq, topk_val] if x is not None)
+            out = pd.concat((x for x in [topk_freq, topk_val] if x is not None),
+                            **concat_sort)

         groups = self.get_groupby_vars()
         idx = tuple([slice(None) for x in groups] + [labels])
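Two things change in these `pd.concat()` calls: the generator expression gains parentheses, because Python requires a generator to be parenthesized once it is no longer the sole argument, and the version-gated `**concat_sort` suppresses the column-sorting FutureWarning that pandas 0.23 introduced. A rough illustration with throwaway frames:

    import pandas as pd

    a = pd.DataFrame({'count': [3]}, index=['x'])
    b = pd.DataFrame({'unique': [2]}, index=['x'])

    # sort=False keeps the existing column order on the non-concatenation axis.
    combined = pd.concat((f for f in [a, b, None] if f is not None), sort=False)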
@@ -3970,8 +3995,15 @@ def _expand_items(into, key, items):
             tmpname = str(uuid.uuid4())
             out.index.names = groups + [tmpname]
             out.reset_index(inplace=True)
-            out[tmpname] = out[tmpname].astype('category', categories=categories,
-                                               ordered=True)
+            if pd_version >= (0, 21, 0):
+                from pandas.api.types import CategoricalDtype
+                out[tmpname] = out[tmpname].astype(CategoricalDtype(
+                    categories=categories,
+                    ordered=True))
+            else:
+                out[tmpname] = out[tmpname].astype('category',
+                                                   categories=categories,
+                                                   ordered=True)
             out.sort_values(groups + [tmpname], inplace=True)
             out.set_index(groups + [tmpname], inplace=True)
             out.index.names = groups + [None]
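Passing `categories=`/`ordered=` straight to `.astype('category', ...)` was deprecated in pandas 0.21 and removed in 0.25, so newer versions build a `CategoricalDtype` instead. A hedged sketch of the two spellings (the Series and category list are illustrative):

    import pandas as pd
    from pandas.api.types import CategoricalDtype

    s = pd.Series(['min', 'max', 'mean'])
    cats = ['count', 'mean', 'min', 'max']

    # pandas >= 0.21
    ordered = s.astype(CategoricalDtype(categories=cats, ordered=True))

    # older pandas only (keyword form, removed in 0.25)
    # ordered = s.astype('category', categories=cats, ordered=True)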
@@ -8178,7 +8210,7 @@ def get(self, key, default=None):
         '''
         out = self._fetch(from_=key + 1, to=key + 1)
         try:
-            return out.get_value(out.index.values[0], self._columns[0])
+            return out.at[out.index.values[0], self._columns[0]]
         except (KeyError, IndexError):
             pass
         return default
@@ -9012,7 +9044,7 @@ def unique(self):
             del out[tmpname]
             return out.groupby(names)[var].unique()

-        return pd.Series(out.index, name=self.name).as_matrix()
+        return pd.Series(out.index, name=self.name).values

     def nunique(self, dropna=True):
         '''