Skip to content

Commit 5ae8e69

Browse files
author
Kevin D Smith
committed
Clean up pandas warnings; fixes for pypi compatibility
1 parent 7fb1711 commit 5ae8e69

File tree

7 files changed

+116
-44
lines changed

7 files changed

+116
-44
lines changed

setup.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
from setuptools import setup, find_packages
2323

2424
try:
25-
README = open('README.rst', 'r').read()
25+
README = open('README.md', 'r').read()
2626
except:
27-
README = 'See README.rst'
27+
README = 'See README.md'
2828

2929
if glob.glob('swat/lib/*/tk*'):
3030
LICENSE = 'Apache v2.0 (SWAT) + SAS Additional Functionality (SAS TK)'
@@ -50,6 +50,8 @@
5050
'six >= 1.9.0',
5151
'requests',
5252
],
53+
platforms='any',
54+
python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*',
5355
classifiers=[
5456
'Development Status :: 5 - Production/Stable',
5557
'Environment :: Console',

swat/cas/table.py

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@
4949
pd_version = tuple([int(x) for x in re.match(r'^(\d+)\.(\d+)\.(\d+)',
5050
pd.__version__).groups()])
5151

52+
if pd_version >= (0, 23, 0):
53+
concat_sort = dict(sort=False)
54+
else:
55+
concat_sort = dict()
56+
5257
OPERATOR_NAMES = {
5358
'+': 'add',
5459
'-': 'sub',
@@ -545,8 +550,8 @@ class CASTableRowScalarAccessor(CASTableAccessor):
545550
def __getitem__(self, pos):
546551
tbl = self._table()
547552
if isinstance(tbl, CASColumn):
548-
return tbl.get_value(pos, 0)
549-
return tbl.get_value(*pos)
553+
return tbl.iat[pos, 0]
554+
return tbl.iat[slice(*pos)]
550555

551556

552557
class CASTableLabelScalarAccessor(CASTableAccessor):
@@ -557,10 +562,10 @@ def __getitem__(self, pos):
557562
if isinstance(tbl, CASColumn):
558563
if pos < 0 or pos >= tbl._numrows:
559564
raise KeyError(pos)
560-
return tbl.get_value(pos, 0)
565+
return tbl.iat[pos, 0]
561566
if pos[0] < 0 or pos[0] >= tbl._numrows:
562567
raise KeyError(pos)
563-
return tbl.get_value(*pos)
568+
return tbl.iat[slice(*pos)]
564569

565570

566571
def _get_table_selection(table, args):
@@ -1240,6 +1245,7 @@ class CASTable(ParamManager, ActionParamManager):
12401245
def __init__(self, name, **table_params):
12411246
ParamManager.__init__(self, name=name, **table_params)
12421247
ActionParamManager.__init__(self)
1248+
self._pandas_enabled = True
12431249
self._connection = None
12441250
self._contexts = []
12451251

@@ -1266,6 +1272,22 @@ def __init__(self, name, **table_params):
12661272
doc = doc.split('Returns')[0].rstrip()
12671273
self.params.set_doc(doc)
12681274

1275+
def _disable_pandas(self):
1276+
'''
1277+
Disable selected pandas DataFrame features
1278+
1279+
Some versions of pandas cause lookups of attributes on CASTables
1280+
that can cause interruptions of running actions. These
1281+
features can be disabled temporarily to bypass the pandas
1282+
features where needed.
1283+
1284+
'''
1285+
self._pandas_enabled = False
1286+
1287+
def _enable_pandas(self):
1288+
''' Re-enable pandas features '''
1289+
self._pandas_enabled = True
1290+
12691291
def append_columns(self, *items, **kwargs):
12701292
'''
12711293
Append variable names to action inputs parameter
@@ -2287,7 +2309,9 @@ def _numrows(self):
22872309
return self.copy(exclude='groupby')._retrieve('simple.numrows')['numrows']
22882310

22892311
def __len__(self):
2290-
return self._numrows
2312+
if self._pandas_enabled:
2313+
return self._numrows
2314+
raise AttributeError('__len__')
22912315

22922316
# NOTE: Workaround to keep the DataFrame text renderer from trying
22932317
# to fetch all the values in the table.
@@ -2401,7 +2425,7 @@ def as_matrix(self, columns=None, n=None):
24012425
% n, RuntimeWarning)
24022426
tbl = self.copy()
24032427
tbl._intersect_columns(columns, inplace=True)
2404-
return tbl._fetch(to=n).as_matrix()
2428+
return tbl._fetch(to=n).values
24052429

24062430
@getattr_safe_property
24072431
def dtypes(self):
@@ -3017,7 +3041,7 @@ def get_value(self, index, col, **kwargs):
30173041
if index < 0:
30183042
index = index + numrows
30193043
out = self._fetch(from_=index + 1, to=index + 1)
3020-
return out.get_value(out.index.values[0], col, **kwargs)
3044+
return out.at[out.index.values[0], col]
30213045

30223046
def lookup(self, row_labels, col_labels):
30233047
''' Retrieve values indicated by row_labels, col_labels positions '''
@@ -3907,8 +3931,8 @@ def _expand_items(into, key, items):
39073931
else:
39083932
summ.drop(['min', 'max'], inplace=True)
39093933

3910-
out = pd.concat(x for x in [topk_val, pct, summ, topk_freq]
3911-
if x is not None)
3934+
out = pd.concat((x for x in [topk_val, pct, summ, topk_freq]
3935+
if x is not None), **concat_sort)
39123936

39133937
else:
39143938
if stats is None:
@@ -3917,7 +3941,8 @@ def _expand_items(into, key, items):
39173941
labels = ['count', 'unique', 'top', 'freq', 'min', 'max']
39183942
else:
39193943
labels = stats
3920-
out = pd.concat(x for x in [topk_freq, topk_val] if x is not None)
3944+
out = pd.concat((x for x in [topk_freq, topk_val] if x is not None),
3945+
**concat_sort)
39213946

39223947
groups = self.get_groupby_vars()
39233948
idx = tuple([slice(None) for x in groups] + [labels])
@@ -3970,8 +3995,15 @@ def _expand_items(into, key, items):
39703995
tmpname = str(uuid.uuid4())
39713996
out.index.names = groups + [tmpname]
39723997
out.reset_index(inplace=True)
3973-
out[tmpname] = out[tmpname].astype('category', categories=categories,
3974-
ordered=True)
3998+
if pd_version >= (0, 21, 0):
3999+
from pandas.api.types import CategoricalDtype
4000+
out[tmpname] = out[tmpname].astype(CategoricalDtype(
4001+
categories=categories,
4002+
ordered=True))
4003+
else:
4004+
out[tmpname] = out[tmpname].astype('category',
4005+
categories=categories,
4006+
ordered=True)
39754007
out.sort_values(groups + [tmpname], inplace=True)
39764008
out.set_index(groups + [tmpname], inplace=True)
39774009
out.index.names = groups + [None]
@@ -8178,7 +8210,7 @@ def get(self, key, default=None):
81788210
'''
81798211
out = self._fetch(from_=key + 1, to=key + 1)
81808212
try:
8181-
return out.get_value(out.index.values[0], self._columns[0])
8213+
return out.at[out.index.values[0], self._columns[0]]
81828214
except (KeyError, IndexError):
81838215
pass
81848216
return default
@@ -9012,7 +9044,7 @@ def unique(self):
90129044
del out[tmpname]
90139045
return out.groupby(names)[var].unique()
90149046

9015-
return pd.Series(out.index, name=self.name).as_matrix()
9047+
return pd.Series(out.index, name=self.name).values
90169048

90179049
def nunique(self, dropna=True):
90189050
'''

swat/cas/transformers.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -413,10 +413,16 @@ def ctb2tabular(_sw_table, soptions='', connection=None):
413413
tables = []
414414
for lib, tbl in zip(cdf[caslib], cdf[tablename]):
415415
if connection is not None:
416-
tables.append(connection.CASTable(tbl, caslib=lib))
416+
tbl = connection.CASTable(tbl, caslib=lib)
417417
else:
418-
tables.append(CASTable(tbl, caslib=lib))
418+
tbl = CASTable(tbl, caslib=lib)
419+
tbl._disable_pandas()
420+
tables.append(tbl)
421+
# In newer versions of pandas, this causes the __len__ method to
422+
# be called, which can cause CAS results to be truncated due to
423+
# additional CAS actions being called.
419424
cdf['casTable'] = pd.Series(tables, name='casTable')
425+
cdf['casTable'].apply(lambda x: x._enable_pandas())
420426
cdf.colinfo['casTable'] = SASColumnSpec('casTable', label='Table', dtype='object')
421427

422428
if tformat == 'dataframe':

swat/dataframe.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,8 @@ def insert(self, *args, **kwargs):
520520
'''
521521
result = super(SASDataFrame, self).insert(*args, **kwargs)
522522
for col in self.columns:
523+
if isinstance(col, (tuple, list)) and col:
524+
col = col[0]
523525
if col not in self.colinfo:
524526
self.colinfo[col] = SASColumnSpec(col)
525527
return result

swat/tests/cas/test_echo.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def setUp(self):
3636
swat.reset_option()
3737
swat.options.cas.print_messages = False
3838
swat.options.interactive_mode = False
39+
swat.options.trace_actions = True
3940

4041
self.s = swat.CAS(HOST, PORT, USER, PASSWD, protocol=PROTOCOL)
4142

0 commit comments

Comments
 (0)