 pd_version = tuple([int(x) for x in re.match(r'^(\d+)\.(\d+)\.(\d+)',
                                              pd.__version__).groups()])

+if pd_version >= (0, 23, 0):
+    concat_sort = dict(sort=False)
+else:
+    concat_sort = dict()
+
 OPERATOR_NAMES = {
     '+': 'add',
     '-': 'sub',
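Note: the version tuple above lets the module gate keyword arguments on the installed pandas release; `pd.concat()` only accepts `sort=` starting with pandas 0.23. A minimal standalone sketch of the same pattern, purely illustrative (only the `concat_sort` name comes from this change):

    import re
    import pandas as pd

    # Parse '0.23.4', '1.5.3', etc. into a comparable tuple such as (0, 23, 4).
    pd_version = tuple(int(x) for x in
                       re.match(r'^(\d+)\.(\d+)\.(\d+)', pd.__version__).groups())

    # Pass sort= only where pd.concat() supports it (pandas >= 0.23).
    concat_sort = dict(sort=False) if pd_version >= (0, 23, 0) else dict()

    frames = [pd.DataFrame({'a': [1]}), pd.DataFrame({'b': [2]})]
    result = pd.concat(frames, **concat_sort)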
@@ -545,8 +550,8 @@ class CASTableRowScalarAccessor(CASTableAccessor):
     def __getitem__(self, pos):
         tbl = self._table()
         if isinstance(tbl, CASColumn):
-            return tbl.get_value(pos, 0)
-        return tbl.get_value(*pos)
+            return tbl.iat[pos, 0]
+        return tbl.iat[slice(*pos)]


 class CASTableLabelScalarAccessor(CASTableAccessor):
@@ -557,10 +562,10 @@ def __getitem__(self, pos):
         if isinstance(tbl, CASColumn):
             if pos < 0 or pos >= tbl._numrows:
                 raise KeyError(pos)
-            return tbl.get_value(pos, 0)
+            return tbl.iat[pos, 0]
         if pos[0] < 0 or pos[0] >= tbl._numrows:
             raise KeyError(pos)
-        return tbl.get_value(*pos)
+        return tbl.iat[slice(*pos)]


 def _get_table_selection(table, args):
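Note: `DataFrame.get_value()` was deprecated in pandas 0.21 (and removed in 1.0) in favor of the `.at`/`.iat` indexers, which is why the accessors above switch to `.iat` for position-based scalar lookups. A hedged illustration with a throwaway frame (not SWAT code):

    import pandas as pd

    df = pd.DataFrame({'x': [10, 20, 30]})

    # Deprecated:   df.get_value(1, 'x')
    # Replacement used here (row position 1, column position 0):
    value = df.iat[1, 0]    # -> 20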
@@ -1240,6 +1245,7 @@ class CASTable(ParamManager, ActionParamManager):
     def __init__(self, name, **table_params):
         ParamManager.__init__(self, name=name, **table_params)
         ActionParamManager.__init__(self)
+        self._pandas_enabled = True
         self._connection = None
         self._contexts = []

@@ -1266,6 +1272,22 @@ def __init__(self, name, **table_params):
             doc = doc.split('Returns')[0].rstrip()
             self.params.set_doc(doc)

+    def _disable_pandas(self):
+        '''
+        Disable selected pandas DataFrame features
+
+        Some versions of pandas perform attribute lookups on CASTables
+        that can interrupt running actions.  These pandas features
+        can be disabled temporarily to bypass the lookups where
+        needed.
+
+        '''
+        self._pandas_enabled = False
+
+    def _enable_pandas(self):
+        ''' Re-enable pandas features '''
+        self._pandas_enabled = True
+
     def append_columns(self, *items, **kwargs):
         '''
         Append variable names to action inputs parameter
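The new `_pandas_enabled` flag lets callers temporarily turn off pandas-style protocol hooks such as `__len__` (see the change below), which otherwise trigger a server round trip whenever pandas or IPython probes the object. A hedged usage sketch, assuming `tbl` is an existing CASTable:

    tbl._disable_pandas()
    try:
        # While disabled, probes of __len__ raise AttributeError instead of
        # fetching the row count from the server.
        run_long_action(tbl)    # hypothetical caller code
    finally:
        tbl._enable_pandas()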
@@ -2287,7 +2309,9 @@ def _numrows(self):
         return self.copy(exclude='groupby')._retrieve('simple.numrows')['numrows']

     def __len__(self):
-        return self._numrows
+        if self._pandas_enabled:
+            return self._numrows
+        raise AttributeError('__len__')

     # NOTE: Workaround to keep the DataFrame text renderer from trying
     # to fetch all the values in the table.
@@ -2401,7 +2425,7 @@ def as_matrix(self, columns=None, n=None):
                           % n, RuntimeWarning)
         tbl = self.copy()
         tbl._intersect_columns(columns, inplace=True)
-        return tbl._fetch(to=n).as_matrix()
+        return tbl._fetch(to=n).values

     @getattr_safe_property
     def dtypes(self):
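Note: `DataFrame.as_matrix()` was deprecated in pandas 0.23 and later removed, so the fetched frame is converted with `.values` instead. An illustrative equivalence (frame made up for the example):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
    arr = df.values    # numpy array; same result the old df.as_matrix() gave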
@@ -3017,7 +3041,7 @@ def get_value(self, index, col, **kwargs):
         if index < 0:
             index = index + numrows
         out = self._fetch(from_=index + 1, to=index + 1)
-        return out.get_value(out.index.values[0], col, **kwargs)
+        return out.at[out.index.values[0], col]

     def lookup(self, row_labels, col_labels):
         ''' Retrieve values indicated by row_labels, col_labels positions '''
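For label-based scalar access, the deprecated `get_value(row_label, col)` becomes `.at[row_label, col]`. A small sketch standing in for the fetched result above (column name and values are illustrative):

    import pandas as pd

    out = pd.DataFrame({'height': [58.0, 59.0]}, index=[5, 6])
    value = out.at[out.index.values[0], 'height']    # -> 58.0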
@@ -3907,8 +3931,8 @@ def _expand_items(into, key, items):
             else:
                 summ.drop(['min', 'max'], inplace=True)

-            out = pd.concat(x for x in [topk_val, pct, summ, topk_freq]
-                            if x is not None)
+            out = pd.concat((x for x in [topk_val, pct, summ, topk_freq]
+                             if x is not None), **concat_sort)

         else:
             if stats is None:
@@ -3917,7 +3941,8 @@ def _expand_items(into, key, items):
                 labels = ['count', 'unique', 'top', 'freq', 'min', 'max']
             else:
                 labels = stats
-            out = pd.concat(x for x in [topk_freq, topk_val] if x is not None)
+            out = pd.concat((x for x in [topk_freq, topk_val] if x is not None),
+                            **concat_sort)

         groups = self.get_groupby_vars()
         idx = tuple([slice(None) for x in groups] + [labels])
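Two things change in these `pd.concat()` calls: the generator expression gains parentheses, because Python requires a generator to be parenthesized once it is no longer the sole argument, and the version-gated `**concat_sort` suppresses the column-sorting FutureWarning that pandas 0.23 introduced. A rough illustration with throwaway frames:

    import pandas as pd

    a = pd.DataFrame({'count': [3]}, index=['x'])
    b = pd.DataFrame({'unique': [2]}, index=['x'])

    # sort=False keeps the existing column order on the non-concatenation axis.
    combined = pd.concat((f for f in [a, b, None] if f is not None), sort=False)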
@@ -3970,8 +3995,15 @@ def _expand_items(into, key, items):
             tmpname = str(uuid.uuid4())
             out.index.names = groups + [tmpname]
             out.reset_index(inplace=True)
-            out[tmpname] = out[tmpname].astype('category', categories=categories,
-                                               ordered=True)
+            if pd_version >= (0, 21, 0):
+                from pandas.api.types import CategoricalDtype
+                out[tmpname] = out[tmpname].astype(CategoricalDtype(
+                    categories=categories,
+                    ordered=True))
+            else:
+                out[tmpname] = out[tmpname].astype('category',
+                                                   categories=categories,
+                                                   ordered=True)
             out.sort_values(groups + [tmpname], inplace=True)
             out.set_index(groups + [tmpname], inplace=True)
             out.index.names = groups + [None]
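Passing `categories=`/`ordered=` straight to `.astype('category', ...)` was deprecated in pandas 0.21 and removed in 0.25, so newer versions build a `CategoricalDtype` instead. A hedged sketch of the two spellings (the Series and category list are illustrative):

    import pandas as pd
    from pandas.api.types import CategoricalDtype

    s = pd.Series(['min', 'max', 'mean'])
    cats = ['count', 'mean', 'min', 'max']

    # pandas >= 0.21
    ordered = s.astype(CategoricalDtype(categories=cats, ordered=True))

    # older pandas only (keyword form, removed in 0.25)
    # ordered = s.astype('category', categories=cats, ordered=True)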
@@ -8178,7 +8210,7 @@ def get(self, key, default=None):
         '''
         out = self._fetch(from_=key + 1, to=key + 1)
         try:
-            return out.get_value(out.index.values[0], self._columns[0])
+            return out.at[out.index.values[0], self._columns[0]]
         except (KeyError, IndexError):
             pass
         return default
@@ -9012,7 +9044,7 @@ def unique(self):
             del out[tmpname]
             return out.groupby(names)[var].unique()

-        return pd.Series(out.index, name=self.name).as_matrix()
+        return pd.Series(out.index, name=self.name).values

     def nunique(self, dropna=True):
         '''