check more for missing symbols in control tables

JohnMount · JohnMount · commit 3a811a2f0dc2 · 2020-01-31T14:54:35.000-08:00
diff --git a/Examples/cdata/ranking_pivot_example.ipynb b/Examples/cdata/ranking_pivot_example.ipynb
@@ -52,7 +52,7 @@
     "|  6 | 2004-01-09 00:00:00 | B   | None                  | None          | None                  | None  |\n",
     "</blockquote>\n",
     "\n",
-    "That is: for each `ID` pick the first three operations ordered by date, merging operations with the same timestamp.  Then write these results into a single row for each `ID`.\n",
+    "That is: for each `ID` pick the first three operations ordered by date, merging operations with the same timestamp.  Then write these results into a single row for each `ID`.  As is common when working with `Pandas`, we will have to treat `NaN` and `None` as fairly interchangeable.\n",
     "\n",
     "## The solution\n",
     "\n",
diff --git a/Examples/cdata/ranking_pivot_example.md b/Examples/cdata/ranking_pivot_example.md
@@ -37,7 +37,7 @@ Into this format:
 |  6 | 2004-01-09 00:00:00 | B   | None                  | None          | None                  | None  |
 </blockquote>
 
-That is: for each `ID` pick the first three operations ordered by date, merging operations with the same timestamp.  Then write these results into a single row for each `ID`.
+That is: for each `ID` pick the first three operations ordered by date, merging operations with the same timestamp.  Then write these results into a single row for each `ID`.  As is common when working with `Pandas`, we will have to treat `NaN` and `None` as fairly interchangeable.
 
 ## The solution
 
@@ -87,7 +87,19 @@ d
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -196,7 +208,19 @@ d2
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -338,7 +362,19 @@ diagram
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -395,7 +431,19 @@ row_record
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -481,7 +529,19 @@ res
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -590,7 +650,19 @@ res
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -717,7 +789,19 @@ res_db
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -809,7 +893,19 @@ res_db[['ID', 'DATE1', 'OP1', 'DATE2', 'OP2', 'DATE3', 'OP3']]
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -920,7 +1016,19 @@ res
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -1020,7 +1128,19 @@ res_db
 
 
 <div>
-
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -1111,4 +1231,3 @@ con.close()
 ```python
 
 ```
-
diff --git a/build/lib/data_algebra/cdata.py b/build/lib/data_algebra/cdata.py
@@ -9,11 +9,15 @@
 
 class RecordSpecification:
     def __init__(
-        self, control_table, *, record_keys=None, control_table_keys=None, strict=False
+        self, control_table, *, record_keys=None, control_table_keys=None, strict=False, pd=None
     ):
+        if pd is None:
+            pd = data_algebra.pd
         control_table = control_table.reset_index(inplace=False, drop=True)
         if control_table.shape[0] < 1:
             raise ValueError("control table should have at least 1 row")
+        if any(data_algebra.util.is_bad(control_table.columns)):
+            raise ValueError("control table column names can not be NA/NaN/inf/None")
         if len(control_table.columns) != len(set(control_table.columns)):
             raise ValueError("control table columns should be unique")
         self.control_table = control_table.reset_index(drop=True)
@@ -39,6 +43,9 @@ def __init__(
             raise ValueError(
                 "columns common to record_keys and control_table_keys: " + str(confused)
             )
+        for ck in self.control_table_keys:
+            if any(data_algebra.util.is_bad(control_table[ck], pd=pd)):
+                raise ValueError("NA/NaN/inf/None not allowed as control table keys")
         if strict:
             if not data_algebra.util.table_is_keyed_by_columns(
                 self.control_table, self.control_table_keys
diff --git a/coverage.txt b/coverage.txt
@@ -61,7 +61,7 @@ data_algebra/SQLite.py                   101      6    94%
 data_algebra/SparkSQL.py                  19      4    79%
 data_algebra/__init__.py                   5      0   100%
 data_algebra/arrow.py                    173     39    77%
-data_algebra/cdata.py                    325     91    72%
+data_algebra/cdata.py                    332     93    72%
 data_algebra/cdata_impl.py                10      1    90%
 data_algebra/connected_components.py      49      1    98%
 data_algebra/custom_functions.py          18      1    94%
@@ -80,10 +80,10 @@ data_algebra/flow_text.py                 17      0   100%
 data_algebra/near_sql.py                 140     16    89%
 data_algebra/pandas_model.py             197     22    89%
 data_algebra/test_util.py                126     18    86%
-data_algebra/util.py                      45      6    87%
+data_algebra/util.py                      45      4    91%
 data_algebra/yaml.py                      95     11    88%
 ----------------------------------------------------------
-TOTAL                                   4143    933    77%
+TOTAL                                   4150    933    78%
 
 
-============================== 91 passed in 8.60s ==============================
+============================== 91 passed in 8.56s ==============================
diff --git a/data_algebra/cdata.py b/data_algebra/cdata.py
@@ -9,11 +9,15 @@
 
 class RecordSpecification:
     def __init__(
-        self, control_table, *, record_keys=None, control_table_keys=None, strict=False
+        self, control_table, *, record_keys=None, control_table_keys=None, strict=False, pd=None
     ):
+        if pd is None:
+            pd = data_algebra.pd
         control_table = control_table.reset_index(inplace=False, drop=True)
         if control_table.shape[0] < 1:
             raise ValueError("control table should have at least 1 row")
+        if any(data_algebra.util.is_bad(control_table.columns)):
+            raise ValueError("control table column names can not be NA/NaN/inf/None")
         if len(control_table.columns) != len(set(control_table.columns)):
             raise ValueError("control table columns should be unique")
         self.control_table = control_table.reset_index(drop=True)
@@ -39,6 +43,9 @@ def __init__(
             raise ValueError(
                 "columns common to record_keys and control_table_keys: " + str(confused)
             )
+        for ck in self.control_table_keys:
+            if any(data_algebra.util.is_bad(control_table[ck], pd=pd)):
+                raise ValueError("NA/NaN/inf/None not allowed as control table keys")
         if strict:
             if not data_algebra.util.table_is_keyed_by_columns(
                 self.control_table, self.control_table_keys
diff --git a/dist/data_algebra-0.4.7-py3-none-any.whl b/dist/data_algebra-0.4.7-py3-none-any.whl
diff --git a/dist/data_algebra-0.4.7.tar.gz b/dist/data_algebra-0.4.7.tar.gz