Test NA entries in cdata control tables; add pandas_to_example_str helper and more tests

JohnMount · JohnMount · commit ed308bb3ce83 · 2019-09-19T09:09:03.000-07:00
diff --git a/build/lib/data_algebra/cdata_impl.py b/build/lib/data_algebra/cdata_impl.py
@@ -132,7 +132,7 @@ def transform(
 
     def compose(self, other):
         """
-        Compose transforms
+        Experimental method to compose transforms
         (self.compose(other)).transform(data) == self.transform(other.transform(data))
 
         :param other: another data_algebra.cdata_impl.RecordMap
@@ -183,7 +183,7 @@ def __rrshift__(self, other):  # override other >> self
         if other is None:
             return self
         if isinstance(other, RecordMap):
-            # data >> other >> self
+            # (data >> other) >> self == data >> (other >> self)
             return self.compose(other)
         return self.transform(other)
 
diff --git a/build/lib/data_algebra/util.py b/build/lib/data_algebra/util.py
@@ -34,6 +34,16 @@ def is_bad(x):
 
 # for testing
 
+def pandas_to_example_str(obj):
+    if not isinstance(obj, pandas.DataFrame):
+        raise TypeError("Expect obj to be pandas.DataFrame")
+    pstr = 'pandas.DataFrame({'
+    for k in obj.columns:
+        cells = ['None' if pandas.isnull(v) else v.__repr__() for v in obj[k]]
+        pstr = pstr + "\n    " + k.__repr__() + ": [" + ', '.join(cells) + "],"
+    pstr = pstr + "\n    })"
+    return pstr
+
 
 def equivalent_frames(
     a,
diff --git a/coverage.txt b/coverage.txt
@@ -1,56 +1,56 @@
 ============================= test session starts ==============================
-platform linux -- Python 3.6.9, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
-rootdir: /home/john/Documents/work/data_algebra
+platform darwin -- Python 3.6.9, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
+rootdir: /Users/johnmount/Documents/work/data_algebra
 plugins: cov-2.7.1
-collected 24 items
+collected 27 items
 
-tests/test_R_yaml.py .                                                   [  4%]
-tests/test_apply.py .                                                    [  8%]
-tests/test_cdata1.py .                                                   [ 12%]
-tests/test_cdata_example.py ..                                           [ 20%]
-tests/test_dask.py ..                                                    [ 29%]
-tests/test_datatable.py .                                                [ 33%]
-tests/test_drop_columns.py .                                             [ 37%]
-tests/test_example_data_ops.py .                                         [ 41%]
-tests/test_exp.py .                                                      [ 45%]
-tests/test_export_neg.py .                                               [ 50%]
-tests/test_free_expr.py .                                                [ 54%]
-tests/test_natural_join.py .                                             [ 58%]
+tests/test_R_yaml.py .                                                   [  3%]
+tests/test_apply.py .                                                    [  7%]
+tests/test_cdata1.py .                                                   [ 11%]
+tests/test_cdata_example.py ....                                         [ 25%]
+tests/test_dask.py ..                                                    [ 33%]
+tests/test_datatable.py .                                                [ 37%]
+tests/test_drop_columns.py .                                             [ 40%]
+tests/test_example_data_ops.py .                                         [ 44%]
+tests/test_exp.py .                                                      [ 48%]
+tests/test_export_neg.py .                                               [ 51%]
+tests/test_free_expr.py .                                                [ 55%]
+tests/test_natural_join.py .                                             [ 59%]
 tests/test_neg.py .                                                      [ 62%]
 tests/test_null_bad.py .                                                 [ 66%]
 tests/test_parse.py .                                                    [ 70%]
-tests/test_poject.py .                                                   [ 75%]
-tests/test_scoring_example.py .                                          [ 79%]
-tests/test_simple.py ....                                                [ 95%]
+tests/test_poject.py .                                                   [ 74%]
+tests/test_scoring_example.py .                                          [ 77%]
+tests/test_simple.py .....                                               [ 96%]
 tests/test_sqlite.py .                                                   [100%]
 
------------ coverage: platform linux, python 3.6.9-final-0 -----------
+---------- coverage: platform darwin, python 3.6.9-final-0 -----------
 Name                              Stmts   Miss  Cover
 -----------------------------------------------------
 data_algebra/PostgreSQL.py           21      4    81%
 data_algebra/SQLite.py               43      5    88%
 data_algebra/SparkSQL.py             21     21     0%
-data_algebra/__init__.py             36      6    83%
-data_algebra/cdata.py               101     20    80%
-data_algebra/cdata_impl.py          151     60    60%
-data_algebra/dask_model.py          119    102    14%
+data_algebra/__init__.py             36     10    72%
+data_algebra/cdata.py               101     19    81%
+data_algebra/cdata_impl.py          151     59    61%
+data_algebra/dask_model.py          119     23    81%
 data_algebra/data_model.py           41     15    63%
-data_algebra/data_ops.py            764    212    72%
+data_algebra/data_ops.py            764    176    77%
 data_algebra/data_pipe.py           170     38    78%
-data_algebra/data_types.py           39     22    44%
-data_algebra/datatable_model.py     131    107    18%
+data_algebra/data_types.py           39     19    51%
+data_algebra/datatable_model.py     131     81    38%
 data_algebra/db_model.py            353     82    77%
 data_algebra/diagram.py              52     52     0%
 data_algebra/env.py                  46      7    85%
 data_algebra/expr.py                 21      4    81%
-data_algebra/expr_rep.py            306     82    73%
+data_algebra/expr_rep.py            306     81    74%
 data_algebra/pandas_model.py        135     22    84%
 data_algebra/pending_eval.py         34     34     0%
 data_algebra/pipe.py                 65     19    71%
-data_algebra/util.py                 72      6    92%
+data_algebra/util.py                 81      7    91%
 data_algebra/yaml.py                113     13    88%
 -----------------------------------------------------
-TOTAL                              2834    933    67%
+TOTAL                              2843    791    72%
 
 
-========================== 24 passed in 2.63 seconds ===========================
+========================== 27 passed in 6.69 seconds ===========================
diff --git a/data_algebra/util.py b/data_algebra/util.py
@@ -34,6 +34,16 @@ def is_bad(x):
 
 # for testing
 
+def pandas_to_example_str(obj):
+    if not isinstance(obj, pandas.DataFrame):
+        raise TypeError("Expect obj to be pandas.DataFrame")
+    pstr = 'pandas.DataFrame({'
+    for k in obj.columns:
+        cells = ['None' if pandas.isnull(v) else v.__repr__() for v in obj[k]]
+        pstr = pstr + "\n    " + k.__repr__() + ": [" + ', '.join(cells) + "],"
+    pstr = pstr + "\n    })"
+    return pstr
+
 
 def equivalent_frames(
     a,
diff --git a/dist/data_algebra-0.1.8-py3-none-any.whl b/dist/data_algebra-0.1.8-py3-none-any.whl
diff --git a/dist/data_algebra-0.1.8.tar.gz b/dist/data_algebra-0.1.8.tar.gz
diff --git a/tests/test_cdata_example.py b/tests/test_cdata_example.py
@@ -1,6 +1,7 @@
 
 import io
 import re
+import numpy
 
 import pandas
 
@@ -72,6 +73,7 @@ def test_keras_example():
     })
     assert data_algebra.util.equivalent_frames(res, expect)
 
+
 def test_cdata_block():
     data = pandas.DataFrame({
         'record_id': [1, 1, 1, 2, 2, 2],
@@ -115,3 +117,52 @@ def test_cdata_block():
     back = inv.transform(res)
 
     assert data_algebra.util.equivalent_frames(data, back)
+
+
+def test_cdata_missing():
+    data = pandas.DataFrame({
+        'record_id': [1, 1, 1, 2, 2, 2],
+        'row': ['row1', 'row2', 'row3', 'row1', 'row2', 'row3'],
+        'col1': [1, 4, 7, 11, 14, 17],
+        'col2': [2, 5, 8, 12, 15, 18],
+        'col3': [3, 6, 9, 13, 16, 19],
+    })
+
+    record_keys = ['record_id']
+
+    incoming_shape = pandas.DataFrame({
+        'row': ['row1', 'row2', 'row3'],
+        'col1': ['v11', 'v21', 'v31'],
+        'col2': [None, 'v22', 'v32'],
+        'col3': ['v13', 'v23', 'v33'],
+    })
+
+    outgoing_shape = pandas.DataFrame({
+        'column_label': ['rec_col1', 'rec_col2', 'rec_col3'],
+        'c_row1': ['v11', numpy.nan, 'v13'],
+        'c_row2': ['v21', 'v22', 'v23'],
+        'c_row3': ['v31', 'v32', 'v33'],
+    })
+
+    record_map = data_algebra.cdata_impl.RecordMap(
+        blocks_in=data_algebra.cdata.RecordSpecification(
+            control_table=incoming_shape,
+            record_keys=record_keys
+        ),
+        blocks_out=data_algebra.cdata.RecordSpecification(
+            control_table=outgoing_shape,
+            record_keys=record_keys
+        ),
+    )
+
+    res = record_map.transform(data)
+
+    expect = pandas.DataFrame({
+        'record_id': [1, 1, 1, 2, 2, 2],
+        'column_label': ['rec_col1', 'rec_col2', 'rec_col3', 'rec_col1', 'rec_col2', 'rec_col3'],
+        'c_row1': [1.0, None, 3.0, 11.0, None, 13.0],
+        'c_row2': [4, 5, 6, 14, 15, 16],
+        'c_row3': [7, 8, 9, 17, 18, 19],
+        })
+
+    assert data_algebra.util.equivalent_frames(res, expect)
diff --git a/tests/test_simple.py b/tests/test_simple.py
@@ -71,3 +71,15 @@ def test_simple():
     res = ops.eval_pandas(data_map={"d": d_local}, eval_env=locals())
     expect = pandas.DataFrame({"x": [1, 2], "y": [3, 4], "z": [1.25, 2.25]})
     assert data_algebra.util.equivalent_frames(res, expect)
+
+def test_pandas_to_example():
+    d = pandas.DataFrame({
+        'record_id': [1, 1, 1, 2, 2, 2],
+        'column_label': ['rec_col1', 'rec_col2', 'rec_col3', 'rec_col1', 'rec_col2', 'rec_col3'],
+        'c_row1': [1.0, None, 3.0, 11.0, None, 13.0],
+        'c_row2': [4, 5, 6, 14, 15, 16],
+        'c_row3': [7, 8, 9, 17, 18, 19],
+    })
+    d_str = data_algebra.util.pandas_to_example_str(d)
+    d_back = eval(d_str)
+    assert data_algebra.util.equivalent_frames(d, d_back)