add negation

JohnMount · JohnMount · commit 5f7461f71ad6 · 2019-09-13T07:21:41.000-07:00
diff --git a/build/lib/data_algebra/dask_model.py b/build/lib/data_algebra/dask_model.py
@@ -137,3 +137,16 @@ def natural_join_step(self, op, *, data_map, eval_env):
                 res = res.drop(c + "_tmp_right_col", axis=1)
         res = res.reset_index(drop=True)
         return res
+
+    def order_rows_step(self, op, *, data_map, eval_env):
+        """Evaluate op's source and set op.order_columns[0] as its index, keeping the column.
+        Raises TypeError for a non-OrderRowsNode op, RuntimeError for multi-column or reverse sorts."""
+        if not isinstance(op, data_algebra.data_ops.OrderRowsNode):
+            raise TypeError("op was supposed to be a data_algebra.data_ops.OrderRowsNode")
+        if len(op.order_columns) > 1:
+            raise RuntimeError("sorting doesn't support more than one order column in dask yet")
+        if len(op.reverse) > 0:
+            raise RuntimeError("sorting doesn't support reverse in dask yet")
+        res = op.sources[0].eval_pandas_implementation(data_map=data_map, eval_env=eval_env, pandas_model=self)
+        # set_index returns a new frame instead of mutating res; on a dask frame it also sorts by the index
+        return res.set_index(op.order_columns[0], drop=False)  # may cause problems in later steps
diff --git a/build/lib/data_algebra/expr_rep.py b/build/lib/data_algebra/expr_rep.py
@@ -307,17 +307,24 @@ def get_column_names(self, columns_seen):
 
 
 # map from op-name to special Python formatting code
-py_formatters = {"___": lambda expr: expr.to_python()}
+py_formatters = {
+    "neg": lambda expr: "-(" + expr.args[0].to_python() + ")",  # Python rendering of the operand, not pandas
+}
+
 
 pandas_eval_env = {
     'is_null': lambda x: pandas.isnull(x),
-    'is_bad': data_algebra.util.is_bad
+    'is_bad': data_algebra.util.is_bad,
 }
+
+
 pd_formatters = {
     "is_bad": lambda expr: "@is_bad(" + expr.args[0].to_pandas() + ")",
     "is_null": lambda expr: "@is_null(" + expr.args[0].to_pandas() + ")",
+    "neg": lambda expr: "-(" + expr.args[0].to_pandas() + ")",  # unary minus around the parenthesized operand
 }
 
+
 r_formatters = {"___": lambda expr: expr.to_R()}
 
 
diff --git a/coverage.txt b/coverage.txt
@@ -2,19 +2,20 @@
 platform darwin -- Python 3.6.9, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
 rootdir: /Users/johnmount/Documents/work/data_algebra
 plugins: cov-2.7.1
-collected 16 items
+collected 17 items
 
-tests/test_apply.py .                                                    [  6%]
-tests/test_cdata1.py .                                                   [ 12%]
-tests/test_dask.py ..                                                    [ 25%]
-tests/test_drop_columns.py .                                             [ 31%]
-tests/test_example_data_ops.py .                                         [ 37%]
-tests/test_natural_join.py .                                             [ 43%]
-tests/test_null_bad.py .                                                 [ 50%]
-tests/test_parse.py .                                                    [ 56%]
-tests/test_poject.py .                                                   [ 62%]
-tests/test_scoring_example.py .                                          [ 68%]
-tests/test_simple.py ....                                                [ 93%]
+tests/test_apply.py .                                                    [  5%]
+tests/test_cdata1.py .                                                   [ 11%]
+tests/test_dask.py ..                                                    [ 23%]
+tests/test_drop_columns.py .                                             [ 29%]
+tests/test_example_data_ops.py .                                         [ 35%]
+tests/test_natural_join.py .                                             [ 41%]
+tests/test_neg.py .                                                      [ 47%]
+tests/test_null_bad.py .                                                 [ 52%]
+tests/test_parse.py .                                                    [ 58%]
+tests/test_poject.py .                                                   [ 64%]
+tests/test_scoring_example.py .                                          [ 70%]
+tests/test_simple.py ....                                                [ 94%]
 tests/test_sqlite.py .                                                   [100%]
 
 ---------- coverage: platform darwin, python 3.6.9-final-0 -----------
@@ -25,20 +26,20 @@ data_algebra/SQLite.py            43      5    88%
 data_algebra/__init__.py          30      8    73%
 data_algebra/cdata.py             48      4    92%
 data_algebra/cdata_impl.py        92     15    84%
-data_algebra/dask_model.py        91     15    84%
+data_algebra/dask_model.py       101     18    82%
 data_algebra/data_ops.py         610    110    82%
 data_algebra/data_pipe.py        156     33    79%
 data_algebra/data_types.py        45     27    40%
 data_algebra/db_model.py         349     74    79%
 data_algebra/env.py               54     12    78%
-data_algebra/expr_rep.py         294     88    70%
+data_algebra/expr_rep.py         294     86    71%
 data_algebra/pandas_model.py     136     18    87%
 data_algebra/pending_eval.py      34     34     0%
 data_algebra/pipe.py              65     19    71%
 data_algebra/util.py              72      6    92%
 data_algebra/yaml.py              76     11    86%
 --------------------------------------------------
-TOTAL                           2216    483    78%
+TOTAL                           2226    484    78%
 
 
-========================== 16 passed in 4.72 seconds ===========================
+========================== 17 passed in 6.88 seconds ===========================
diff --git a/data_algebra/expr_rep.py b/data_algebra/expr_rep.py
@@ -307,17 +307,24 @@ def get_column_names(self, columns_seen):
 
 
 # map from op-name to special Python formatting code
-py_formatters = {"___": lambda expr: expr.to_python()}
+py_formatters = {
+    "neg": lambda expr: "-(" + expr.args[0].to_python() + ")",  # Python rendering of the operand, not pandas
+}
+
 
 pandas_eval_env = {
     'is_null': lambda x: pandas.isnull(x),
-    'is_bad': data_algebra.util.is_bad
+    'is_bad': data_algebra.util.is_bad,
 }
+
+
 pd_formatters = {
     "is_bad": lambda expr: "@is_bad(" + expr.args[0].to_pandas() + ")",
     "is_null": lambda expr: "@is_null(" + expr.args[0].to_pandas() + ")",
+    "neg": lambda expr: "-(" + expr.args[0].to_pandas() + ")",  # unary minus around the parenthesized operand
 }
 
+
 r_formatters = {"___": lambda expr: expr.to_R()}
 
 
diff --git a/dist/data_algebra-0.1.5-py3-none-any.whl b/dist/data_algebra-0.1.5-py3-none-any.whl
diff --git a/dist/data_algebra-0.1.5.tar.gz b/dist/data_algebra-0.1.5.tar.gz
diff --git a/tests/test_neg.py b/tests/test_neg.py
@@ -0,0 +1,42 @@
+
+import pandas
+
+import data_algebra
+import data_algebra.env
+import data_algebra.util
+import data_algebra.yaml
+from data_algebra.data_ops import *
+
+
+
+def test_neg():
+    """Unary minus in an extend expression negates the column and survives the op round-trip check."""
+    # ask YAML to write simpler structures
+    data_algebra.yaml.fix_ordered_dict_yaml_rep()
+
+    d_local = pandas.DataFrame({
+        'subjectID': [1, 1, 2, 2],
+        'surveyCategory': ["withdrawal behavior", "positive re-framing", "withdrawal behavior", "positive re-framing"],
+        'assessmentTotal': [5, 2, 3, 4],
+    })
+
+    # the expression under test is a bare unary minus applied to one column
+    with data_algebra.env.Env(locals()):
+        ops = TableDescription('d',
+                               ['subjectID',
+                                'surveyCategory',
+                                'assessmentTotal']). \
+            extend({'v': '-assessmentTotal'})
+
+    res_local = ops.transform(d_local)
+
+    expect = pandas.DataFrame({
+        'subjectID': [1, 1, 2, 2],
+        'surveyCategory': ["withdrawal behavior", "positive re-framing", "withdrawal behavior", "positive re-framing"],
+        'assessmentTotal': [5, 2, 3, 4],
+        'v': [-5, -2, -3, -4],
+    })
+
+    assert data_algebra.util.equivalent_frames(res_local, expect, float_tol=1e-3)
+
+    data_algebra.yaml.check_op_round_trip(ops)