Skip to content

Commit 5f7461f

Browse files
committed
add negation
1 parent eb7a4ee commit 5f7461f

File tree

7 files changed

+90
-20
lines changed

7 files changed

+90
-20
lines changed

build/lib/data_algebra/dask_model.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,3 +137,16 @@ def natural_join_step(self, op, *, data_map, eval_env):
137137
res = res.drop(c + "_tmp_right_col", axis=1)
138138
res = res.reset_index(drop=True)
139139
return res
140+
141+
def order_rows_step(self, op, *, data_map, eval_env):
    """Evaluate an OrderRowsNode on dask by ordering on a single column.

    Only one order column with no reverse entries is handled; any other
    request raises rather than returning data in the wrong order.

    :param op: the data_algebra.data_ops.OrderRowsNode to evaluate.
    :param data_map: passed through unchanged to the source node's evaluation.
    :param eval_env: passed through unchanged to the source node's evaluation.
    :return: the evaluated source frame, ordered by op.order_columns[0]
             (returned as-is when op.order_columns is empty).
    :raises TypeError: if op is not an OrderRowsNode.
    :raises RuntimeError: if more than one order column, or any reverse
                          entry, is requested.
    """
    if not isinstance(op, data_algebra.data_ops.OrderRowsNode):
        raise TypeError("op was supposed to be a data_algebra.data_ops.OrderRowsNode")
    if len(op.order_columns) > 1:
        raise RuntimeError("sorting doesn't support more than one order column in dask yet")
    if len(op.reverse) > 0:
        raise RuntimeError("sorting doesn't support reverse in dask yet")
    res = op.sources[0].eval_pandas_implementation(data_map=data_map,
                                                   eval_env=eval_env,
                                                   pandas_model=self)
    if len(op.order_columns) < 1:
        # nothing to order by; indexing order_columns[0] below would raise IndexError
        return res
    # set_index returns a new frame rather than modifying res, so the result
    # must be kept or the ordering is silently lost.  drop=False keeps the
    # order column available as a regular column in the output.
    res = res.set_index(op.order_columns[0], drop=False)
    # Drop the temporary index so later steps do not see a label that is both
    # an index name and a column name (same clean-up natural_join_step uses).
    res = res.reset_index(drop=True)
    return res

build/lib/data_algebra/expr_rep.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -307,17 +307,24 @@ def get_column_names(self, columns_seen):
307307

308308

309309
# map from op-name to special Python formatting code
310-
py_formatters = {"___": lambda expr: expr.to_python()}
310+
# Python-source formatters keyed by op-name.  Operands are rendered with
# to_python() (not to_pandas()) so the emitted text is Python source, matching
# how pd_formatters uses to_pandas() and r_formatters uses to_R().
py_formatters = {
    "neg": lambda expr: "-(" + expr.args[0].to_python() + ")",
}
313+
311314

312315
pandas_eval_env = {
313316
'is_null': lambda x: pandas.isnull(x),
314-
'is_bad': data_algebra.util.is_bad
317+
'is_bad': data_algebra.util.is_bad,
315318
}
319+
320+
316321
pd_formatters = {
317322
"is_bad": lambda expr: "@is_bad(" + expr.args[0].to_pandas() + ")",
318323
"is_null": lambda expr: "@is_null(" + expr.args[0].to_pandas() + ")",
324+
"neg": lambda expr: "-(" + expr.args[0].to_pandas() + ")",
319325
}
320326

327+
321328
r_formatters = {"___": lambda expr: expr.to_R()}
322329

323330

coverage.txt

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,20 @@
22
platform darwin -- Python 3.6.9, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
33
rootdir: /Users/johnmount/Documents/work/data_algebra
44
plugins: cov-2.7.1
5-
collected 16 items
5+
collected 17 items
66

7-
tests/test_apply.py . [ 6%]
8-
tests/test_cdata1.py . [ 12%]
9-
tests/test_dask.py .. [ 25%]
10-
tests/test_drop_columns.py . [ 31%]
11-
tests/test_example_data_ops.py . [ 37%]
12-
tests/test_natural_join.py . [ 43%]
13-
tests/test_null_bad.py . [ 50%]
14-
tests/test_parse.py . [ 56%]
15-
tests/test_poject.py . [ 62%]
16-
tests/test_scoring_example.py . [ 68%]
17-
tests/test_simple.py .... [ 93%]
7+
tests/test_apply.py . [ 5%]
8+
tests/test_cdata1.py . [ 11%]
9+
tests/test_dask.py .. [ 23%]
10+
tests/test_drop_columns.py . [ 29%]
11+
tests/test_example_data_ops.py . [ 35%]
12+
tests/test_natural_join.py . [ 41%]
13+
tests/test_neg.py . [ 47%]
14+
tests/test_null_bad.py . [ 52%]
15+
tests/test_parse.py . [ 58%]
16+
tests/test_poject.py . [ 64%]
17+
tests/test_scoring_example.py . [ 70%]
18+
tests/test_simple.py .... [ 94%]
1819
tests/test_sqlite.py . [100%]
1920

2021
---------- coverage: platform darwin, python 3.6.9-final-0 -----------
@@ -25,20 +26,20 @@ data_algebra/SQLite.py 43 5 88%
2526
data_algebra/__init__.py 30 8 73%
2627
data_algebra/cdata.py 48 4 92%
2728
data_algebra/cdata_impl.py 92 15 84%
28-
data_algebra/dask_model.py 91 15 84%
29+
data_algebra/dask_model.py 101 18 82%
2930
data_algebra/data_ops.py 610 110 82%
3031
data_algebra/data_pipe.py 156 33 79%
3132
data_algebra/data_types.py 45 27 40%
3233
data_algebra/db_model.py 349 74 79%
3334
data_algebra/env.py 54 12 78%
34-
data_algebra/expr_rep.py 294 88 70%
35+
data_algebra/expr_rep.py 294 86 71%
3536
data_algebra/pandas_model.py 136 18 87%
3637
data_algebra/pending_eval.py 34 34 0%
3738
data_algebra/pipe.py 65 19 71%
3839
data_algebra/util.py 72 6 92%
3940
data_algebra/yaml.py 76 11 86%
4041
--------------------------------------------------
41-
TOTAL 2216 483 78%
42+
TOTAL 2226 484 78%
4243

4344

44-
========================== 16 passed in 4.72 seconds ===========================
45+
========================== 17 passed in 6.88 seconds ===========================

data_algebra/expr_rep.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -307,17 +307,24 @@ def get_column_names(self, columns_seen):
307307

308308

309309
# map from op-name to special Python formatting code
310-
py_formatters = {"___": lambda expr: expr.to_python()}
310+
# Python-source formatters keyed by op-name.  Operands are rendered with
# to_python() (not to_pandas()) so the emitted text is Python source, matching
# how pd_formatters uses to_pandas() and r_formatters uses to_R().
py_formatters = {
    "neg": lambda expr: "-(" + expr.args[0].to_python() + ")",
}
313+
311314

312315
pandas_eval_env = {
313316
'is_null': lambda x: pandas.isnull(x),
314-
'is_bad': data_algebra.util.is_bad
317+
'is_bad': data_algebra.util.is_bad,
315318
}
319+
320+
316321
pd_formatters = {
317322
"is_bad": lambda expr: "@is_bad(" + expr.args[0].to_pandas() + ")",
318323
"is_null": lambda expr: "@is_null(" + expr.args[0].to_pandas() + ")",
324+
"neg": lambda expr: "-(" + expr.args[0].to_pandas() + ")",
319325
}
320326

327+
321328
r_formatters = {"___": lambda expr: expr.to_R()}
322329

323330

88 Bytes
Binary file not shown.

dist/data_algebra-0.1.5.tar.gz

194 Bytes
Binary file not shown.

tests/test_neg.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
2+
import pandas
3+
4+
import data_algebra
5+
import data_algebra.env
6+
import data_algebra.util
7+
import data_algebra.yaml
8+
from data_algebra.data_ops import *
9+
10+
11+
12+
def test_neg():
    """Check that a unary minus in an extend() expression negates a column.

    Builds a small pandas frame, derives ``v`` from ``'-assessmentTotal'``,
    compares the transformed frame with a hand-written expected frame, and
    finally runs ``data_algebra.yaml.check_op_round_trip`` on the pipeline.
    """
    # ask YAML to write simpler structures
    data_algebra.yaml.fix_ordered_dict_yaml_rep()

    d_local = pandas.DataFrame(
        {
            "subjectID": [1, 1, 2, 2],
            "surveyCategory": [
                "withdrawal behavior",
                "positive re-framing",
                "withdrawal behavior",
                "positive re-framing",
            ],
            "assessmentTotal": [5, 2, 3, 4],
        }
    )

    # not referenced by the expression below, but still part of locals()
    scale = 0.237

    with data_algebra.env.Env(locals()) as env:
        table = TableDescription(
            "d", ["subjectID", "surveyCategory", "assessmentTotal"]
        )
        ops = table.extend({"v": "-assessmentTotal"})

    res_local = ops.transform(d_local)

    # same rows as the input, plus the negated assessmentTotal column
    expect = pandas.DataFrame(
        {
            "subjectID": [1, 1, 2, 2],
            "surveyCategory": [
                "withdrawal behavior",
                "positive re-framing",
                "withdrawal behavior",
                "positive re-framing",
            ],
            "assessmentTotal": [5, 2, 3, 4],
            "v": [-5, -2, -3, -4],
        }
    )

    frames_match = data_algebra.util.equivalent_frames(
        res_local, expect, float_tol=1e-3
    )
    assert frames_match

    data_algebra.yaml.check_op_round_trip(ops)

0 commit comments

Comments
 (0)