Skip to content

Commit 4e77c04

Browse files
committed
work on zero-length operators on project
1 parent bf4d068 commit 4e77c04

File tree

6 files changed, with 89 additions and 49 deletions.

build/lib/data_algebra/pandas_model.py

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -128,28 +128,31 @@ def project_step(self, op, *, data_map, eval_env):
128128
# build an agg list: https://www.shanelynn.ie/summarising-aggregation-and-grouping-data-in-python-pandas/
129129
# https://stackoverflow.com/questions/44635626/rename-result-columns-from-pandas-aggregation-futurewarning-using-a-dict-with
130130
for (k, opk) in op.ops.items():
131-
if len(opk.args) != 1:
131+
if len(opk.args) > 1:
132132
raise ValueError(
133133
"non-trivial aggregation expression: " + str(k) + ": " + str(opk)
134134
)
135-
if not isinstance(opk.args[0], data_algebra.expr_rep.ColumnReference):
136-
raise ValueError(
137-
"windows expression argument must be a column: "
138-
+ str(k)
139-
+ ": "
140-
+ str(opk)
141-
)
135+
if len(opk.args) > 0:
136+
if not isinstance(opk.args[0], data_algebra.expr_rep.ColumnReference):
137+
raise ValueError(
138+
"windows expression argument must be a column: "
139+
+ str(k)
140+
+ ": "
141+
+ str(opk)
142+
)
142143
res = op.sources[0].eval_implementation(
143144
data_map=data_map, eval_env=eval_env, data_model=self
144145
)
146+
res['_data_table_temp_col'] = 1
145147
if len(op.group_by) > 0:
146148
res = res.groupby(op.group_by)
147-
remove_temp_col = False
148149
if len(op.ops) > 0:
149-
cols = {k: res[str(opk.args[0])].agg(opk.op) for (k, opk) in op.ops.items()}
150+
cols = {k: (res[str(opk.args[0])].agg(opk.op) if
151+
len(opk.args)>0 else
152+
res['_data_table_temp_col'].agg(opk.op))
153+
for (k, opk) in op.ops.items()}
150154
else:
151-
cols = {'_data_table_temp_col': res[op.sources[0].column_names[0]].agg('sum') }
152-
remove_temp_col = True
155+
cols = {'_data_table_temp_col': res['_data_table_temp_col'].agg('sum') }
153156

154157
# agg can return scalars, which then can't be made into a pandas.DataFrame
155158
def promote_scalar(v):
@@ -164,7 +167,7 @@ def promote_scalar(v):
164167
res = self.columns_to_frame(cols).reset_index(
165168
drop=len(op.group_by) < 1
166169
) # grouping variables in the index
167-
if remove_temp_col:
170+
if '_data_table_temp_col' in res.columns:
168171
res = res.drop('_data_table_temp_col', 1)
169172
return res
170173

coverage.txt

Lines changed: 24 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -2,33 +2,34 @@
22
platform darwin -- Python 3.6.9, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
33
rootdir: /Users/johnmount/Documents/work/data_algebra
44
plugins: cov-2.7.1
5-
collected 36 items
5+
collected 37 items
66

77
tests/test_R_yaml.py . [ 2%]
88
tests/test_apply.py . [ 5%]
99
tests/test_cdata1.py . [ 8%]
10-
tests/test_cdata_example.py .... [ 19%]
11-
tests/test_cols_used.py . [ 22%]
10+
tests/test_cdata_example.py .... [ 18%]
11+
tests/test_cols_used.py . [ 21%]
1212
tests/test_dask.py .. [ 27%]
13-
tests/test_datatable.py . [ 30%]
14-
tests/test_drop_columns.py . [ 33%]
15-
tests/test_example_data_ops.py . [ 36%]
16-
tests/test_exp.py . [ 38%]
17-
tests/test_export_neg.py . [ 41%]
18-
tests/test_free_expr.py . [ 44%]
19-
tests/test_if_else.py . [ 47%]
20-
tests/test_math.py . [ 50%]
21-
tests/test_natural_join.py . [ 52%]
22-
tests/test_neg.py . [ 55%]
23-
tests/test_null_bad.py . [ 58%]
24-
tests/test_parse.py . [ 61%]
25-
tests/test_poject.py .. [ 66%]
26-
tests/test_project.py . [ 69%]
27-
tests/test_scatter_example.py . [ 72%]
28-
tests/test_scoring_example.py . [ 75%]
29-
tests/test_select_stacking.py . [ 77%]
30-
tests/test_simple.py ..... [ 91%]
31-
tests/test_sqlite.py . [ 94%]
13+
tests/test_datatable.py . [ 29%]
14+
tests/test_drop_columns.py . [ 32%]
15+
tests/test_example_data_ops.py . [ 35%]
16+
tests/test_exp.py . [ 37%]
17+
tests/test_export_neg.py . [ 40%]
18+
tests/test_free_expr.py . [ 43%]
19+
tests/test_if_else.py . [ 45%]
20+
tests/test_math.py . [ 48%]
21+
tests/test_natural_join.py . [ 51%]
22+
tests/test_neg.py . [ 54%]
23+
tests/test_null_bad.py . [ 56%]
24+
tests/test_parse.py . [ 59%]
25+
tests/test_poject.py .. [ 64%]
26+
tests/test_project.py . [ 67%]
27+
tests/test_scatter_example.py . [ 70%]
28+
tests/test_scoring_example.py . [ 72%]
29+
tests/test_select_stacking.py . [ 75%]
30+
tests/test_simple.py ..... [ 89%]
31+
tests/test_sqlite.py . [ 91%]
32+
tests/test_strat_example.py . [ 94%]
3233
tests/test_window2.py . [ 97%]
3334
tests/test_window_fns.py . [100%]
3435

@@ -61,4 +62,4 @@ data_algebra/yaml.py 119 15 87%
6162
TOTAL 3396 1021 70%
6263

6364

64-
========================== 36 passed in 7.65 seconds ===========================
65+
========================== 37 passed in 7.58 seconds ===========================

data_algebra/pandas_model.py

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -128,28 +128,31 @@ def project_step(self, op, *, data_map, eval_env):
128128
# build an agg list: https://www.shanelynn.ie/summarising-aggregation-and-grouping-data-in-python-pandas/
129129
# https://stackoverflow.com/questions/44635626/rename-result-columns-from-pandas-aggregation-futurewarning-using-a-dict-with
130130
for (k, opk) in op.ops.items():
131-
if len(opk.args) != 1:
131+
if len(opk.args) > 1:
132132
raise ValueError(
133133
"non-trivial aggregation expression: " + str(k) + ": " + str(opk)
134134
)
135-
if not isinstance(opk.args[0], data_algebra.expr_rep.ColumnReference):
136-
raise ValueError(
137-
"windows expression argument must be a column: "
138-
+ str(k)
139-
+ ": "
140-
+ str(opk)
141-
)
135+
if len(opk.args) > 0:
136+
if not isinstance(opk.args[0], data_algebra.expr_rep.ColumnReference):
137+
raise ValueError(
138+
"windows expression argument must be a column: "
139+
+ str(k)
140+
+ ": "
141+
+ str(opk)
142+
)
142143
res = op.sources[0].eval_implementation(
143144
data_map=data_map, eval_env=eval_env, data_model=self
144145
)
146+
res['_data_table_temp_col'] = 1
145147
if len(op.group_by) > 0:
146148
res = res.groupby(op.group_by)
147-
remove_temp_col = False
148149
if len(op.ops) > 0:
149-
cols = {k: res[str(opk.args[0])].agg(opk.op) for (k, opk) in op.ops.items()}
150+
cols = {k: (res[str(opk.args[0])].agg(opk.op) if
151+
len(opk.args)>0 else
152+
res['_data_table_temp_col'].agg(opk.op))
153+
for (k, opk) in op.ops.items()}
150154
else:
151-
cols = {'_data_table_temp_col': res[op.sources[0].column_names[0]].agg('sum') }
152-
remove_temp_col = True
155+
cols = {'_data_table_temp_col': res['_data_table_temp_col'].agg('sum') }
153156

154157
# agg can return scalars, which then can't be made into a pandas.DataFrame
155158
def promote_scalar(v):
@@ -164,7 +167,7 @@ def promote_scalar(v):
164167
res = self.columns_to_frame(cols).reset_index(
165168
drop=len(op.group_by) < 1
166169
) # grouping variables in the index
167-
if remove_temp_col:
170+
if '_data_table_temp_col' in res.columns:
168171
res = res.drop('_data_table_temp_col', 1)
169172
return res
170173

18 Bytes
Binary file not shown.

dist/data_algebra-0.2.5.tar.gz

18 Bytes
Binary file not shown.

tests/test_strat_example.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
2+
# https://github.com/WinVector/pyvtreat/blob/master/Examples/StratifiedCrossPlan/StratifiedCrossPlan.ipynb
3+
4+
import pandas
5+
import data_algebra.util
6+
from data_algebra.data_ops import *
7+
8+
9+
def test_strat_example():
10+
prepared_stratified = pandas.DataFrame({
11+
'y': [1, 0, 0, 1, 0, 0],
12+
'g': [0, 0, 0, 1, 1, 1],
13+
'x': [1, 2, 3, 4, 5, 6]
14+
})
15+
16+
ops = describe_table(prepared_stratified). \
17+
project({
18+
'sum': 'y.sum()',
19+
'mean': 'y.mean()',
20+
'size': '_size()',
21+
},
22+
group_by=['g'])
23+
24+
res = ops.transform(prepared_stratified)
25+
26+
expect = pandas.DataFrame({
27+
'g': [0, 1],
28+
'sum': [1, 1],
29+
'mean': [0.3333333333333333, 0.3333333333333333],
30+
'size': [3, 3],
31+
})
32+
33+
assert data_algebra.util.equivalent_frames(res, expect)

0 commit comments

Comments
 (0)