Skip to content

Commit 687c145

Browse files
committed
rebuild retest
1 parent c175229 commit 687c145

File tree

8 files changed

+71
-37
lines changed

8 files changed

+71
-37
lines changed

build/lib/data_algebra/db_model.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,18 @@ def _db_is_bad_expr(dbmodel, expression):
4444
)
4545

4646

47+
def _db_if_else_expr(dbmodel, expression):
    """Render a three-argument ``if_else`` expression as a SQL CASE expression.

    ``expression.args`` supplies, in order, the condition, the value used
    when the condition holds, and the value used when it does not.  Each is
    translated with ``dbmodel.expr_to_sql(..., want_inline_parens=True)``.

    The generated SQL tests the condition and its negation in separate WHEN
    branches and falls through to ``ELSE NULL`` when neither branch matches.
    """
    condition, when_true, when_false = (
        dbmodel.expr_to_sql(expression.args[position], want_inline_parens=True)
        for position in range(3)
    )
    pieces = [
        "CASE",
        " WHEN ", condition, " THEN ", when_true,
        " WHEN NOT ", condition, " THEN ", when_false,
        " ELSE NULL END",
    ]
    return "".join(pieces)
57+
58+
4759
def _db_neg_expr(dbmodel, expression):
    """Render a unary negation as SQL.

    The single operand ``expression.args[0]`` is translated with
    ``dbmodel.expr_to_sql(..., want_inline_parens=True)`` and the result is
    wrapped as ``( -<operand> )``.
    """
    operand = expression.args[0]
    operand_sql = dbmodel.expr_to_sql(operand, want_inline_parens=True)
    return "".join(["( -", operand_sql, " )"])
@@ -53,6 +65,7 @@ def _db_neg_expr(dbmodel, expression):
5365
"is_null": _db_is_null_expr,
5466
"is_bad": _db_is_bad_expr,
5567
"neg": _db_neg_expr,
68+
"if_else": _db_if_else_expr,
5669
}
5770

5871

@@ -237,9 +250,9 @@ def extend_to_sql(self, extend_node, *, using=None, temp_id_source=None):
237250
self.expr_to_sql(oi) + window_term + " AS " + self.quote_identifier(ci)
238251
for (ci, oi) in subops.items()
239252
]
240-
origcols = {k for k in using if k not in subops.keys()}
253+
origcols = [k for k in using if k not in subops.keys()]
241254
if len(origcols) > 0:
242-
derived = [self.quote_identifier(ci) for ci in origcols] + derived
255+
derived = [self.quote_identifier(ci) for ci in set(origcols)] + derived
243256
sql_str = (
244257
"SELECT "
245258
+ ", ".join(derived)

build/lib/data_algebra/expr_rep.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from typing import Union
22
import collections
33

4-
import pandas
5-
64
import data_algebra.util
75
import data_algebra.env
86

@@ -43,6 +41,15 @@ def __uop_expr__(self, op, *, params=None):
4341
raise TypeError("op is supposed to be a string")
4442
return Expression(op, (self,), params=params)
4543

44+
def __triop_expr__(self, op, x, y):
    """Build a three-argument expression node ``op(self, x, y)``.

    ``op`` must be a string naming the operator; anything else raises
    ``TypeError``.  ``x`` and ``y`` are wrapped in ``Value`` unless they are
    already ``Term`` instances.  The resulting ``Expression`` is created with
    ``inline=False``.
    """
    if not isinstance(op, str):
        raise TypeError("op is supposed to be a string")
    # Promote plain Python values to Value terms, leaving Terms untouched.
    x, y = (arg if isinstance(arg, Term) else Value(arg) for arg in (x, y))
    return Expression(op, (self, x, y), inline=False)
52+
4653
# tree re-write
4754

4855
def replace_view(self, view):
@@ -289,6 +296,9 @@ def is_null(self):
289296
def is_bad(self):
    """Return the unary ``"is_bad"`` expression built over this term."""
    op_name = "is_bad"
    return self.__uop_expr__(op_name)
291298

299+
def if_else(self, x, y):
    """Return the ``"if_else"`` expression with this term as the condition.

    ``x`` and ``y`` are passed through, in that order, to
    ``__triop_expr__`` as the second and third arguments.
    """
    branches = (x, y)
    return self.__triop_expr__("if_else", *branches)
301+
292302

293303
class Value(Term):
294304
def __init__(self, value):
@@ -337,15 +347,12 @@ def get_column_names(self, columns_seen):
337347
}
338348

339349

340-
pandas_eval_env = {
341-
"is_null": lambda x: pandas.isnull(x),
342-
"is_bad": data_algebra.util.is_bad,
343-
}
344-
345-
346350
# Per-operator formatters that turn an expression node into source text for
# pandas evaluation.  Each entry maps an operator name to a callable that
# receives the expression and returns a string; function-style operators are
# rendered as "@name(...)" calls.
pd_formatters = {
    "is_bad": lambda expr: "".join(
        ["@is_bad(", expr.args[0].to_pandas(), ")"]
    ),
    "is_null": lambda expr: "".join(
        ["@is_null(", expr.args[0].to_pandas(), ")"]
    ),
    # Three operands: condition, value-if-true, value-if-false.
    "if_else": lambda expr: "".join(
        [
            "@if_else(",
            ", ".join(expr.args[position].to_pandas() for position in range(3)),
            ")",
        ]
    ),
    # Negation asks the operand to parenthesize itself when rendered inline.
    "neg": lambda expr: "".join(
        ["-", expr.args[0].to_pandas(want_inline_parens=True)]
    ),
}
351358

build/lib/data_algebra/pandas_model.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
1+
2+
import numpy
13
import pandas
24

35
import data_algebra
6+
import data_algebra.util
47
import data_algebra.data_model
58
import data_algebra.expr_rep
69
import data_algebra.data_ops
710

811

12+
# Helper callables made available by name when expressions are evaluated
# against a pandas data frame (passed as the local dictionary of the eval).
# NOTE(review): numpy.where counts a NaN condition as true, whereas the SQL
# rendering of if_else produces NULL for a NULL condition -- confirm whether
# the two backends are meant to agree on missing conditions.
pandas_eval_env = dict(
    is_null=lambda x: pandas.isnull(x),
    is_bad=data_algebra.util.is_bad,
    if_else=lambda c, x, y: numpy.where(c, x, y),
)
17+
18+
919
class PandasModel(data_algebra.data_model.DataModel):
1020
def __init__(self):
1121
data_algebra.data_model.DataModel.__init__(self)
@@ -52,7 +62,7 @@ def extend_step(self, op, *, data_map, eval_env):
5262
op_src = opk.to_pandas()
5363
res[k] = res.eval(
5464
op_src,
55-
local_dict=data_algebra.expr_rep.pandas_eval_env,
65+
local_dict=pandas_eval_env,
5666
global_dict=eval_env,
5767
)
5868
else:

build/lib/data_algebra/util.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,11 @@ def equivalent_frames(
107107
if not all([ca_null[i] == cb_null[i] for i in range(a.shape[0])]):
108108
return False
109109
if can_convert_v_to_numeric(ca):
110+
ca = numpy.asarray(ca, dtype=float)
111+
cb = numpy.asarray(cb, dtype=float)
110112
dif = ca - cb
111-
if dif.abs().max() > float_tol:
113+
dif = numpy.asarray([abs(d) for d in dif if not pandas.isnull(d)])
114+
if dif.max() > float_tol:
112115
return False
113116
else:
114117
if not all([ca[i] == cb[i] for i in range(a.shape[0])]):

coverage.txt

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,28 @@
22
platform darwin -- Python 3.6.9, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
33
rootdir: /Users/johnmount/Documents/work/data_algebra
44
plugins: cov-2.7.1
5-
collected 29 items
5+
collected 30 items
66

77
tests/test_R_yaml.py . [ 3%]
88
tests/test_apply.py . [ 6%]
99
tests/test_cdata1.py . [ 10%]
10-
tests/test_cdata_example.py .... [ 24%]
11-
tests/test_cols_used.py . [ 27%]
12-
tests/test_dask.py .. [ 34%]
13-
tests/test_datatable.py . [ 37%]
14-
tests/test_drop_columns.py . [ 41%]
15-
tests/test_example_data_ops.py . [ 44%]
16-
tests/test_exp.py . [ 48%]
17-
tests/test_export_neg.py . [ 51%]
18-
tests/test_free_expr.py . [ 55%]
19-
tests/test_natural_join.py . [ 58%]
20-
tests/test_neg.py . [ 62%]
21-
tests/test_null_bad.py . [ 65%]
22-
tests/test_parse.py . [ 68%]
23-
tests/test_poject.py . [ 72%]
24-
tests/test_scatter_example.py . [ 75%]
25-
tests/test_scoring_example.py . [ 79%]
10+
tests/test_cdata_example.py .... [ 23%]
11+
tests/test_cols_used.py . [ 26%]
12+
tests/test_dask.py .. [ 33%]
13+
tests/test_datatable.py . [ 36%]
14+
tests/test_drop_columns.py . [ 40%]
15+
tests/test_example_data_ops.py . [ 43%]
16+
tests/test_exp.py . [ 46%]
17+
tests/test_export_neg.py . [ 50%]
18+
tests/test_free_expr.py . [ 53%]
19+
tests/test_if_else.py . [ 56%]
20+
tests/test_natural_join.py . [ 60%]
21+
tests/test_neg.py . [ 63%]
22+
tests/test_null_bad.py . [ 66%]
23+
tests/test_parse.py . [ 70%]
24+
tests/test_poject.py . [ 73%]
25+
tests/test_scatter_example.py . [ 76%]
26+
tests/test_scoring_example.py . [ 80%]
2627
tests/test_simple.py ..... [ 96%]
2728
tests/test_sqlite.py . [100%]
2829

@@ -41,18 +42,18 @@ data_algebra/data_ops.py 800 166 79%
4142
data_algebra/data_pipe.py 183 41 78%
4243
data_algebra/data_types.py 39 19 51%
4344
data_algebra/datatable_model.py 131 81 38%
44-
data_algebra/db_model.py 357 83 77%
45+
data_algebra/db_model.py 362 83 77%
4546
data_algebra/diagram.py 52 52 0%
4647
data_algebra/env.py 46 7 85%
4748
data_algebra/expr.py 20 4 80%
48-
data_algebra/expr_rep.py 306 81 74%
49-
data_algebra/pandas_model.py 137 25 82%
49+
data_algebra/expr_rep.py 314 84 73%
50+
data_algebra/pandas_model.py 140 25 82%
5051
data_algebra/pending_eval.py 34 34 0%
5152
data_algebra/pipe.py 65 19 71%
52-
data_algebra/util.py 81 7 91%
53+
data_algebra/util.py 84 7 92%
5354
data_algebra/yaml.py 120 15 88%
5455
-----------------------------------------------------
55-
TOTAL 2909 792 73%
56+
TOTAL 2928 795 73%
5657

5758

58-
========================== 29 passed in 6.82 seconds ===========================
59+
========================== 30 passed in 7.22 seconds ===========================

data_algebra/db_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,9 @@ def extend_to_sql(self, extend_node, *, using=None, temp_id_source=None):
250250
self.expr_to_sql(oi) + window_term + " AS " + self.quote_identifier(ci)
251251
for (ci, oi) in subops.items()
252252
]
253-
origcols = {k for k in using if k not in subops.keys()}
253+
origcols = [k for k in using if k not in subops.keys()]
254254
if len(origcols) > 0:
255-
derived = [self.quote_identifier(ci) for ci in origcols] + derived
255+
derived = [self.quote_identifier(ci) for ci in set(origcols)] + derived
256256
sql_str = (
257257
"SELECT "
258258
+ ", ".join(derived)
216 Bytes
Binary file not shown.

dist/data_algebra-0.2.1.tar.gz

201 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)