Skip to content

Commit aa30836

Browse files
committed
Work on custom functions (TODO: SQLite-equivalent path)
1 parent faa09f8 commit aa30836

File tree

12 files changed

+110
-67
lines changed

12 files changed

+110
-67
lines changed

build/lib/data_algebra/custom_functions.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,6 @@ def make_custom_function_map(data_model):
109109
pandas_formatter=lambda expr: ("@min(" + expr.args[0].to_pandas() + ")"),
110110
implementation=lambda x: [numpy.min(x)] * len(x),
111111
),
112-
CustomFunction(
113-
name="fn", # special case, user defined function
114-
pandas_formatter=lambda expr: "@fn(" + expr.args[0].to_pandas() + ")",
115-
implementation=None,
116-
),
117112
]
118113
mp = {cf.name: cf for cf in custom_functions}
119114
return mp

build/lib/data_algebra/data_ops.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,24 +36,37 @@
3636

3737
# wrap a single argument function as a user callable function in pipeline
3838
# used for custom aggregators
39-
def user_fn(fn, var, display_form=None):
39+
def user_fn(fn, fvars=None, *, display_form=None):
4040
if isinstance(fn, str):
4141
if display_form is None:
4242
display_form = fn
4343
fn = eval(fn)
4444
if not callable(fn):
4545
raise TypeError("expected fn to be callable")
46+
if fvars is None:
47+
fvars = []
48+
if isinstance(fvars, str):
49+
fn_args = [data_algebra.expr_rep.ColumnReference(view=None, column_name=fvars)]
50+
else:
51+
for v in fvars:
52+
if not isinstance(v, str):
53+
raise TypeError("Expect all vars names to be strings")
54+
fn_args = [data_algebra.expr_rep.ColumnReference(view=None, column_name=v) for v in fvars]
55+
qvars = [v.__repr__() for v in fvars]
4656
if display_form is None:
47-
display_form = fn.__name__
48-
if not isinstance(var, str):
49-
raise TypeError("expected var to be str")
57+
display_form = ('user_fn('
58+
+ fn.__name__
59+
+ ', ['
60+
+ ', '.join(qvars)
61+
+ '])')
5062
return data_algebra.expr_rep.FnTerm(
5163
fn,
52-
fn_arg=data_algebra.expr_rep.ColumnReference(view=None, column_name=var),
64+
fn_args=fn_args,
5365
display_form=display_form,
5466
)
5567

5668

69+
5770
class ViewRepresentation(OperatorPlatform, ABC):
5871
"""Structure to represent the columns of a query or a table.
5972
Abstract base class."""

build/lib/data_algebra/expr_rep.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -690,17 +690,11 @@ def to_python(self, *, want_inline_parens=False):
690690
return self.value.__repr__()
691691

692692

693-
class Name(str):
693+
class UnQuotedStr(str):
694694
def __init__(self, v):
695695
self.v = v
696696
str.__init__(v)
697697

698-
def is_equal(self, other):
699-
# can't use == as that builds a larger expression
700-
if not isinstance(other, Name):
701-
return False
702-
return self.v == other.v
703-
704698
def str(self):
705699
return self.v
706700

@@ -709,22 +703,29 @@ def __repr__(self):
709703

710704

711705
class FnTerm(Term):
712-
def __init__(self, value, fn_arg=None, display_form=None, op=None):
706+
# represent a function of columns
707+
def __init__(self, value, fn_args=None, name=None, display_form=None, op=None):
713708
if not callable(value):
714709
raise TypeError("value type must be callable")
715710
self.value = value
711+
if name is None:
712+
name = value.__name__
713+
self.name = name
716714
if display_form is None:
717715
display_form = value.__name__
718716
if op is None:
719717
op = value.__name__
720718
self.display_form = display_form
721719
self.op = op
722-
if fn_arg is None:
720+
if fn_args is None:
723721
self.args = []
724722
else:
725-
if not isinstance(fn_arg, ColumnReference):
726-
raise TypeError("Expected fn_arg to be None or a ColumnReference")
727-
self.args = [fn_arg]
723+
if isinstance(fn_args, ColumnReference):
724+
fn_args = [fn_args]
725+
for v in fn_args:
726+
if not isinstance(v, ColumnReference):
727+
raise TypeError("Expected fn_args to be None or all ColumnReference")
728+
self.args = fn_args
728729
Term.__init__(self)
729730

730731
def is_equal(self, other):
@@ -749,7 +750,7 @@ def replace_view(self, view):
749750
return self
750751

751752
def to_python(self, *, want_inline_parens=False):
752-
return Name(self.display_form)
753+
return UnQuotedStr(self.display_form)
753754

754755

755756
class ListTerm(Term):
@@ -1068,7 +1069,7 @@ def parse_assignments_in_context(ops, view, *, parse_env=None):
10681069
if not isinstance(v, Term):
10691070
if callable(v):
10701071
# k = f(k) implicit form
1071-
v = FnTerm(v, fn_arg=ColumnReference(view=view, column_name=k))
1072+
v = FnTerm(v, fn_args=[ColumnReference(view=view, column_name=k)])
10721073
else:
10731074
v = _parse_by_eval(
10741075
source_str=v, data_def=mp, outter_environemnt=parse_env

build/lib/data_algebra/pandas_base.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,22 @@ def extend_step(self, op, *, data_map, eval_env, narrow):
112112
standin_name = "_data_algebra_temp_g" # name of an arbitrary input variable
113113
if not window_situation:
114114
for (k, opk) in op.ops.items():
115-
op_src = opk.to_pandas()
116-
res[k] = res.eval(
117-
op_src, local_dict=self.pandas_eval_env, global_dict=eval_env
118-
)
115+
if isinstance(opk, data_algebra.expr_rep.FnTerm):
116+
# res[k] = opk.value(*[res[nm.column_name] for nm in opk.args])
117+
pe = self.pandas_eval_env.copy()
118+
pe[opk.name] = opk.value
119+
op_src = ('@' + opk.name
120+
+ '('
121+
+ ', '.join([nm.column_name for nm in opk.args])
122+
+ ')')
123+
res[k] = res.eval(
124+
op_src, local_dict=pe, global_dict=eval_env
125+
)
126+
else:
127+
op_src = opk.to_pandas()
128+
res[k] = res.eval(
129+
op_src, local_dict=self.pandas_eval_env, global_dict=eval_env
130+
)
119131
else:
120132
# build up a sub-frame to work on
121133
col_list = [c for c in set(op.partition_by)]

coverage.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ data_algebra/cdata_impl.py 10 1 90%
7272
data_algebra/connected_components.py 49 1 98%
7373
data_algebra/custom_functions.py 17 2 88%
7474
data_algebra/data_model.py 40 18 55%
75-
data_algebra/data_ops.py 1221 247 80%
75+
data_algebra/data_ops.py 1228 248 80%
7676
data_algebra/data_ops_types.py 74 33 55%
7777
data_algebra/data_ops_utils.py 34 6 82%
7878
data_algebra/data_pipe.py 189 50 74%
@@ -81,17 +81,17 @@ data_algebra/diagram.py 56 44 21%
8181
data_algebra/env.py 31 3 90%
8282
data_algebra/eval_model.py 18 2 89%
8383
data_algebra/expr.py 20 4 80%
84-
data_algebra/expr_rep.py 695 219 68%
84+
data_algebra/expr_rep.py 697 217 69%
8585
data_algebra/flow_text.py 17 0 100%
8686
data_algebra/modin_model.py 36 5 86%
8787
data_algebra/near_sql.py 140 16 89%
88-
data_algebra/pandas_base.py 219 21 90%
88+
data_algebra/pandas_base.py 224 21 91%
8989
data_algebra/pandas_model.py 25 17 32%
9090
data_algebra/test_util.py 126 18 86%
9191
data_algebra/util.py 28 4 86%
9292
data_algebra/yaml.py 95 11 88%
9393
----------------------------------------------------------
94-
TOTAL 4276 944 78%
94+
TOTAL 4290 943 78%
9595

9696

97-
============================= 99 passed in 14.21s ==============================
97+
============================= 99 passed in 13.80s ==============================

data_algebra/custom_functions.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,6 @@ def make_custom_function_map(data_model):
109109
pandas_formatter=lambda expr: ("@min(" + expr.args[0].to_pandas() + ")"),
110110
implementation=lambda x: [numpy.min(x)] * len(x),
111111
),
112-
CustomFunction(
113-
name="fn", # special case, user defined function
114-
pandas_formatter=lambda expr: "@fn(" + expr.args[0].to_pandas() + ")",
115-
implementation=None,
116-
),
117112
]
118113
mp = {cf.name: cf for cf in custom_functions}
119114
return mp

data_algebra/data_ops.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,24 +36,37 @@
3636

3737
# wrap a single argument function as a user callable function in pipeline
3838
# used for custom aggregators
39-
def user_fn(fn, var, display_form=None):
39+
def user_fn(fn, fvars=None, *, display_form=None):
4040
if isinstance(fn, str):
4141
if display_form is None:
4242
display_form = fn
4343
fn = eval(fn)
4444
if not callable(fn):
4545
raise TypeError("expected fn to be callable")
46+
if fvars is None:
47+
fvars = []
48+
if isinstance(fvars, str):
49+
fn_args = [data_algebra.expr_rep.ColumnReference(view=None, column_name=fvars)]
50+
else:
51+
for v in fvars:
52+
if not isinstance(v, str):
53+
raise TypeError("Expect all vars names to be strings")
54+
fn_args = [data_algebra.expr_rep.ColumnReference(view=None, column_name=v) for v in fvars]
55+
qvars = [v.__repr__() for v in fvars]
4656
if display_form is None:
47-
display_form = fn.__name__
48-
if not isinstance(var, str):
49-
raise TypeError("expected var to be str")
57+
display_form = ('user_fn('
58+
+ fn.__name__
59+
+ ', ['
60+
+ ', '.join(qvars)
61+
+ '])')
5062
return data_algebra.expr_rep.FnTerm(
5163
fn,
52-
fn_arg=data_algebra.expr_rep.ColumnReference(view=None, column_name=var),
64+
fn_args=fn_args,
5365
display_form=display_form,
5466
)
5567

5668

69+
5770
class ViewRepresentation(OperatorPlatform, ABC):
5871
"""Structure to represent the columns of a query or a table.
5972
Abstract base class."""

data_algebra/expr_rep.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -690,17 +690,11 @@ def to_python(self, *, want_inline_parens=False):
690690
return self.value.__repr__()
691691

692692

693-
class Name(str):
693+
class UnQuotedStr(str):
694694
def __init__(self, v):
695695
self.v = v
696696
str.__init__(v)
697697

698-
def is_equal(self, other):
699-
# can't use == as that builds a larger expression
700-
if not isinstance(other, Name):
701-
return False
702-
return self.v == other.v
703-
704698
def str(self):
705699
return self.v
706700

@@ -709,22 +703,29 @@ def __repr__(self):
709703

710704

711705
class FnTerm(Term):
712-
def __init__(self, value, fn_arg=None, display_form=None, op=None):
706+
# represent a function of columns
707+
def __init__(self, value, fn_args=None, name=None, display_form=None, op=None):
713708
if not callable(value):
714709
raise TypeError("value type must be callable")
715710
self.value = value
711+
if name is None:
712+
name = value.__name__
713+
self.name = name
716714
if display_form is None:
717715
display_form = value.__name__
718716
if op is None:
719717
op = value.__name__
720718
self.display_form = display_form
721719
self.op = op
722-
if fn_arg is None:
720+
if fn_args is None:
723721
self.args = []
724722
else:
725-
if not isinstance(fn_arg, ColumnReference):
726-
raise TypeError("Expected fn_arg to be None or a ColumnReference")
727-
self.args = [fn_arg]
723+
if isinstance(fn_args, ColumnReference):
724+
fn_args = [fn_args]
725+
for v in fn_args:
726+
if not isinstance(v, ColumnReference):
727+
raise TypeError("Expected fn_args to be None or all ColumnReference")
728+
self.args = fn_args
728729
Term.__init__(self)
729730

730731
def is_equal(self, other):
@@ -749,7 +750,7 @@ def replace_view(self, view):
749750
return self
750751

751752
def to_python(self, *, want_inline_parens=False):
752-
return Name(self.display_form)
753+
return UnQuotedStr(self.display_form)
753754

754755

755756
class ListTerm(Term):
@@ -1068,7 +1069,7 @@ def parse_assignments_in_context(ops, view, *, parse_env=None):
10681069
if not isinstance(v, Term):
10691070
if callable(v):
10701071
# k = f(k) implicit form
1071-
v = FnTerm(v, fn_arg=ColumnReference(view=view, column_name=k))
1072+
v = FnTerm(v, fn_args=[ColumnReference(view=view, column_name=k)])
10721073
else:
10731074
v = _parse_by_eval(
10741075
source_str=v, data_def=mp, outter_environemnt=parse_env

data_algebra/pandas_base.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,22 @@ def extend_step(self, op, *, data_map, eval_env, narrow):
112112
standin_name = "_data_algebra_temp_g" # name of an arbitrary input variable
113113
if not window_situation:
114114
for (k, opk) in op.ops.items():
115-
op_src = opk.to_pandas()
116-
res[k] = res.eval(
117-
op_src, local_dict=self.pandas_eval_env, global_dict=eval_env
118-
)
115+
if isinstance(opk, data_algebra.expr_rep.FnTerm):
116+
# res[k] = opk.value(*[res[nm.column_name] for nm in opk.args])
117+
pe = self.pandas_eval_env.copy()
118+
pe[opk.name] = opk.value
119+
op_src = ('@' + opk.name
120+
+ '('
121+
+ ', '.join([nm.column_name for nm in opk.args])
122+
+ ')')
123+
res[k] = res.eval(
124+
op_src, local_dict=pe, global_dict=eval_env
125+
)
126+
else:
127+
op_src = opk.to_pandas()
128+
res[k] = res.eval(
129+
op_src, local_dict=self.pandas_eval_env, global_dict=eval_env
130+
)
119131
else:
120132
# build up a sub-frame to work on
121133
col_list = [c for c in set(op.partition_by)]
213 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)