Skip to content

Commit 44c37b9

Browse files
committed
ENH: allow in-line expression assignment with df.eval
TST: tests for local name overlaps
ENH: moved assign to visit_Assign from visit_Module
1 parent be7c4c0 commit 44c37b9

File tree

5 files changed

+98
-5
lines changed

5 files changed

+98
-5
lines changed

doc/source/release.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ Experimental Features
7272
``numexpr`` behind the scenes. This results in large speedups for complicated
7373
expressions involving large DataFrames/Series.
7474
- :class:`~pandas.DataFrame` has a new :meth:`~pandas.DataFrame.eval` that
75-
evaluates an expression in the context of the ``DataFrame``.
75+
evaluates an expression in the context of the ``DataFrame``; allows
76+
inline expression assignment
7677
- A :meth:`~pandas.DataFrame.query` method has been added that allows
7778
you to select elements of a ``DataFrame`` using a natural query syntax nearly
7879
identical to Python syntax.

pandas/computation/eval.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,4 +203,10 @@ def eval(expr, parser='pandas', engine='numexpr', truediv=True,
203203
eng = _engines[engine]
204204
eng_inst = eng(parsed_expr)
205205
ret = eng_inst.evaluate()
206+
207+
# assign if needed
208+
if parsed_expr.assignee is not None and parsed_expr.assigner is not None:
209+
parsed_expr.assignee[parsed_expr.assigner] = ret
210+
return None
211+
206212
return ret

pandas/computation/expr.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
_arith_ops_syms, _unary_ops_syms, is_term)
2222
from pandas.computation.ops import _reductions, _mathops, _LOCAL_TAG
2323
from pandas.computation.ops import Op, BinOp, UnaryOp, Term, Constant, Div
24+
from pandas.computation.ops import UndefinedVariableError
2425

2526

2627
def _ensure_scope(level=2, global_dict=None, local_dict=None, resolvers=None,
@@ -417,6 +418,8 @@ def __init__(self, env, engine, parser, preparser=_preparse):
417418
self.engine = engine
418419
self.parser = parser
419420
self.preparser = preparser
421+
self.assignee = None
422+
self.assigner = None
420423

421424
def visit(self, node, **kwargs):
422425
if isinstance(node, string_types):
@@ -575,9 +578,39 @@ def visit_Slice(self, node, **kwargs):
575578
return slice(lower, upper, step)
576579

577580
def visit_Assign(self, node, **kwargs):
578-
cmpr = ast.Compare(ops=[ast.Eq()], left=node.targets[0],
579-
comparators=[node.value])
580-
return self.visit(cmpr)
581+
"""
582+
support a single assignment node, like
583+
584+
c = a + b
585+
586+
set the assignee at the top level, must be a Name node which
587+
might or might not exist in the resolvers
588+
589+
"""
590+
591+
if len(node.targets) != 1:
592+
raise SyntaxError('can only assign a single expression')
593+
if not isinstance(node.targets[0], ast.Name):
594+
raise SyntaxError('left hand side of an assignment must be a single name')
595+
596+
# we have no one to assign to
597+
if not len(self.env.resolvers):
598+
raise NotImplementedError
599+
600+
try:
601+
assigner = self.visit(node.targets[0], **kwargs)
602+
except (UndefinedVariableError):
603+
assigner = node.targets[0].id
604+
605+
self.assigner = getattr(assigner,'name',assigner)
606+
if self.assigner is None:
607+
raise SyntaxError('left hand side of an assignment must be a single resolvable name')
608+
try:
609+
self.assignee = self.env.resolvers[0]
610+
except:
611+
raise ValueError('cannot create an assignee for this expression')
612+
613+
return self.visit(node.value, **kwargs)
581614

582615
def visit_Attribute(self, node, **kwargs):
583616
attr = node.attr
@@ -669,7 +702,7 @@ def visitor(x, y):
669702
return reduce(visitor, operands)
670703

671704

672-
_python_not_supported = frozenset(['Assign', 'Dict', 'Call', 'BoolOp',
705+
_python_not_supported = frozenset(['Dict', 'Call', 'BoolOp',
673706
'In', 'NotIn'])
674707
_numexpr_supported_calls = frozenset(_reductions + _mathops)
675708

@@ -712,6 +745,14 @@ def __init__(self, expr, engine='numexpr', parser='pandas', env=None,
712745
self.terms = self.parse()
713746
self.truediv = truediv
714747

748+
@property
749+
def assigner(self):
750+
return getattr(self._visitor,'assigner',None)
751+
752+
@property
753+
def assignee(self):
754+
return getattr(self._visitor,'assignee',None)
755+
715756
def __call__(self):
716757
self.env.locals['truediv'] = self.truediv
717758
return self.terms(self.env)

pandas/computation/pytables.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,11 @@ def visit_USub(self, node, **kwargs):
389389
def visit_Index(self, node, **kwargs):
390390
return self.visit(node.value).value
391391

392+
def visit_Assign(self, node, **kwargs):
393+
cmpr = ast.Compare(ops=[ast.Eq()], left=node.targets[0],
394+
comparators=[node.value])
395+
return self.visit(cmpr)
396+
392397
def visit_Subscript(self, node, **kwargs):
393398
value = self.visit(node.value)
394399
slobj = self.visit(node.slice)

pandas/computation/tests/test_eval.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from pandas.computation.ops import (_binary_ops_dict, _unary_ops_dict,
2525
_special_case_arith_ops_syms,
2626
_arith_ops_syms, _bool_ops_syms)
27+
from pandas.computation.common import NameResolutionError
2728
import pandas.computation.expr as expr
2829
import pandas.util.testing as tm
2930
from pandas.util.testing import (assert_frame_equal, randbool,
@@ -1154,6 +1155,45 @@ def test_assignment_fails(self):
11541155
self.assertRaises(NotImplementedError, self.eval, expr1,
11551156
local_dict={'df': df, 'df2': df2})
11561157

1158+
def test_assignment_column(self):
1159+
df = DataFrame(np.random.randn(5, 2), columns=list('ab'))
1160+
orig_df = df.copy()
1161+
1162+
# multiple assignees
1163+
self.assertRaises(SyntaxError, df.eval, 'd c = a + b')
1164+
1165+
# invalid assignees
1166+
self.assertRaises(SyntaxError, df.eval, 'd,c = a + b')
1167+
self.assertRaises(SyntaxError, df.eval, 'Timestamp("20131001") = a + b')
1168+
1169+
# single assignment - existing variable
1170+
expected = orig_df.copy()
1171+
expected['a'] = expected['a'] + expected['b']
1172+
df = orig_df.copy()
1173+
df.eval('a = a + b')
1174+
assert_frame_equal(df,expected)
1175+
1176+
# single assignment - new variable
1177+
expected = orig_df.copy()
1178+
expected['c'] = expected['a'] + expected['b']
1179+
df = orig_df.copy()
1180+
df.eval('c = a + b')
1181+
assert_frame_equal(df,expected)
1182+
1183+
# with a local name overlap
1184+
a = 1
1185+
df = orig_df.copy()
1186+
df.eval('a = 1 + b')
1187+
expected = orig_df.copy()
1188+
expected['a'] = 1 + expected['b']
1189+
assert_frame_equal(df,expected)
1190+
1191+
df = orig_df.copy()
1192+
def f():
1193+
a = 1
1194+
df.eval('a=a+b')
1195+
self.assertRaises(NameResolutionError, f)
1196+
11571197
def test_basic_period_index_boolean_expression(self):
11581198
df = mkdf(2, 2, data_gen_f=f, c_idx_type='p', r_idx_type='i')
11591199

0 commit comments

Comments
 (0)