Skip to content

Commit 3819ae9

Browse files
committed
Merge pull request #66 from chrish42/capture-vars-not-env
Capture vars not env
2 parents 845bded + 48d7e1e commit 3819ae9

File tree

10 files changed

+209
-74
lines changed

10 files changed

+209
-74
lines changed

doc/changes.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,20 @@ Changes
33

44
.. currentmodule:: patsy
55

6+
v0.4.0
7+
------
8+
9+
* Formulas (more precisely, :class:`EvalFactor` objects) now only
10+
keep a reference to the variables required from their environment
11+
instead of the whole environment when the formula was defined.
12+
13+
* Incompatible change: :class:`EvalFactor` does not take an
14+
``eval_env`` argument anymore.
15+
16+
* Incompatible change: the :func:`design_matrix_builders` function and
17+
the :meth:`EvalFactor.memorize_passes_needed` method now
18+
require an ``eval_env`` as an additional argument.
19+
620
v0.3.0
721
------
822

doc/expert-model-specification.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,11 @@ things are.)
7878
from patsy import (ModelDesc, EvalEnvironment, Term, EvalFactor,
7979
LookupFactor, demo_data, dmatrix)
8080
data = demo_data("a", "x")
81-
env = EvalEnvironment.capture()
8281
8382
# LookupFactor takes a dictionary key:
8483
a_lookup = LookupFactor("a")
8584
# EvalFactor takes arbitrary Python code:
86-
x_transform = EvalFactor("np.log(x ** 2)", env)
85+
x_transform = EvalFactor("np.log(x ** 2)")
8786
# First argument is empty list for dmatrix; we would need to put
8887
# something there if we were calling dmatrices.
8988
desc = ModelDesc([],
@@ -157,7 +156,7 @@ The full interface looks like this:
157156
:term:`hashable`. These methods will determine which factors
158157
Patsy considers equal for purposes of redundancy elimination.
159158

160-
.. method:: memorize_passes_needed(state)
159+
.. method:: memorize_passes_needed(state, eval_env)
161160

162161
Return the number of passes through the data that this factor
163162
will need in order to set up any :ref:`stateful-transforms`.
@@ -171,6 +170,9 @@ The full interface looks like this:
171170
will be passed back in to all memorization and evaluation
172171
methods.
173172

173+
`eval_env` is an :class:`EvalEnvironment` object, describing
174+
the Python environment where the factor is being evaluated.
175+
174176
.. method:: memorize_chunk(state, which_pass, data)
175177

176178
Called repeatedly with each 'chunk' of data produced by the

doc/formulas.rst

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,12 @@ To make this more concrete, here's how you could manually construct
5858
the same objects that Patsy will construct if given the above
5959
formula::
6060

61-
from patsy import EvalEnvironment, ModelDesc
62-
env = EvalEnvironment.capture()
63-
ModelDesc([Term([EvalFactor("y", env)])],
61+
from patsy import ModelDesc
62+
ModelDesc([Term([EvalFactor("y")])],
6463
[Term([]),
65-
Term([EvalFactor("a", env)]),
66-
Term([EvalFactor("a", env), EvalFactor("b", env)]),
67-
Term([EvalFactor("np.log(x)", env)])])
64+
Term([EvalFactor("a")]),
65+
Term([EvalFactor("a"), EvalFactor("b")]),
66+
Term([EvalFactor("np.log(x)")])])
6867

6968
Compare to what you get from parsing the above formula::
7069

patsy/build.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -325,14 +325,14 @@ def test__ColumnBuilder():
325325
cb_intercept.build({f1: [1, 2, 3], f2: [1, 2, 3], f3: [1, 2, 3]}, mat3)
326326
assert np.allclose(mat3, 1)
327327

328-
def _factors_memorize(factors, data_iter_maker):
328+
def _factors_memorize(factors, data_iter_maker, eval_env):
329329
# First, start off the memorization process by setting up each factor's
330330
# state and finding out how many passes it will need:
331331
factor_states = {}
332332
passes_needed = {}
333333
for factor in factors:
334334
state = {}
335-
which_pass = factor.memorize_passes_needed(state)
335+
which_pass = factor.memorize_passes_needed(state, eval_env)
336336
factor_states[factor] = state
337337
passes_needed[factor] = which_pass
338338
# Now, cycle through the data until all the factors have finished
@@ -362,7 +362,7 @@ def __init__(self, requested_passes, token):
362362
self._chunk_in_pass = 0
363363
self._seen_passes = 0
364364

365-
def memorize_passes_needed(self, state):
365+
def memorize_passes_needed(self, state, eval_env):
366366
state["calls"] = []
367367
state["token"] = self._token
368368
return self._requested_passes
@@ -389,7 +389,7 @@ def __call__(self):
389389
f1 = MockFactor(1, "f1")
390390
f2a = MockFactor(2, "f2a")
391391
f2b = MockFactor(2, "f2b")
392-
factor_states = _factors_memorize(set([f0, f1, f2a, f2b]), data)
392+
factor_states = _factors_memorize(set([f0, f1, f2a, f2b]), data, {})
393393
assert data.calls == 2
394394
mem_chunks0 = [("memorize_chunk", 0)] * data.CHUNKS
395395
mem_chunks1 = [("memorize_chunk", 1)] * data.CHUNKS
@@ -615,7 +615,7 @@ def _make_term_column_builders(terms,
615615
term_to_column_builders[term] = column_builders
616616
return new_term_order, term_to_column_builders
617617

618-
def design_matrix_builders(termlists, data_iter_maker, NA_action="drop"):
618+
def design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action="drop"):
619619
"""Construct several :class:`DesignMatrixBuilders` from termlists.
620620
621621
This is one of Patsy's fundamental functions. This function and
@@ -629,6 +629,14 @@ def design_matrix_builders(termlists, data_iter_maker, NA_action="drop"):
629629
simple iterator because sufficiently complex formulas may require
630630
multiple passes over the data (e.g. if there are nested stateful
631631
transforms).
632+
:arg eval_env: Either an :class:`EvalEnvironment` which will be used to
633+
look up any variables referenced in `termlists` that cannot be
634+
found in `data_iter_maker`, or else a depth represented as an
635+
integer which will be passed to :meth:`EvalEnvironment.capture`.
636+
``eval_env=0`` means to use the context of the function calling
637+
:func:`design_matrix_builders` for lookups. If calling this function
638+
from a library, you probably want ``eval_env=1``, which means that
639+
variables should be resolved in *your* caller's namespace.
632640
:arg NA_action: An :class:`NAAction` object or string, used to determine
633641
what values count as 'missing' for purposes of determining the levels of
634642
categorical factors.
@@ -643,14 +651,25 @@ def design_matrix_builders(termlists, data_iter_maker, NA_action="drop"):
643651
644652
.. versionadded:: 0.2.0
645653
The ``NA_action`` argument.
654+
.. versionadded:: 0.4.0
655+
The ``eval_env`` argument.
646656
"""
657+
# Check type of eval_env to help people migrating to 0.4.0. Third
658+
# argument used to be NA_action (a string). Having the check for
659+
# eval_env's type gives people migrating to 0.4.0 who used NA_action
660+
# not as a keyword argument a nice error message here, instead of a
661+
# more obscure backtrace later on.
662+
if not isinstance(eval_env, six.integer_types + (EvalEnvironment,)):
663+
raise TypeError("Parameter 'eval_env' must be either an integer or an instance "
664+
"of patsy.EvalEnvironment.")
665+
eval_env = EvalEnvironment.capture(eval_env, reference=1)
647666
if isinstance(NA_action, str):
648667
NA_action = NAAction(NA_action)
649668
all_factors = set()
650669
for termlist in termlists:
651670
for term in termlist:
652671
all_factors.update(term.factors)
653-
factor_states = _factors_memorize(all_factors, data_iter_maker)
672+
factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
654673
# Now all the factors have working eval methods, so we can evaluate them
655674
# on some data to find out what type of data they return.
656675
(num_column_counts,

patsy/desc.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ def test_ModelDesc_from_formula():
191191
for input in ("y ~ x", parse_formula("y ~ x")):
192192
eval_env = EvalEnvironment.capture(0)
193193
md = ModelDesc.from_formula(input, eval_env)
194-
assert md.lhs_termlist == [Term([EvalFactor("y", eval_env)]),]
195-
assert md.rhs_termlist == [INTERCEPT, Term([EvalFactor("x", eval_env)])]
194+
assert md.lhs_termlist == [Term([EvalFactor("y")]),]
195+
assert md.rhs_termlist == [INTERCEPT, Term([EvalFactor("x")])]
196196

197197
class IntermediateExpr(object):
198198
"This class holds an intermediate result while we're evaluating a tree."
@@ -356,8 +356,7 @@ def _eval_number(evaluator, tree):
356356
"only allowed with **", tree)
357357

358358
def _eval_python_expr(evaluator, tree):
359-
factor = EvalFactor(tree.token.extra, evaluator._factor_eval_env,
360-
origin=tree.origin)
359+
factor = EvalFactor(tree.token.extra, origin=tree.origin)
361360
return IntermediateExpr(False, None, False, [Term([factor])])
362361

363362
class Evaluator(object):
@@ -585,16 +584,15 @@ def eval(self, tree, require_evalexpr=True):
585584
"a + <-a**2>",
586585
]
587586

588-
def _assert_terms_match(terms, expected_intercept, expecteds, eval_env): # pragma: no cover
587+
def _assert_terms_match(terms, expected_intercept, expecteds): # pragma: no cover
589588
if expected_intercept:
590589
expecteds = [()] + expecteds
591590
assert len(terms) == len(expecteds)
592591
for term, expected in zip(terms, expecteds):
593592
if isinstance(term, Term):
594593
if isinstance(expected, str):
595594
expected = (expected,)
596-
assert term.factors == tuple([EvalFactor(s, eval_env)
597-
for s in expected])
595+
assert term.factors == tuple([EvalFactor(s) for s in expected])
598596
else:
599597
assert term == expected
600598

@@ -609,11 +607,9 @@ def _do_eval_formula_tests(tests): # pragma: no cover
609607
print(model_desc)
610608
lhs_intercept, lhs_termlist, rhs_intercept, rhs_termlist = result
611609
_assert_terms_match(model_desc.lhs_termlist,
612-
lhs_intercept, lhs_termlist,
613-
eval_env)
610+
lhs_intercept, lhs_termlist)
614611
_assert_terms_match(model_desc.rhs_termlist,
615-
rhs_intercept, rhs_termlist,
616-
eval_env)
612+
rhs_intercept, rhs_termlist)
617613

618614
def test_eval_formula():
619615
_do_eval_formula_tests(_eval_tests)

0 commit comments

Comments
 (0)