Commit 25939fd

Merge branch 'constant' of github.com:pymc-devs/pymc3 into constant

2 parents: ba872ea + 5716806

12 files changed: +228 -151 lines

docs/source/notebooks/NUTS_scaling_using_ADVI.ipynb

Lines changed: 1 addition & 1 deletion

@@ -490,7 +490,7 @@
 ],
 "source": [
 "with mdl:\n",
-"    v_params = pm.variational.advi(n=100000, verbose=False) \n",
+"    v_params = pm.variational.advi(n=100000) \n",
 "\n",
 "_ = plt.plot(-np.log10(-v_params.elbo_vals))"
 ]
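Note: the `verbose` keyword is dropped from the `pm.variational.advi` call here, matching the removal of the `verbose` machinery in pymc3/model.py below. A minimal sketch of the updated cell, assuming `mdl` is the model built earlier in the notebook:

    import numpy as np
    import matplotlib.pyplot as plt
    import pymc3 as pm

    with mdl:  # mdl: the pymc3 Model defined earlier in the notebook
        v_params = pm.variational.advi(n=100000)

    # ELBO trace, plotted on a log scale as in the notebook cell
    _ = plt.plot(-np.log10(-v_params.elbo_vals))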

pymc3/distributions/transforms.py

Lines changed: 4 additions & 1 deletion

@@ -240,7 +240,7 @@ def jacobian_det(self, y_):
 t_stick_breaking = lambda eps: StickBreaking(eps)
 
 
-class Circular(ElemwiseTransform):
+class Circular(Transform):
     """Transforms a linear space into a circular one.
     """
     name = "circular"
@@ -250,5 +250,8 @@ def backward(self, y):
 
     def forward(self, x):
         return x
+
+    def jacobian_det(self, x):
+        return 0
 
 circular = Circular()
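Note: since `forward` is the identity, the new `jacobian_det` correctly returns a log-Jacobian of 0, so the transform contributes nothing to the transformed variable's log-density. A sketch of the resulting class, assuming the usual `backward` that wraps values back onto the circle (not shown in this diff):

    import theano.tensor as tt
    from pymc3.distributions.transforms import Transform

    class Circular(Transform):
        """Transforms a linear space into a circular one."""
        name = "circular"

        def backward(self, y):
            # assumed behavior: wrap onto (-pi, pi]
            return tt.arctan2(tt.sin(y), tt.cos(y))

        def forward(self, x):
            return x  # identity map

        def jacobian_det(self, x):
            return 0  # log|det J| of the identity map is 0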

pymc3/glm/families.py

Lines changed: 9 additions & 5 deletions

@@ -39,24 +39,26 @@ def __init__(self, **kwargs):
             else:
                 setattr(self, key, val)
 
-    def _get_priors(self, model=None):
+    def _get_priors(self, model=None, name=''):
         """Return prior distributions of the likelihood.
 
         Returns
         -------
         dict : mapping name -> pymc3 distribution
         """
+        if name:
+            name = '{}_'.format(name)
         model = modelcontext(model)
         priors = {}
         for key, val in self.priors.items():
             if isinstance(val, numbers.Number):
                 priors[key] = val
             else:
-                priors[key] = model.Var(key, val)
+                priors[key] = model.Var('{}{}'.format(name, key), val)
 
         return priors
 
-    def create_likelihood(self, y_est, y_data, model=None):
+    def create_likelihood(self, name, y_est, y_data, model=None):
         """Create likelihood distribution of observed data.
 
         Parameters
@@ -66,10 +68,12 @@ def create_likelihood(self, y_est, y_data, model=None):
         y_data : array
             Observed dependent variable
         """
-        priors = self._get_priors(model=model)
+        priors = self._get_priors(model=model, name=name)
         # Wrap y_est in link function
         priors[self.parent] = self.link(y_est)
-        return self.likelihood('y', observed=y_data, **priors)
+        if name:
+            name = '{}_'.format(name)
+        return self.likelihood('{}y'.format(name), observed=y_data, **priors)
 
     def __repr__(self):
         return """Family {klass}:
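Note: the new `name` argument namespaces every variable a family creates, so multiple GLM components can share one model without name collisions. A hypothetical illustration of the resulting names (the `'sd'` prior key is an assumption based on the Normal family's defaults):

    # With name='' the family creates 'y' (and e.g. 'sd') as before.
    # With name='income' the same call creates 'income_y' and 'income_sd',
    # keeping two GLMs in one model from clashing.
    family = families.Normal()
    family.create_likelihood('income', y_est, y_data, model=model)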

pymc3/glm/glm.py

Lines changed: 150 additions & 72 deletions

@@ -1,23 +1,41 @@
 import numpy as np
 from ..distributions import Normal
-from ..tuning.starting import find_MAP
 from ..model import modelcontext
 import patsy
-import theano
 import pandas as pd
-from collections import defaultdict
-from pandas.tools.plotting import scatter_matrix
+import theano
+from collections import defaultdict, namedtuple
 
 from . import families
 
 __all__ = ['glm', 'linear_component', 'plot_posterior_predictive']
 
 
-def linear_component(formula, data, priors=None,
-                     intercept_prior=None,
-                     regressor_prior=None,
-                     init_vals=None, family=None,
-                     model=None):
+def _xy_to_data_and_formula(X, y):
+    if not isinstance(y, pd.Series):
+        y = pd.Series(y, name='y')
+    else:
+        if not y.name:
+            y.name = 'y'
+    if not isinstance(X, (pd.DataFrame, pd.Series)):
+        if len(X.shape) > 1:
+            cols = ['x%d' % i for i in range(X.shape[1])]
+        else:
+            cols = ['x']
+        X = pd.DataFrame(X, columns=cols)
+    elif isinstance(X, pd.Series):
+        if not X.name:
+            X.name = 'x'
+    # else -> pd.DataFrame -> ok
+    data = pd.concat([y, X], 1)
+    formula = patsy.ModelDesc(
+        [patsy.Term([patsy.LookupFactor(y.name)])],
+        [patsy.Term([patsy.LookupFactor(p)]) for p in X.columns]
+    )
+    return data, formula
+
+
+class linear_component(namedtuple('Estimate', 'y_est,coeffs')):
     """Create linear model according to patsy specification.
 
     Parameters
@@ -38,10 +56,6 @@ def linear_component(formula, data, priors=None,
     init_vals : dict
         Set starting values externally: parameter -> value
         Default: None
-    family : statsmodels.family
-        Link function to pass to statsmodels (init has to be True).
-        See `statsmodels.api.families`
-        Default: identity
 
     Output
     ------
@@ -50,51 +64,78 @@
     Example
     -------
     # Logistic regression
-    y_est, coeffs = glm('male ~ height + weight',
-                        htwt_data,
-                        family=glm.families.Binomial(link=glm.family.logit))
-    y_data = Bernoulli('y', y_est, observed=data.male)
+    y_est, coeffs = linear_component('male ~ height + weight',
+                                     htwt_data)
+    probability = glm.families.logit(y_est)
+    y_data = Bernoulli('y', probability, observed=data.male)
     """
-    if intercept_prior is None:
-        intercept_prior = Normal.dist(mu=0, tau=1.0E-12)
-    if regressor_prior is None:
-        regressor_prior = Normal.dist(mu=0, tau=1.0E-12)
-
-    if priors is None:
-        priors = defaultdict(None)
-
-    # Build patsy design matrix and get regressor names.
-    _, dmatrix = patsy.dmatrices(formula, data)
-    reg_names = dmatrix.design_info.column_names
-
-    if init_vals is None:
-        init_vals = {}
-
-    # Create individual coefficients
-    model = modelcontext(model)
-    coeffs = []
-
-    if reg_names[0] == 'Intercept':
-        prior = priors.get('Intercept', intercept_prior)
-        coeff = model.Var(reg_names.pop(0), prior)
-        if 'Intercept' in init_vals:
-            coeff.tag.test_value = init_vals['Intercept']
-        coeffs.append(coeff)
-
-    for reg_name in reg_names:
-        prior = priors.get(reg_name, regressor_prior)
-        coeff = model.Var(reg_name, prior)
-        if reg_name in init_vals:
-            coeff.tag.test_value = init_vals[reg_name]
-        coeffs.append(coeff)
-
-    y_est = theano.dot(np.asarray(dmatrix),
-                       theano.tensor.stack(*coeffs)).reshape((1, -1))
+    __slots__ = ()
 
-    return y_est, coeffs
-
-
-def glm(*args, **kwargs):
+    def __new__(cls, formula, data, priors=None,
+                intercept_prior=None,
+                regressor_prior=None,
+                init_vals=None,
+                model=None,
+                name=''):
+        if intercept_prior is None:
+            intercept_prior = Normal.dist(mu=0, tau=1.0E-12)
+        if regressor_prior is None:
+            regressor_prior = Normal.dist(mu=0, tau=1.0E-12)
+
+        if priors is None:
+            priors = defaultdict(None)
+
+        # Build patsy design matrix and get regressor names.
+        _, dmatrix = patsy.dmatrices(formula, data)
+        reg_names = dmatrix.design_info.column_names
+
+        if init_vals is None:
+            init_vals = {}
+
+        # Create individual coefficients
+        model = modelcontext(model)
+        coeffs = []
+        if name:
+            name = '{}_'.format(name)
+        if reg_names[0] == 'Intercept':
+            prior = priors.get('Intercept', intercept_prior)
+            coeff = model.Var('{}{}'.format(name, reg_names.pop(0)), prior)
+            if 'Intercept' in init_vals:
+                coeff.tag.test_value = init_vals['Intercept']
+            coeffs.append(coeff)
+
+        for reg_name in reg_names:
+            prior = priors.get(reg_name, regressor_prior)
+            coeff = model.Var('{}{}'.format(name, reg_name), prior)
+            if reg_name in init_vals:
+                coeff.tag.test_value = init_vals[reg_name]
+            coeffs.append(coeff)
+
+        y_est = theano.dot(np.asarray(dmatrix),
+                           theano.tensor.stack(*coeffs)).reshape((1, -1))
+
+        return super(linear_component, cls).__new__(cls, y_est, coeffs)
+
+    @classmethod
+    def from_xy(cls, X, y,
+                priors=None,
+                intercept_prior=None,
+                regressor_prior=None,
+                init_vals=None,
+                model=None,
+                name=''):
+        data, formula = _xy_to_data_and_formula(X, y)
+        return cls(formula, data,
+                   priors=priors,
+                   intercept_prior=intercept_prior,
+                   regressor_prior=regressor_prior,
+                   init_vals=init_vals,
+                   model=model,
+                   name=name
+                   )
+
+
+class glm(namedtuple('Estimate', 'y_est,coeffs')):
     """Create GLM after Patsy model specification string.
 
     Parameters
@@ -121,29 +162,66 @@
 
     Output
     ------
-    vars : List of created random variables (y_est, coefficients etc)
+    (y_est, coeffs) : Estimate for y, list of coefficients
 
     Example
     -------
     # Logistic regression
     vars = glm('male ~ height + weight',
                data,
-               family=glm.families.Binomial(link=glm.families.logit))
+               family=glm.families.Binomial())
     """
-
-    model = modelcontext(kwargs.get('model'))
-
-    family = kwargs.pop('family', families.Normal())
-
-    call_find_map = kwargs.pop('find_MAP', True)
-    formula = args[0]
-    data = args[1]
-    y_data = np.asarray(patsy.dmatrices(formula, data)[0]).T
-
-    y_est, coeffs = linear_component(*args, **kwargs)
-    family.create_likelihood(y_est, y_data)
-
-    return [y_est] + coeffs
+    __slots__ = ()
+
+    def __new__(cls, formula, data, priors=None,
+                intercept_prior=None,
+                regressor_prior=None,
+                init_vals=None,
+                family='normal',
+                model=None,
+                name=''):
+        _families = dict(
+            normal=families.Normal,
+            student=families.StudentT,
+            binomial=families.Binomial,
+            poisson=families.Poisson
+        )
+        if isinstance(family, str):
+            family = _families[family]()
+
+        y_data = np.asarray(patsy.dmatrices(formula, data)[0]).T
+
+        y_est, coeffs = linear_component(
+            formula, data, priors=priors,
+            intercept_prior=intercept_prior,
+            regressor_prior=regressor_prior,
+            init_vals=init_vals,
+            model=model,
+            name=name
+        )
+        family.create_likelihood(name, y_est, y_data, model=model)
+
+        return super(glm, cls).__new__(cls, y_est, coeffs)
+
+    @classmethod
+    def from_xy(cls, X, y,
+                priors=None,
+                intercept_prior=None,
+                regressor_prior=None,
+                init_vals=None,
+                family='normal',
+                model=None,
+                name=''):
+        data, formula = _xy_to_data_and_formula(X, y)
+        return cls(formula, data,
+                   priors=priors,
+                   intercept_prior=intercept_prior,
+                   regressor_prior=regressor_prior,
+                   init_vals=init_vals,
+                   model=model,
+                   family=family,
+                   name=name
+                   )
 
 
 def plot_posterior_predictive(trace, eval=None, lm=None, samples=30, **kwargs):
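Note: taken together, `linear_component` and `glm` are now namedtuple subclasses, so a call unpacks directly into `(y_est, coeffs)`, the family can be given as a string, and `from_xy` builds the patsy formula from raw arrays via `_xy_to_data_and_formula`. A hedged usage sketch with made-up toy data:

    import numpy as np
    import pandas as pd
    import pymc3 as pm
    from pymc3.glm import glm

    # Made-up data for illustration: y = 1 + 2x + noise
    data = pd.DataFrame({'x': np.random.randn(100)})
    data['y'] = 1.0 + 2.0 * data['x'] + 0.5 * np.random.randn(100)

    with pm.Model():
        # Formula interface; 'normal' resolves to families.Normal()
        y_est, coeffs = glm('y ~ x', data, family='normal', name='lin')

    with pm.Model():
        # Array interface; variable names 'x' and 'y' are generated
        y_est, coeffs = glm.from_xy(data[['x']].values, data['y'].values)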

pymc3/model.py

Lines changed: 9 additions & 19 deletions

@@ -169,25 +169,16 @@ def logpt(self):
 
 
 class Model(Context, Factor):
-    """Encapsulates the variables and likelihood factors of a model.
+    """Encapsulates the variables and likelihood factors of a model."""
 
-    Parameters
-    ----------
-    verbose : int
-        Model verbosity setting, determining how much feedback various
-        operations provide. Normal verbosity is verbose=1 (default), silence
-        is verbose=0, high is any value greater than 1.
-    """
-
-    def __init__(self, verbose=1):
+    def __init__(self):
         self.named_vars = {}
         self.free_RVs = []
         self.observed_RVs = []
         self.deterministics = []
         self.potentials = []
         self.missing_values = []
         self.model = self
-        self.verbose = verbose
 
     @property
     @memoize
@@ -287,12 +278,11 @@ def Var(self, name, dist, data=None):
             else:
                 var = TransformedRV(name=name, distribution=dist, model=self,
                                     transform=dist.transform)
-                if self.verbose:
-                    pm._log.info('Applied {transform}-transform to {name}'
-                                 ' and added transformed {orig_name} to model.'.format(
-                                    transform=dist.transform.name,
-                                    name=name,
-                                    orig_name='{}_{}_'.format(name, dist.transform.name)))
+                pm._log.debug('Applied {transform}-transform to {name}'
+                              ' and added transformed {orig_name} to model.'.format(
+                                  transform=dist.transform.name,
+                                  name=name,
+                                  orig_name='{}_{}_'.format(name, dist.transform.name)))
                 self.deterministics.append(var)
                 return var
         elif isinstance(data, dict):
@@ -624,8 +614,8 @@ def __init__(self, name, data, distribution, model):
        self.data = {name: as_tensor(data, name, model, distribution)
                     for name, data in data.items()}
 
-        self.missing_values = [data.missing_values for data in self.data.values()
-                               if data.missing_values is not None]
+        self.missing_values = [datum.missing_values for datum in self.data.values()
+                               if datum.missing_values is not None]
        self.logp_elemwiset = distribution.logp(**self.data)
        self.model = model
        self.distribution = distribution
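Note: with the `verbose` flag gone, the transform message is emitted through pymc3's logger at DEBUG level, so verbosity becomes a standard logging concern. A sketch, assuming `pm._log` is the package-level 'pymc3' logger:

    import logging

    # Ensure a handler exists, then re-enable the transform messages
    # that verbose=1 used to print by raising the log level to DEBUG.
    logging.basicConfig()
    logging.getLogger('pymc3').setLevel(logging.DEBUG)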
