Skip to content

Commit e72b0d5

Browse files
author
Junpeng Lao
authored
Add offset to glm models (#2889)
* Add `offset` kwarg to glm with test
* Add release note and clean-ups
* Minor potential fail-safe
* Set default to 0.
1 parent b463f2b commit e72b0d5

File tree

4 files changed

+29
-26
lines changed

4 files changed

+29
-26
lines changed

RELEASE-NOTES.md

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,12 @@
1717
- Add logitnormal distribution
1818
- Densityplot: add support for discrete variables
1919
- Fix the Binomial likelihood in `.glm.families.Binomial`, with the flexibility of specifying the `n`.
20+
- Add `offset` kwarg to `.glm`.
2021

2122
### Fixes
2223

2324
- `VonMises` does not overflow for large values of kappa. i0 and i1 have been removed and we now use log_i0 to compute the logp.
24-
- The bandwidth for KDE plots is computed using a modified version of Scott's rule. The new version uses entropy instead of standard
25-
deviation. This works better for multimodal distributions. Functions using KDE plots has a new argument `bw` controlling the bandwidth.
26-
27-
=======
28-
- Add `logit_p` keyword to `pm.Bernoulli`, so that users can specify the logit
29-
of the success probability. This is faster and more stable than using
30-
`p=tt.nnet.sigmoid(logit_p)`.
31-
- Add `random` keyword to `pm.DensityDist` thus enabling users to pass custom random method
32-
which in turn makes sampling from a `DensityDist` possible.
33-
34-
### Fixes
35-
36-
- `VonMises` does not overflow for large values of kappa. i0 and i1 have been removed and we now use
37-
log_i0 to compute the logp.
25+
- The bandwidth for KDE plots is computed using a modified version of Scott's rule. The new version uses entropy instead of standard deviation. This works better for multimodal distributions. Functions using KDE plots have a new argument `bw` controlling the bandwidth.
3826

3927
### Deprecations
4028

pymc3/glm/linear.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,15 @@ class LinearComponent(Model):
2828
use `Regressor` key for defining default prior for all regressors
2929
defaults to Normal.dist(mu=0, tau=1.0E-6)
3030
vars : dict - random variables instead of creating new ones
31+
offset : scalar, or numpy/theano array with the same shape as y
32+
this can be used to specify an a priori known component to be
33+
included in the linear predictor during fitting.
3134
"""
3235
default_regressor_prior = Normal.dist(mu=0, tau=1.0E-6)
3336
default_intercept_prior = Flat.dist()
3437

3538
def __init__(self, x, y, intercept=True, labels=None,
36-
priors=None, vars=None, name='', model=None):
39+
priors=None, vars=None, name='', model=None, offset=0.):
3740
super(LinearComponent, self).__init__(name, model)
3841
if priors is None:
3942
priors = {}
@@ -77,17 +80,17 @@ def __init__(self, x, y, intercept=True, labels=None,
7780
)
7881
coeffs.append(v)
7982
self.coeffs = tt.stack(coeffs, axis=0)
80-
self.y_est = x.dot(self.coeffs)
83+
self.y_est = x.dot(self.coeffs) + offset
8184

8285
@classmethod
8386
def from_formula(cls, formula, data, priors=None, vars=None,
84-
name='', model=None):
87+
name='', model=None, offset=0.):
8588
import patsy
8689
y, x = patsy.dmatrices(formula, data)
8790
labels = x.design_info.column_names
8891
return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False,
8992
labels=labels, priors=priors, vars=vars, name=name,
90-
model=model)
93+
model=model, offset=offset)
9194

9295

9396
class GLM(LinearComponent):
@@ -108,12 +111,17 @@ class GLM(LinearComponent):
108111
init : dict - test_vals for coefficients
109112
vars : dict - random variables instead of creating new ones
110113
family : pymc3.glm.families object
114+
offset : scalar, or numpy/theano array with the same shape as y
115+
this can be used to specify an a priori known component to be
116+
included in the linear predictor during fitting.
111117
"""
112118
def __init__(self, x, y, intercept=True, labels=None,
113-
priors=None, vars=None, family='normal', name='', model=None):
119+
priors=None, vars=None, family='normal', name='',
120+
model=None, offset=0.):
114121
super(GLM, self).__init__(
115122
x, y, intercept=intercept, labels=labels,
116-
priors=priors, vars=vars, name=name, model=model
123+
priors=priors, vars=vars, name=name,
124+
model=model, offset=offset
117125
)
118126

119127
_families = dict(
@@ -131,13 +139,14 @@ def __init__(self, x, y, intercept=True, labels=None,
131139

132140
@classmethod
133141
def from_formula(cls, formula, data, priors=None,
134-
vars=None, family='normal', name='', model=None):
142+
vars=None, family='normal', name='',
143+
model=None, offset=0.):
135144
import patsy
136145
y, x = patsy.dmatrices(formula, data)
137146
labels = x.design_info.column_names
138147
return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False,
139148
labels=labels, priors=priors, vars=vars, family=family,
140-
name=name, model=model)
149+
name=name, model=model, offset=offset)
141150

142151

143152
glm = GLM

pymc3/stats.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -887,10 +887,6 @@ def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None,
887887
samples. Defaults to the smaller of 100 or the number of samples.
888888
This is only meaningful when `stat_funcs` is None.
889889
890-
See also
891-
--------
892-
summary : Generate a pretty-printed summary of a trace.
893-
894890
Returns
895891
-------
896892
`pandas.DataFrame` with summary statistics for each variable Defaults one

pymc3/tests/test_glm.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,16 @@ def test_glm(self):
5858
assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0
5959
assert round(abs(np.mean(trace['sd'])-self.sd), 1) == 0
6060

61+
def test_glm_offset(self):
62+
offset = 1.
63+
with Model() as model:
64+
GLM.from_formula('y ~ x', self.data_linear, offset=offset)
65+
step = Slice(model.vars)
66+
trace = sample(500, step=step, tune=0, progressbar=False,
67+
random_seed=self.random_seed)
68+
69+
assert round(abs(np.mean(trace['Intercept'])-self.intercept+offset), 1) == 0
70+
6171
def test_glm_link_func(self):
6272
with Model() as model:
6373
GLM.from_formula('y ~ x', self.data_logistic,

0 commit comments

Comments (0)