@@ -1,23 +1,41 @@
import numpy as np
from ..distributions import Normal
- from ..tuning.starting import find_MAP
from ..model import modelcontext
import patsy
- import theano
import pandas as pd
- from collections import defaultdict
- from pandas.tools.plotting import scatter_matrix
+ import theano
+ from collections import defaultdict, namedtuple

from . import families

__all__ = ['glm', 'linear_component', 'plot_posterior_predictive']


- def linear_component(formula, data, priors=None,
-                      intercept_prior=None,
-                      regressor_prior=None,
-                      init_vals=None, family=None,
-                      model=None):
+ def _xy_to_data_and_formula(X, y):
+     if not isinstance(y, pd.Series):
+         y = pd.Series(y, name='y')
+     else:
+         if not y.name:
+             y.name = 'y'
+     if not isinstance(X, (pd.DataFrame, pd.Series)):
+         if len(X.shape) > 1:
+             cols = ['x%d' % i for i in range(X.shape[1])]
+         else:
+             cols = ['x']
+         X = pd.DataFrame(X, columns=cols)
+     elif isinstance(X, pd.Series):
+         if not X.name:
+             X.name = 'x'
+     # else -> pd.DataFrame -> ok
+     data = pd.concat([y, X], 1)
+     formula = patsy.ModelDesc(
+         [patsy.Term([patsy.LookupFactor(y.name)])],
+         [patsy.Term([patsy.LookupFactor(p)]) for p in X.columns]
+     )
+     return data, formula
+
+
+ class linear_component(namedtuple('Estimate', 'y_est,coeffs')):
    """Create linear model according to patsy specification.

    Parameters
@@ -38,10 +56,6 @@ def linear_component(formula, data, priors=None,
    init_vals : dict
        Set starting values externally: parameter -> value
        Default: None
-     family : statsmodels.family
-         Link function to pass to statsmodels (init has to be True).
-         See `statsmodels.api.families`
-         Default: identity

    Output
    ------
@@ -50,51 +64,78 @@ def linear_component(formula, data, priors=None,
    Example
    -------
    # Logistic regression
-     y_est, coeffs = glm('male ~ height + weight',
-                         htwt_data,
-                         family=glm.families.Binomial(link=glm.family.logit))
-     y_data = Bernoulli('y', y_est, observed=data.male)
+     y_est, coeffs = linear_component('male ~ height + weight',
+                                      htwt_data)
+     probability = glm.families.logit(y_est)
+     y_data = Bernoulli('y', probability, observed=data.male)
    """
-     if intercept_prior is None:
-         intercept_prior = Normal.dist(mu=0, tau=1.0E-12)
-     if regressor_prior is None:
-         regressor_prior = Normal.dist(mu=0, tau=1.0E-12)
-
-     if priors is None:
-         priors = defaultdict(None)
-
-     # Build patsy design matrix and get regressor names.
-     _, dmatrix = patsy.dmatrices(formula, data)
-     reg_names = dmatrix.design_info.column_names
-
-     if init_vals is None:
-         init_vals = {}
-
-     # Create individual coefficients
-     model = modelcontext(model)
-     coeffs = []
-
-     if reg_names[0] == 'Intercept':
-         prior = priors.get('Intercept', intercept_prior)
-         coeff = model.Var(reg_names.pop(0), prior)
-         if 'Intercept' in init_vals:
-             coeff.tag.test_value = init_vals['Intercept']
-         coeffs.append(coeff)
-
-     for reg_name in reg_names:
-         prior = priors.get(reg_name, regressor_prior)
-         coeff = model.Var(reg_name, prior)
-         if reg_name in init_vals:
-             coeff.tag.test_value = init_vals[reg_name]
-         coeffs.append(coeff)
-
-     y_est = theano.dot(np.asarray(dmatrix),
-                        theano.tensor.stack(*coeffs)).reshape((1, -1))
+     __slots__ = ()

-     return y_est, coeffs
-
-
- def glm(*args, **kwargs):
+     def __new__(cls, formula, data, priors=None,
+                 intercept_prior=None,
+                 regressor_prior=None,
+                 init_vals=None,
+                 model=None,
+                 name=''):
+         if intercept_prior is None:
+             intercept_prior = Normal.dist(mu=0, tau=1.0E-12)
+         if regressor_prior is None:
+             regressor_prior = Normal.dist(mu=0, tau=1.0E-12)
+
+         if priors is None:
+             priors = defaultdict(None)
+
+         # Build patsy design matrix and get regressor names.
+         _, dmatrix = patsy.dmatrices(formula, data)
+         reg_names = dmatrix.design_info.column_names
+
+         if init_vals is None:
+             init_vals = {}
+
+         # Create individual coefficients
+         model = modelcontext(model)
+         coeffs = []
+         if name:
+             name = '{}_'.format(name)
+         if reg_names[0] == 'Intercept':
+             prior = priors.get('Intercept', intercept_prior)
+             coeff = model.Var('{}{}'.format(name, reg_names.pop(0)), prior)
+             if 'Intercept' in init_vals:
+                 coeff.tag.test_value = init_vals['Intercept']
+             coeffs.append(coeff)
+
+         for reg_name in reg_names:
+             prior = priors.get(reg_name, regressor_prior)
+             coeff = model.Var('{}{}'.format(name, reg_name), prior)
+             if reg_name in init_vals:
+                 coeff.tag.test_value = init_vals[reg_name]
+             coeffs.append(coeff)
+
+         y_est = theano.dot(np.asarray(dmatrix),
+                            theano.tensor.stack(*coeffs)).reshape((1, -1))
+
+         return super(linear_component, cls).__new__(cls, y_est, coeffs)
+
+     @classmethod
+     def from_xy(cls, X, y,
+                 priors=None,
+                 intercept_prior=None,
+                 regressor_prior=None,
+                 init_vals=None,
+                 model=None,
+                 name=''):
+         data, formula = _xy_to_data_and_formula(X, y)
+         return cls(formula, data,
+                    priors=priors,
+                    intercept_prior=intercept_prior,
+                    regressor_prior=regressor_prior,
+                    init_vals=init_vals,
+                    model=model,
+                    name=name
+                    )
+
+
+ class glm(namedtuple('Estimate', 'y_est,coeffs')):
    """Create GLM after Patsy model specification string.

    Parameters
@@ -121,29 +162,66 @@ def glm(*args, **kwargs):

    Output
    ------
-     vars : List of created random variables (y_est, coefficients etc)
+     (y_est, coeffs) : Estimate for y, list of coefficients

    Example
    -------
    # Logistic regression
    vars = glm('male ~ height + weight',
               data,
-                family=glm.families.Binomial(link=glm.families.logit))
+                family=glm.families.Binomial())
    """
-
-     model = modelcontext(kwargs.get('model'))
-
-     family = kwargs.pop('family', families.Normal())
-
-     call_find_map = kwargs.pop('find_MAP', True)
-     formula = args[0]
-     data = args[1]
-     y_data = np.asarray(patsy.dmatrices(formula, data)[0]).T
-
-     y_est, coeffs = linear_component(*args, **kwargs)
-     family.create_likelihood(y_est, y_data)
-
-     return [y_est] + coeffs
+     __slots__ = ()
+
+     def __new__(cls, formula, data, priors=None,
+                 intercept_prior=None,
+                 regressor_prior=None,
+                 init_vals=None,
+                 family='normal',
+                 model=None,
+                 name=''):
+         _families = dict(
+             normal=families.Normal,
+             student=families.StudentT,
+             binomial=families.Binomial,
+             poisson=families.Poisson
+         )
+         if isinstance(family, str):
+             family = _families[family]()
+
+         y_data = np.asarray(patsy.dmatrices(formula, data)[0]).T
+
+         y_est, coeffs = linear_component(
+             formula, data, priors=priors,
+             intercept_prior=intercept_prior,
+             regressor_prior=regressor_prior,
+             init_vals=init_vals,
+             model=model,
+             name=name
+         )
+         family.create_likelihood(name, y_est, y_data, model=model)
+
+         return super(glm, cls).__new__(cls, y_est, coeffs)
+
+     @classmethod
+     def from_xy(cls, X, y,
+                 priors=None,
+                 intercept_prior=None,
+                 regressor_prior=None,
+                 init_vals=None,
+                 family='normal',
+                 model=None,
+                 name=''):
+         data, formula = _xy_to_data_and_formula(X, y)
+         return cls(formula, data,
+                    priors=priors,
+                    intercept_prior=intercept_prior,
+                    regressor_prior=regressor_prior,
+                    init_vals=init_vals,
+                    model=model,
+                    family=family,
+                    name=name
+                    )


def plot_posterior_predictive(trace, eval=None, lm=None, samples=30, **kwargs):
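
A minimal usage sketch of the class-based API introduced in this diff (not part of the change itself; it assumes the module is exposed as pymc3.glm, that a PyMC3 sampler is available, and it uses synthetic data):

    import numpy as np
    import pymc3 as pm

    # Synthetic regression data (illustrative only)
    x = np.linspace(0, 1, 100)
    y_obs = 1 + 2 * x + np.random.normal(scale=0.1, size=len(x))

    with pm.Model():
        # from_xy wraps the raw arrays in a pandas DataFrame and builds the
        # patsy formula via _xy_to_data_and_formula before creating the likelihood
        y_est, coeffs = pm.glm.glm.from_xy(x, y_obs, family='normal')
        trace = pm.sample(500)

Because both glm and linear_component subclass namedtuple('Estimate', 'y_est,coeffs'), the returned object still unpacks into (y_est, coeffs) exactly as the old function interface did.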