@@ -99,3 +99,119 @@ Good reasons for defining a custom Op might be the following:
Theano has extensive `documentation <http://deeplearning.net/software/theano/extending/index.html>`_
about how to write new Ops.
+
+
+ Finding the root of a function
+ ------------------------------
+
+ We'll use finding the root of a function as a simple example.
+ Let's say we want to define a model where a parameter is defined
+ implicitly as the root of a function that depends on another
+ parameter:
+
+ .. math::
+
+     \theta \sim N^+(0, 1)\\
+     \text{$\mu \in \mathbb{R}^+$ such that $R(\mu, \theta)
+     = \mu + \mu e^{\theta \mu} - 1 = 0$}\\
+     y \sim N(\mu, 0.1^2)
+
+ First, we observe that this problem is well-defined, because
+ :math:`R(\cdot, \theta)` is monotone and has the image :math:`(-1, \infty)`
+ for :math:`\mu, \theta \in \mathbb{R}^+`. To avoid overflows in
+ :math:`\exp(\mu \theta)` for large
+ values of :math:`\mu\theta` we instead find the root of
+
+ .. math::
+
+     R'(\mu, \theta)
+     = \log(R(\mu, \theta) + 1)
+     = \log(\mu) + \log(1 + e^{\theta\mu}).
+
+ Also, we have
+
+ .. math::
+
+     \frac{\partial}{\partial\mu} R'(\mu, \theta)
+     = \theta \, \text{logit}^{-1}(\theta\mu) + \mu^{-1}.
+
+ We can now use `scipy.optimize.newton` to find the root::
+
+     from scipy import optimize, special
+     import numpy as np
+
+     def func(mu, theta):
+         thetamu = theta * mu
+         value = np.log(mu) + np.logaddexp(0, thetamu)
+         return value
+
+     def jac(mu, theta):
+         thetamu = theta * mu
+         jac = theta * special.expit(thetamu) + 1 / mu
+         return jac
+
+     def mu_from_theta(theta):
+         return optimize.newton(func, 1, fprime=jac, args=(theta,))
+
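+ As a quick sanity check (not part of the original example; the value 0.4
+ for :math:`\theta` is just an arbitrary test point), we can confirm that the
+ returned value is indeed a root of the original :math:`R`::
+
+     mu_test = mu_from_theta(0.4)
+     # a root of R' is also a root of R = mu + mu * exp(theta * mu) - 1
+     print(mu_test + mu_test * np.exp(0.4 * mu_test) - 1)  # should be close to 0
+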
+ We could wrap `mu_from_theta` with `tt.as_op` and use gradient-free
+ methods like Metropolis, but to get NUTS and ADVI working, we also
+ need to define the derivative of `mu_from_theta`. We can find this
+ derivative using the implicit function theorem, or equivalently we
+ take the derivative with respect to :math:`\theta` on both sides of
+ :math:`R(\mu(\theta), \theta) = 0` and solve for :math:`\frac{d\mu}{d\theta}`.
+ This isn't hard to do by hand, but for the fun of it, let's do it using
+ sympy::
+
+     import sympy
+
+     mu = sympy.Function('mu')
+     theta = sympy.Symbol('theta')
+     R = mu(theta) + mu(theta) * sympy.exp(theta * mu(theta)) - 1
+     solution = sympy.solve(R.diff(theta), mu(theta).diff(theta))[0]
+
+ We get
+
+ .. math::
+
+     \frac{d}{d\theta}\mu(\theta)
+     = - \frac{\mu(\theta)^2}{1 + \theta\mu(\theta) + e^{-\theta\mu(\theta)}}
+
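+ Before wiring this into theano, we can check the closed form against a
+ central finite difference of ``mu_from_theta`` (a small illustrative check,
+ not part of the original example; the test point 0.4 is arbitrary)::
+
+     theta0, eps = 0.4, 1e-6
+     mu0 = mu_from_theta(theta0)
+     closed_form = -mu0 ** 2 / (1 + theta0 * mu0 + np.exp(-theta0 * mu0))
+     finite_diff = (mu_from_theta(theta0 + eps) - mu_from_theta(theta0 - eps)) / (2 * eps)
+     print(closed_form, finite_diff)  # the two numbers should agree closely
+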
+ Now, we use this to define a theano op that also computes the gradient::
+
+     import theano
+     import theano.tensor as tt
+     import theano.tests.unittest_tools
+
+     class MuFromTheta(tt.Op):
+         itypes = [tt.dscalar]
+         otypes = [tt.dscalar]
+
+         def perform(self, node, inputs, outputs):
+             theta, = inputs
+             mu = mu_from_theta(theta)
+             outputs[0][0] = np.array(mu)
+
+         def grad(self, inputs, g):
+             theta, = inputs
+             mu = self(theta)
+             thetamu = theta * mu
+             return [- g[0] * mu ** 2 / (1 + thetamu + tt.exp(-thetamu))]
+
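+ As a usage sketch (our own illustration, with hypothetical variable names),
+ the new op can be evaluated and differentiated like any other theano
+ expression::
+
+     theta_var = tt.dscalar('theta')
+     mu_var = MuFromTheta()(theta_var)
+     f = theano.function([theta_var], [mu_var, tt.grad(mu_var, theta_var)])
+     print(f(0.4))  # mu and d(mu)/d(theta) at theta = 0.4
+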
+ If you value your sanity, always check that the gradient is ok::
+
+     theano.tests.unittest_tools.verify_grad(MuFromTheta(), [np.array(0.2)])
+     theano.tests.unittest_tools.verify_grad(MuFromTheta(), [np.array(1e-5)])
+     theano.tests.unittest_tools.verify_grad(MuFromTheta(), [np.array(1e5)])
+
+ We can now define our model using this new op::
+
+     import pymc3 as pm
+
+     tt_mu_from_theta = MuFromTheta()
+
+     with pm.Model() as model:
+         theta = pm.HalfNormal('theta', sd=1)
+         mu = pm.Deterministic('mu', tt_mu_from_theta(theta))
+         pm.Normal('y', mu=mu, sd=0.1, observed=[0.2, 0.21, 0.3])
+
+         trace = pm.sample()
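+
+ Once sampling has finished, the posterior for ``theta`` and ``mu`` can be
+ inspected as usual (an optional follow-up, not part of the original
+ example)::
+
+     print(pm.summary(trace))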