examples/stochastic_volatility.py (20 additions & 18 deletions)
@@ -9,11 +9,11 @@
 from pymc.distributions.timeseries import *
 
 from scipy.sparse import csc_matrix
-from scipy import optimize
+from scipy import optimize
 
 # <markdowncell>
 
-# Asset prices have time-varying volatility (variance of day over day `returns`). In some periods, returns are highly vaiable, and in others very stable. Stochastic volatility models model this with a latent volatility variable, modeled as a stochastic process. The following model is similar to the one described in the No-U-Turn Sampler paper, Hoffman (2011) p21.
+# Asset prices have time-varying volatility (variance of day over day `returns`). In some periods, returns are highly variable, while in others very stable. Stochastic volatility models model this with a latent volatility variable, modeled as a stochastic process. The following model is similar to the one described in the No-U-Turn Sampler paper, Hoffman (2011) p21.
 #
 # $$ \sigma \sim Exponential(50) $$
 #
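Only the first line of the model specification is visible in this hunk. As a hedged sketch of how the full model described above might be written against this early API (the constructor names `TransformedVar`, `logtransform`, `GaussianRandomWalk` and `T`, and the values for `nu` and `s`, are assumptions taken from the NUTS-paper formulation, not copied from the file):

with Model() as sv_model:
    # sigma ~ Exponential(50), sampled on the log scale (see the TransformedVar note later in this diff)
    sigma, log_sigma = sv_model.TransformedVar('sigma', Exponential.dist(50.), logtransform)
    nu = Exponential('nu', .1)                          # degrees of freedom of the returns distribution (assumed rate)
    s = GaussianRandomWalk('s', sigma ** -2, shape=n)   # latent log-volatility, a random walk with innovation scale sigma
    r = T('r', nu, lam=exp(-2 * s), observed=returns)   # Student-t likelihood for the observed returns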
@@ -31,28 +31,27 @@
 
 # <codecell>
 
-model = Model()
+# Load 400 returns from the S&P 500.
+n = 400
+returns = np.genfromtxt("data/SP500.csv")[-n:]
 
 # <markdowncell>
 
 # Specifying the model in pymc mirrors its statistical specification.
 #
-# However, it is easier to sample the scale of the volatility process innovations, $\sigma$, on a log scale, so we create it using `TransformedVar` and use `logtransform`. `TransformedVar` creates one variable in the transformed space and one in the normal space. The one in the transformed space (here $log(\sigma) $) is the one over which sampling will occur, and the one in the normal space is the one to use throughout the rest of the model.
+# However, it is easier to sample the scale of the volatility process innovations, $\sigma$, on a log scale, so we create it using `TransformedVar` and use `logtransform`. `TransformedVar` creates one variable in the transformed space and one in the normal space. The one in the transformed space (here $\text{log}(\sigma)$) is the one over which sampling will occur, and the one in the normal space is the one to use throughout the rest of the model.
 #
 # It takes a variable name, a distribution and a transformation to use.
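In code, the call pattern being described looks roughly like this (the distribution and its argument are placeholders matching the $\sigma \sim Exponential(50)$ line above; the file's actual values are not visible in this hunk):

# One call, three arguments (name, distribution, transformation); two variables returned.
sigma, log_sigma = model.TransformedVar('sigma', Exponential.dist(50.), logtransform)
# sampling happens on log_sigma; the rest of the model uses sigma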
-# To get a decent scaling matrix for the hamiltonaian sampler, we find the hessian at a point. The method `Model.d2logpc` gives us a Theano compiled function that returns the matrix of 2nd derivatives.
+# To get a decent scaling matrix for the Hamiltonian sampler, we find the Hessian at a point. The method `Model.d2logpc` gives us a `Theano` compiled function that returns the matrix of 2nd derivatives.
 #
-# However, the 2nd derivatives for the degrees of freedom parameter, `nu`, are negative and thus not very informative and make the matrix non-positive definite, so we replace that entry with a reasonable guess at the scale. The interactions between `log_sigma`/`nu` and `s` are also not very useful, so we set them to zero.
+# However, the 2nd derivatives for the degrees of freedom parameter, `nu`, are negative and thus not very informative and make the matrix non-positive definite, so we replace that entry with a reasonable guess at the scale. The interactions between `log_sigma`/`nu` and `s` are also not very useful, so we set them to zero.
 #
-# The hessian matrix is also very sparse, so we make it a sparse matrix for faster sampling.
+# The Hessian matrix is also very sparse, so we make it a sparse matrix for faster sampling.
-# We do a short initial run to get near the right area, then start again using a new hessian at the new starting point to get faster sampling due to better scaling.
+# We do a short initial run to get near the right area, then start again using a new Hessian at the new starting point to get faster sampling due to better scaling.
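The code implementing this recipe is not shown in the diff. A rough sketch of the idea, assuming `Model.d2logpc` behaves as described above and that the variables are ordered `log_sigma`, `nu`, `s` (both assumptions, and the guessed scale for `nu` is arbitrary):

H = model.d2logpc()(start)       # dense matrix of 2nd derivatives at the starting point (assumed call pattern)
H[1, 1] = 25.                    # hypothetical: replace the uninformative nu entry with a guessed scale
H[0, 2:] = H[2:, 0] = 0.         # zero the log_sigma / s interactions
H[1, 2:] = H[2:, 1] = 0.         # zero the nu / s interactions
H = csc_matrix(H)                # sparse Hessian for faster sampling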
+# Now we want to do inference assuming the following model:
+#
+# $$ x \sim \textrm{Normal}(0,1) $$
+# $$ y \sim \textrm{Normal}(\textrm{exp}(x),2)$$
+# $$ z \sim \textrm{Normal}(x + y,0.75)$$
+#
+# The aim here is to get posteriors over $x$ and $y$ given the data we have about $z$ (`zdata`).
+#
 # We create a new `Model` object, and do operations within its context. The `with` lets PyMC know this model is the current model of interest.
 #
 # We construct new random variables using the constructor for their prior distribution, such as `Normal`, while within a model context (inside the `with`). When you make a random variable it is automatically added to the model. The constructor returns a Theano variable.
 y = Normal('y', mu=exp(x), tau=2.**-2, shape=(ndims, 1))  # here, shape is telling us it's a vector rather than a scalar.
+z = Normal('z', mu=x + y, tau=.75**-2, observed=zdata)  # shape is inferred from zdata
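Only `y` and `z` are visible in this hunk. A self-contained sketch of the whole block being described, with an assumed data-generation step for `ndims`, `xtrue`, `ytrue` and `zdata` (none of which appear in the diff):

import numpy as np
from pymc import Model, Normal
from theano.tensor import exp

# Assumed setup: simulate the "true" values and the observed data.
ndims, nobs = 2, 20
xtrue = np.random.normal(scale=2., size=1)
ytrue = np.random.normal(loc=np.exp(xtrue), scale=1., size=(ndims, 1))
zdata = np.random.normal(loc=xtrue + ytrue, scale=.75, size=(ndims, nobs))

with Model() as model:
    x = Normal('x', mu=0., tau=1.)                              # x ~ Normal(0, 1)
    y = Normal('y', mu=exp(x), tau=2.**-2, shape=(ndims, 1))    # y ~ Normal(exp(x), 2)
    z = Normal('z', mu=x + y, tau=.75**-2, observed=zdata)      # z ~ Normal(x + y, 0.75), observed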
+
+# <markdowncell>
+
+# A parenthetical note on the parameters for the normal. Variance is encoded as `tau`, indicating precision, which is simply inverse variance (so $\tau=\sigma^{-2}$). This is used because the gamma distribution is the conjugate prior for precision, and must be inverted to get variance. Encoding in terms of precision saves the inversion step in cases where variance is actually modeled using gamma as a prior.
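A quick worked instance of that parameterization (illustrative only, using the value already passed for `z` above):

sigma = 0.75
tau = sigma ** -2   # precision = 1 / sigma**2 ≈ 1.78, i.e. the .75**-2 given to Normal for z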
 
 # <markdowncell>
 
 # Fit Model
 # ---------
-# We need a starting point for our sampling. The `find_MAP` function finds the maximum a posteriori point (MAP), which is often a good choice for starting point. `find_MAP` uses an optimization algorithm to find the local maximum of the log posterior.
+# We need a starting point for our sampling. The `find_MAP` function finds the maximum a posteriori point (MAP), which is often a good choice of starting point. `find_MAP` uses an optimization algorithm (`scipy.optimize.fmin_l_bfgs_b`, or [BFGS](http://en.wikipedia.org/wiki/BFGS_method), by default) to find the local maximum of the log posterior.
+#
+# Note that this `with` construction is used again. Functions like `find_MAP` and `HamiltonianMC` need to have a model in their context. `with` activates the context of a particular model within its block.
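A minimal sketch of the step being described, assuming only the `find_MAP` name from the text and the `model` defined earlier:

from pymc import find_MAP

with model:              # the `with` puts `model` in context, as the note above explains
    start = find_MAP()   # maximum a posteriori point, used below as the sampler's starting point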
 
 # <codecell>
 
@@ -59,7 +84,13 @@
 
 # <codecell>
 
-start
+print "MAP found:"
+print "x:", start['x']
+print "y:", start['y']
+
+print "Compare with true values:"
+print "ytrue", ytrue
+print "xtrue", xtrue
 
 # <markdowncell>
 
@@ -109,7 +140,7 @@
 
 # <codecell>
 
-traceplot(trace)
+traceplot(trace);
 
 # <markdowncell>
 
@@ -130,3 +161,10 @@
 # * Without a name argument, it simply constructs a distribution object and returns it. It won't construct a random variable. This object has properties like `logp` (density function) and `expectation`.
 # * With a name argument, it constructs a random variable using the distribution object as the prior distribution and inserts this random variable into the current model. Then the constructor returns the random variable.
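A small illustration of the two behaviours in the bullets above (the variable names are made up; only the with/without-name distinction comes from the text):

dist = Normal(mu=0., tau=1.)        # no name: just a distribution object, with properties like logp
with model:
    w = Normal('w', mu=0., tau=1.)  # name given: a random variable, added to the current model and returned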