Skip to content

Commit e504d49

Browse files
author
Bob Carpenter
committed
added HMM simultations and new section in manual in time-series chapter
1 parent 2322308 commit e504d49

File tree

6 files changed

+190
-80
lines changed

6 files changed

+190
-80
lines changed

misc/hmm/hmm-analytic.stan

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
data {
2+
int<lower=1> K; // num categories
3+
int<lower=1> V; // num words
4+
int<lower=0> T; // num instances
5+
int<lower=1,upper=V> w[T]; // words
6+
int<lower=1,upper=K> z[T]; // categories
7+
vector<lower=0>[K] alpha; // transit prior
8+
vector<lower=0>[V] beta; // emit prior
9+
}
10+
transformed data {
11+
vector<lower=0>[K] alpha_post[K];
12+
vector<lower=0>[V] beta_post[K];
13+
for (k in 1:K)
14+
alpha_post[k] <- alpha;
15+
for (t in 2:T)
16+
alpha_post[z[t-1],z[t]] <- alpha_post[z[t-1],z[t]] + 1;
17+
for (k in 1:K)
18+
beta_post[k] <- beta;
19+
for (t in 1:T)
20+
beta_post[z[t],w[t]] <- beta_post[z[t],w[t]] + 1;
21+
}
22+
parameters {
23+
simplex[K] theta[K]; // transit probs
24+
simplex[V] phi[K]; // emit probs
25+
}
26+
model {
27+
for (k in 1:K)
28+
theta[k] ~ dirichlet(alpha_post[k]);
29+
for (k in 1:K)
30+
phi[k] ~ dirichlet(beta_post[k]);
31+
}

misc/hmm/hmm-fit-semisup.stan

Lines changed: 0 additions & 73 deletions
This file was deleted.

misc/hmm/hmm-semisup.stan

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
data {
2+
int<lower=1> K; // num categories
3+
int<lower=1> V; // num words
4+
int<lower=0> T; // num supervised items
5+
int<lower=1> T_unsup; // num unsupervised items
6+
int<lower=1,upper=V> w[T]; // words
7+
int<lower=1,upper=K> z[T]; // categories
8+
int<lower=1,upper=V> u[T_unsup]; // unsup words
9+
vector<lower=0>[K] alpha; // transit prior
10+
vector<lower=0>[V] beta; // emit prior
11+
}
12+
parameters {
13+
simplex[K] theta[K]; // transit probs
14+
simplex[V] phi[K]; // emit probs
15+
}
16+
model {
17+
for (k in 1:K)
18+
theta[k] ~ dirichlet(alpha);
19+
for (k in 1:K)
20+
phi[k] ~ dirichlet(beta);
21+
for (t in 1:T)
22+
w[t] ~ categorical(phi[z[t]]);
23+
for (t in 2:T)
24+
z[t] ~ categorical(theta[z[t-1]]);
25+
26+
{
27+
// forward algorithm computes log p(u|...)
28+
real acc[K];
29+
real gamma[T_unsup,K];
30+
for (k in 1:K)
31+
gamma[1,k] <- log(phi[k,u[1]]);
32+
for (t in 2:T_unsup) {
33+
for (k in 1:K) {
34+
for (j in 1:K)
35+
acc[j] <- gamma[t-1,j] + log(theta[j,k]) + log(phi[k,u[t]]);
36+
gamma[t,k] <- log_sum_exp(acc);
37+
}
38+
}
39+
lp__ <- lp__ + log_sum_exp(gamma[T_unsup]);
40+
}
41+
}
42+
generated quantities {
43+
int<lower=1,upper=K> y_star[T_unsup];
44+
real log_p_y_star;
45+
{
46+
// Viterbi algorithm
47+
int back_ptr[T_unsup,K];
48+
real best_logp[T_unsup,K];
49+
real best_total_logp;
50+
for (k in 1:K)
51+
best_logp[1,K] <- log(phi[k,u[1]]);
52+
for (t in 2:T_unsup) {
53+
for (k in 1:K) {
54+
best_logp[t,k] <- negative_infinity();
55+
for (j in 1:K) {
56+
real logp;
57+
logp <- best_logp[t-1,j] + log(theta[j,k]) + log(phi[k,u[t]]);
58+
if (logp > best_logp[t,k]) {
59+
back_ptr[t,k] <- j;
60+
best_logp[t,k] <- logp;
61+
}
62+
}
63+
}
64+
}
65+
log_p_y_star <- max(best_logp[T_unsup]);
66+
for (k in 1:K)
67+
if (best_logp[T_unsup,k] == log_p_y_star)
68+
y_star[T_unsup] <- k;
69+
for (t in 1:(T_unsup - 1))
70+
y_star[T_unsup - t] <- back_ptr[T_unsup - t + 1,
71+
y_star[T_unsup - t + 1]];
72+
}
73+
}

misc/hmm/hmm-sim.R

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
library("MCMCpack")
2+
library("rstan")
3+
4+
# CONSTANTS
5+
K <- 3;
6+
V <- 10;
7+
T <- 100;
8+
T_unsup <- 500;
9+
alpha <- rep(1,K);
10+
beta <- rep(0.1,V);
11+
12+
# DATA
13+
w <- rep(0,T);
14+
z <- rep(0,T);
15+
u <- rep(0,T_unsup);
16+
17+
# PARAMETERS
18+
theta <- rdirichlet(K,alpha);
19+
phi <- rdirichlet(K,beta);
20+
21+
# SIMULATE DATA
22+
23+
# supervised
24+
z[1] <- sample(1:K,1);
25+
for (t in 2:T)
26+
z[t] <- sample(1:K,1,replace=TRUE,theta[z[t - 1], 1:K]);
27+
for (t in 1:T)
28+
w[t] <- sample(1:V,1,replace=TRUE,phi[z[t],1:V]);
29+
30+
# unsupervised
31+
y <- rep(0,T_unsup);
32+
y[1] <- sample(1:K,1);
33+
for (t in 2:T_unsup)
34+
y[t] <- sample(1:K,1,replace=TRUE,theta[y[t-1],1:K]);
35+
for (t in 1:T_unsup)
36+
u[t] <- sample(1:V,1,replace=TRUE,phi[y[t], 1:V]);
37+
38+
39+
fit <- stan('hmm-semisup.stan', # 'hmm-fit-semisup.stan',
40+
data=list(K=K,V=V,T=T,T_unsup=T_unsup,M=M,w=w,z=z,u=u,alpha=alpha,beta=beta),
41+
iter=200, chains=1, init=0); # fit = fit // reuse model

misc/hmm/hmm-sufficient.stan

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
data {
2+
int<lower=1> K; // num categories
3+
int<lower=1> V; // num words
4+
int<lower=0> T; // num instances
5+
int<lower=1,upper=V> w[T]; // words
6+
int<lower=1,upper=K> z[T]; // categories
7+
vector<lower=0>[K] alpha; // transit prior
8+
vector<lower=0>[V] beta; // emit prior
9+
}
10+
transformed data {
11+
int<lower=0> trans[K,K];
12+
int<lower=0> emit[K,V];
13+
for (k1 in 1:K)
14+
for (k2 in 1:K)
15+
trans[k1,k2] <- 0;
16+
for (t in 2:T)
17+
trans[z[t - 1], z[t]] <- 1 + trans[z[t - 1], z[t]];
18+
for (k in 1:K)
19+
for (v in 1:V)
20+
emit[k,v] <- 0;
21+
for (t in 1:T)
22+
emit[z[t], w[t]] <- 1 + emit[z[t], w[t]];
23+
}
24+
parameters {
25+
simplex[K] theta[K]; // transit probs
26+
simplex[V] phi[K]; // emit probs
27+
}
28+
model {
29+
for (k in 1:K)
30+
theta[k] ~ dirichlet(alpha);
31+
for (k in 1:K)
32+
phi[k] ~ dirichlet(beta);
33+
34+
for (k in 1:K)
35+
trans[k] ~ multinomial(theta[k]);
36+
for (k in 1:K)
37+
emit[k] ~ multinomial(phi[k]);
38+
}
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
data {
22
int<lower=1> K; // num categories
33
int<lower=1> V; // num words
4-
int<lower=0> N; // num instances
5-
int<lower=1,upper=V> w[N]; // words
6-
int<lower=1,upper=K> z[N]; // categories
4+
int<lower=0> T; // num instances
5+
int<lower=1,upper=V> w[T]; // words
6+
int<lower=1,upper=K> z[T]; // categories
77
vector<lower=0>[K] alpha; // transit prior
88
vector<lower=0>[V] beta; // emit prior
99
}
@@ -16,8 +16,8 @@ model {
1616
theta[k] ~ dirichlet(alpha);
1717
for (k in 1:K)
1818
phi[k] ~ dirichlet(beta);
19-
for (n in 1:N)
20-
w[n] ~ categorical(phi[z[n]]);
21-
for (n in 2:N)
22-
z[n] ~ categorical(theta[z[n-1]]);
19+
for (t in 1:T)
20+
w[t] ~ categorical(phi[z[t]]);
21+
for (t in 2:T)
22+
z[t] ~ categorical(theta[z[t - 1]]);
2323
}

0 commit comments

Comments
 (0)