Skip to content

Commit 90f9461

Browse files
committed
updated for new array syntax, cmdstanr
1 parent 8a7eb50 commit 90f9461

File tree

5 files changed

+1130
-509
lines changed

5 files changed

+1130
-509
lines changed
Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
11
library(maptools);
22
library(spdep);
33
library(rgdal)
4-
library(rstan);
5-
options(mc.cores = 3);
4+
library(cmdstanr);
65

76
load("nyc_subset.data.R");
87

98
nyc_shp<-readOGR("nycTracts10", layer="nycTracts10");
109
geoids <- nyc_shp$GEOID10 %in% nyc_tractIDs;
1110
nyc_subset_shp <- nyc_shp[geoids,];
1211
nyc_subset_shp <- nyc_subset_shp[order(nyc_subset_shp$GEOID10),];
13-
1412
nb_nyc_subset = poly2nb(nyc_subset_shp);
15-
coords<-coordinates(clipped_nyc_subset_shp);
1613

1714
y = events_2001
1815
E = pop_2001;
@@ -27,8 +24,22 @@ node2 = nbs$node2;
2724
N_edges = nbs$N_edges;
2825
scaling_factor = scale_nb_components(nb_nyc_subset)[1];
2926

30-
bym2_stan = stan_model("bym2_offset_only.stan");
31-
bym2_fit = sampling(bym2_stan, data=list(N,N_edges,node1,node2,y,E,scaling_factor), control = list(adapt_delta = 0.97), chains=3, warmup=7000, iter=8000, save_warmup=FALSE);
32-
print(bym2_fit, digits=3, pars=c("beta0", "rho", "sigma", "mu[1]", "mu[2]", "mu[3]", "mu[500]", "mu[1000]", "mu[1500]", "mu[1900]", "phi[1]", "phi[2]", "phi[3]", "phi[500]", "phi[1000]", "phi[1500]", "phi[1900]", "theta[1]", "theta[2]", "theta[3]", "theta[500]", "theta[1000]", "theta[1500]", "theta[1900]"), probs=c(0.025, 0.5, 0.975));
27+
data = list(N=N,
28+
N_edges=N_edges,
29+
node1=node1,
30+
node2=node2,
31+
y=y,
32+
E=E,
33+
scaling_factor=scaling_factor);
34+
35+
bym2_model = cmdstan_model("bym2_offset_only.stan");
36+
bym2_fit = bym2_model$sample(data=data, parallel_chains=4, refresh=0);
37+
38+
bym2_fit$summary(
39+
variables = c(
40+
"beta0", "rho", "sigma",
41+
"mu[1]", "mu[2]", "mu[3]", "mu[500]", "mu[1000]", "mu[1500]", "mu[1900]",
42+
"phi[1]", "phi[2]", "phi[3]", "phi[500]", "phi[1000]", "phi[1500]", "phi[1900]",
43+
"theta[1]", "theta[2]", "theta[3]", "theta[500]", "theta[1000]", "theta[1500]", "theta[1900]"));
3344

3445
save(bym2_fit, file="nyc_bym2_fit.data.R");
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
library(devtools)
2+
if(!require(cmdstanr)){
3+
devtools::install_github("stan-dev/cmdstanr", dependencies=c("Depends", "Imports"))
4+
}
5+
library(cmdstanr)
6+
options(digits=3)
7+
8+
source("mungeCARdata4stan.R")
9+
source("scotland_data.R")
10+
y = data$y;
11+
x = 0.1 * data$x;
12+
E = data$E;
13+
14+
nbs = mungeCARdata4stan(data$adj, data$num);
15+
N = nbs$N;
16+
node1 = nbs$node1;
17+
node2 = nbs$node2;
18+
N_edges = nbs$N_edges;
19+
20+
data = list(N=N,
21+
N_edges=N_edges,
22+
node1=node1,
23+
node2=node2,
24+
y=y,
25+
x=x,
26+
E=E);
27+
28+
bym_model = cmdstan_model("bym_predictor_plus_offset.stan");
29+
30+
bym_scot_stanfit = bym_model$sample(
31+
data = data,
32+
parallel_chains = 4,
33+
refresh=0);
34+
35+
bym_scot_stanfit$summary(variables = c("lp__", "beta0", "beta1",
36+
"sigma_phi", "tau_phi",
37+
"sigma_theta", "tau_theta",
38+
"mu[5]","phi[5]","theta[5]"));
39+
40+
bym_scot_stanfit$summary(variables = c("lp__", "beta0", "beta1",
41+
"sigma_phi", "tau_phi",
42+
"sigma_theta", "tau_theta",
43+
"mu[5]","phi[5]","theta[5]"),
44+
~quantile(.x, probs = c(0.025, 0.5, 0.975)));
45+
46+
47+
48+
Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
1-
library(rstan)
2-
options(mc.cores = parallel::detectCores())
3-
1+
library(devtools)
2+
if(!require(cmdstanr)){
3+
devtools::install_github("stan-dev/cmdstanr", dependencies=c("Depends", "Imports"))
4+
}
5+
if(!require(INLA)){
6+
install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
7+
}
8+
library(cmdstanr)
49
library(INLA)
510

611
source("mungeCARdata4stan.R")
712
source("scotland_data.R")
813
y = data$y;
14+
x = 0.1 * data$x;
915
E = data$E;
1016
K = 1;
11-
x = 0.1 * data$x;
1217

1318
nbs = mungeCARdata4stan(data$adj, data$num);
1419
N = nbs$N;
@@ -31,18 +36,23 @@ Q_inv = inla.qinv(Q_pert, constr=list(A = matrix(1,1,nbs$N),e=0))
3136
#Compute the geometric mean of the variances, which are on the diagonal of Q.inv
3237
scaling_factor = exp(mean(log(diag(Q_inv))))
3338

34-
scot_stanfit_soft = stan("bym2_predictor_plus_offset_soft.stan",
35-
data=list(N,N_edges,node1,node2,y,x,E,scaling_factor),
36-
control=list(adapt_delta = 0.97, stepsize = 0.1),
37-
chains=3, warmup=5000, iter=6000, save_warmup=FALSE);
38-
39-
40-
41-
scot_stanfit_hard = stan("bym2_predictor_plus_offset_hard.stan",
42-
data=list(N,N_edges,node1,node2,y,x,E,scaling_factor),
43-
control=list(adapt_delta = 0.97, stepsize = 0.1),
44-
chains=3, warmup=5000, iter=6000, save_warmup=FALSE);
45-
46-
print(scot_stanfit_soft, pars=c("lp__", "beta0", "beta1", "rho", "sigma", "mu[5]", "phi[5]", "theta[5]"), probs=c(0.025, 0.5, 0.975));
47-
print(scot_stanfit_hard, pars=c("lp__", "beta0", "beta1", "rho", "sigma", "mu[5]", "phi[5]", "theta[5]"), probs=c(0.025, 0.5, 0.975));
39+
data = list(N=N,
40+
N_edges=N_edges,
41+
node1=node1,
42+
node2=node2,
43+
y=y,
44+
x=x,
45+
E=E,
46+
scaling_factor=scaling_factor);
47+
48+
bym2_model = cmdstan_model("bym2_predictor_plus_offset.stan");
49+
50+
bym2_scot_stanfit = bym2_model$sample(
51+
data=data,
52+
parallel_chains=4,
53+
refresh=0);
54+
55+
bym2_scot_stanfit$summary(variables = c("beta0", "beta1",
56+
"sigma", "rho",
57+
"mu[5]","phi[5]","theta[5]"))
4858

knitr/car-iar-poisson/icar_stan.Rmd

Lines changed: 71 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -517,15 +517,16 @@ To test this model with real data, we ran it on the version of the Scotland Lip
517517
[scotland_data.R](https://github.com/stan-dev/example-models/tree/master/knitr/car-iar-poisson/scotland_data.R),
518518
described in the previous section.
519519
The R script
520-
[fit_scotland.R](https://github.com/stan-dev/example-models/tree/master/knitr/car-iar-poisson/fit_scotland.R)
520+
[fit_scotland_bym.R](https://github.com/stan-dev/example-models/tree/master/knitr/car-iar-poisson/fit_scotland_bym.R)
521521
fits the model to the data.
522522

523-
```{r fit-scotland }
523+
```{r bym-fit-scotland }
524524
library(devtools)
525525
if(!require(cmdstanr)){
526526
devtools::install_github("stan-dev/cmdstanr", dependencies=c("Depends", "Imports"))
527527
}
528528
library(cmdstanr)
529+
options(digits=3)
529530
530531
source("mungeCARdata4stan.R")
531532
source("scotland_data.R")
@@ -539,21 +540,25 @@ node1 = nbs$node1;
539540
node2 = nbs$node2;
540541
N_edges = nbs$N_edges;
541542
542-
data = list(N=N,N_edges=N_edges,node1=node1,node2=node2,y=y,x=x,E=E)
543+
data = list(N=N,
544+
N_edges=N_edges,
545+
node1=node1,
546+
node2=node2,
547+
y=y,
548+
x=x,
549+
E=E);
543550
544551
bym_model = cmdstan_model("bym_predictor_plus_offset.stan");
545-
scot_stanfit = bym_model$sample(
552+
553+
bym_scot_stanfit = bym_model$sample(
546554
data = data,
547555
parallel_chains = 4,
548-
iter_warmup = 5000,
549-
iter_sampling = 5000);
550-
551-
options(digits=3)
552-
scot_stanfit$summary(variables = c("lp__", "beta0", "beta1",
553-
"sigma_phi", "tau_phi",
554-
"sigma_theta", "tau_theta",
555-
"mu[5]","phi[5]","theta[5]"),
556-
~quantile(.x, probs = c(0.025, 0.5, 0.975)))
556+
refresh=0);
557+
558+
bym_scot_stanfit$summary(variables = c("lp__", "beta0", "beta1",
559+
"sigma_phi", "tau_phi",
560+
"sigma_theta", "tau_theta",
561+
"mu[5]","phi[5]","theta[5]"));
557562
```
558563

559564
The priors on all parameters match the priors on the corresponding WinBUGS model in the file
@@ -573,7 +578,6 @@ library(rstan)
573578
574579
source("scotland_data.R");
575580
576-
577581
iter = 100000;
578582
burn = 90000;
579583
mfile = "bym_bugs.txt";
@@ -700,17 +704,22 @@ The R script
700704
fits the model to the data. This code includes details on how to compute the scaling factor using the INLA library.
701705

702706
```{r fit-scotland-bym2 }
703-
library(rstan)
704-
options(mc.cores = parallel::detectCores())
705-
707+
library(devtools)
708+
if(!require(cmdstanr)){
709+
devtools::install_github("stan-dev/cmdstanr", dependencies=c("Depends", "Imports"))
710+
}
711+
if(!require(INLA)){
712+
install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
713+
}
714+
library(cmdstanr)
706715
library(INLA)
707716
708717
source("mungeCARdata4stan.R")
709718
source("scotland_data.R")
710719
y = data$y;
720+
x = 0.1 * data$x;
711721
E = data$E;
712722
K = 1;
713-
x = 0.1 * data$x;
714723
715724
nbs = mungeCARdata4stan(data$adj, data$num);
716725
N = nbs$N;
@@ -733,16 +742,31 @@ Q_inv = inla.qinv(Q_pert, constr=list(A = matrix(1,1,nbs$N),e=0))
733742
#Compute the geometric mean of the variances, which are on the diagonal of Q.inv
734743
scaling_factor = exp(mean(log(diag(Q_inv))))
735744
736-
scot_stanfit = stan("bym2_predictor_plus_offset.stan", data=list(N,N_edges,node1,node2,y,x,E,scaling_factor), warmup=5000, iter=6000);
737-
738-
print(scot_stanfit, pars=c("beta0", "beta1", "rho", "sigma", "log_precision", "logit_rho", "mu[5]", "phi[5]", "theta[5]"), probs=c(0.025, 0.5, 0.975));
745+
data = list(N=N,
746+
N_edges=N_edges,
747+
node1=node1,
748+
node2=node2,
749+
y=y,
750+
x=x,
751+
E=E,
752+
scaling_factor=scaling_factor);
753+
754+
bym2_model = cmdstan_model("bym2_predictor_plus_offset.stan");
755+
756+
bym2_scot_stanfit = bym2_model$sample(
757+
data=data,
758+
parallel_chains=4,
759+
refresh=0);
760+
761+
bym2_scot_stanfit$summary(variables = c("beta0", "beta1",
762+
"sigma", "rho",
763+
"mu[5]","phi[5]","theta[5]"))
739764
```
740765

741-
To see how this re-parameterization affects the fit, we reprint the results of fitting the Scotland data using the previous version of the BYM model,
742-
printing only the parameters and generated quantities shared by these two models:
766+
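To see how this re-parameterization affects the fit, we reprint the results of fitting the Scotland data with the original BYM model (the `bym_scot_stanfit` object from the previous section), showing only the parameters and generated quantities shared by the two models: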
743767

744768
```{r print-fit-scotland-bym }
745-
print(bym_scot_stanfit, pars=c("beta0", "beta1", "mu[5]"), probs=c(0.025, 0.5, 0.975));
769+
bym_scot_stanfit$summary(variables = c("beta0", "beta1", "mu[5]"));
746770
```
747771

748772
As a further check, we compare the results of using the Stan implementation of the BYM2 model to fit the Scotland lip cancer dataset with the results obtained by using INLA's implementation of the BYM2 model. The script to run INLA using the R-INLA package is in the file
@@ -756,8 +780,6 @@ After fitting the model, we print the values for the fixed effects parameters, i
756780
x 0.3706808 0.1320332 0.1054408 0.3725290 0.62566048 0.3762751 4.162445e-09
757781
```
758782

759-
760-
761783
## Bigger data: from 56 counties in Scotland to 1921 census tracts in New York City
762784

763785
To demonstrate the scalability of using Stan to compute a spatial ICAR component,
@@ -804,8 +826,7 @@ fits the BYM2 Stan model to the 2001 NYC traffic accident data and saves the res
804826
library(maptools);
805827
library(spdep);
806828
library(rgdal)
807-
library(rstan);
808-
options(mc.cores = 3);
829+
library(cmdstanr);
809830
810831
load("nyc_subset.data.R");
811832
@@ -828,14 +849,28 @@ node2 = nbs$node2;
828849
N_edges = nbs$N_edges;
829850
scaling_factor = scale_nb_components(nb_nyc_subset)[1];
830851
831-
bym2_stan = stan_model("bym2_offset_only.stan");
832-
bym2_fit = sampling(bym2_stan, data=list(N,N_edges,node1,node2,y,E,scaling_factor), control = list(adapt_delta = 0.97), chains=3, warmup=7000, iter=8000, save_warmup=FALSE);
852+
data = list(N=N,
853+
N_edges=N_edges,
854+
node1=node1,
855+
node2=node2,
856+
y=y,
857+
E=E,
858+
scaling_factor=scaling_factor);
833859
834-
print(bym2_fit, digits=3, pars=c("beta0", "rho", "sigma", "mu[1]", "mu[2]", "mu[3]", "mu[500]", "mu[1000]", "mu[1500]", "mu[1900]", "phi[1]", "phi[2]", "phi[3]", "phi[500]", "phi[1000]", "phi[1500]", "phi[1900]", "theta[1]", "theta[2]", "theta[3]", "theta[500]", "theta[1000]", "theta[1500]", "theta[1900]"), probs=c(0.025, 0.5, 0.975));
860+
bym2_model = cmdstan_model("bym2_offset_only.stan");
861+
bym2_fit = bym2_model$sample(data=data, parallel_chains=4, refresh=0);
862+
863+
bym2_fit$summary(
864+
variables = c(
865+
"beta0", "rho", "sigma",
866+
"mu[1]", "mu[2]", "mu[3]", "mu[500]", "mu[1000]", "mu[1500]", "mu[1900]",
867+
"phi[1]", "phi[2]", "phi[3]", "phi[500]", "phi[1000]", "phi[1500]", "phi[1900]",
868+
"theta[1]", "theta[2]", "theta[3]", "theta[500]", "theta[1000]", "theta[1500]", "theta[1900]"));
835869
836870
save(bym2_fit, file="nyc_bym2_fit.data.R");
871+
837872
```
838-
The Rhat values indicate good convergences, and the n_eff numbers, while low for `rho` and `sigma`, are sufficient. ICAR models require a large number of warmup iterations; for this model, at least 7000 are required for a good fit. On a 2015 13-inch MacBook pro with 2 CPUs, running 3 chains took for a total of 8000 iterations took 5 hours to fit.
873+
The Rhat values indicate good convergence, and the effective sample sizes, while low for `rho` and `sigma`, are sufficient.
839874

840875
### Visual comparisons of data and model fits
841876

@@ -1028,7 +1063,8 @@ Funded in part by the National Institute of Child Health and Human Development,
10281063

10291064
#### R Packages
10301065

1031-
* Statistics: [RStan](http://mc-stan.org/users/interfaces/rstan.html), [RStanArm](http://mc-stan.org/users/interfaces/rstanarm.html), [R2OpenBugs](https://cran.r-project.org/web/packages/R2OpenBugs), OpenBUGS, [R-INLA](http://www.r-inla.org).
1066+
* Statistics: [CmdStanR](http://mc-stan.org/cmdstanr),
1067+
[R2OpenBugs](https://cran.r-project.org/web/packages/R2OpenBugs), OpenBUGS, [R-INLA](http://www.r-inla.org).
10321068

10331069
* Plots and supporting libraries: [ggplot2](http://ggplot2.org), [ggmap](https://cran.r-project.org/web/packages/ggmap), [dplyr](https://cran.r-project.org/web/packages/dplyr), [tidy](https://cran.r-project.org/web/packages/tidy)
10341070

@@ -1039,12 +1075,12 @@ Funded in part by the National Institute of Child Health and Human Development,
10391075
### Licenses
10401076

10411077
<small>
1042-
**Code:** Copyright (2018) Columbia University. Released under the
1078+
**Code:** Copyright (2018-2023) Columbia University. Released under the
10431079
[BSD 3-clause license](https://opensource.org/licenses/BSD-3-Clause).
10441080
</small>
10451081

10461082
<small>
1047-
**Text:** Copyright (2018) Mitzi Morris. Released under the
1083+
**Text:** Copyright (2018-2023) Mitzi Morris. Released under the
10481084
[CC BY-NC 4.0
10491085
license](https://creativecommons.org/licenses/by-nc/4.0/).
10501086
</small>

knitr/car-iar-poisson/icar_stan.html

Lines changed: 965 additions & 449 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)