@@ -517,15 +517,16 @@ To test this model with real data, we ran it on the version of the Scotland Lip
517517[ scotland_data.R] ( https://github.com/stan-dev/example-models/tree/master/knitr/car-iar-poisson/scotland_data.R ) ,
518518described in the previous section.
519519The R script
520- [ fit_scotland .R] ( https://github.com/stan-dev/example-models/tree/master/knitr/car-iar-poisson/fit_scotland .R )
520+ [ fit_scotland_bym.R] ( https://github.com/stan-dev/example-models/tree/master/knitr/car-iar-poisson/fit_scotland_bym.R )
521521fits the model to the data.
522522
523- ``` {r fit-scotland }
523+ ``` {r bym- fit-scotland }
524524library(devtools)
525525if(!require(cmdstanr)){
526526 devtools::install_github("stan-dev/cmdstanr", dependencies=c("Depends", "Imports"))
527527}
528528library(cmdstanr)
529+ options(digits=3)
529530
530531source("mungeCARdata4stan.R")
531532source("scotland_data.R")
@@ -539,21 +540,25 @@ node1 = nbs$node1;
539540node2 = nbs$node2;
540541N_edges = nbs$N_edges;
541542
542- data = list(N=N,N_edges=N_edges,node1=node1,node2=node2,y=y,x=x,E=E)
543+ data = list(N=N,
544+ N_edges=N_edges,
545+ node1=node1,
546+ node2=node2,
547+ y=y,
548+ x=x,
549+ E=E);
543550
544551bym_model = cmdstan_model("bym_predictor_plus_offset.stan");
545- scot_stanfit = bym_model$sample(
552+
553+ bym_scot_stanfit = bym_model$sample(
546554 data = data,
547555 parallel_chains = 4,
548- iter_warmup = 5000,
549- iter_sampling = 5000);
550-
551- options(digits=3)
552- scot_stanfit$summary(variables = c("lp__", "beta0", "beta1",
553- "sigma_phi", "tau_phi",
554- "sigma_theta", "tau_theta",
555- "mu[5]","phi[5]","theta[5]"),
556- ~quantile(.x, probs = c(0.025, 0.5, 0.975)))
556+ refresh=0);
557+
558+ bym_scot_stanfit$summary(variables = c("lp__", "beta0", "beta1",
559+ "sigma_phi", "tau_phi",
560+ "sigma_theta", "tau_theta",
561+ "mu[5]","phi[5]","theta[5]"));
557562```
558563
559564The priors on all parameters match the priors on the corresponding WinBUGS model in the file
@@ -573,7 +578,6 @@ library(rstan)
573578
574579source("scotland_data.R");
575580
576-
577581iter = 100000;
578582burn = 90000;
579583mfile = "bym_bugs.txt";
@@ -700,17 +704,22 @@ The R script
700704fits the model to the data. This code includes details on how to compute the scaling factor using the INLA library.
701705
702706``` {r fit-scotland-bym2 }
703- library(rstan)
704- options(mc.cores = parallel::detectCores())
705-
707+ library(devtools)
708+ if(!require(cmdstanr)){
709+ devtools::install_github("stan-dev/cmdstanr", dependencies=c("Depends", "Imports"))
710+ }
711+ if(!require(INLA)){
712+ install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
713+ }
714+ library(cmdstanr)
706715library(INLA)
707716
708717source("mungeCARdata4stan.R")
709718source("scotland_data.R")
710719y = data$y;
720+ x = 0.1 * data$x;
711721E = data$E;
712722K = 1;
713- x = 0.1 * data$x;
714723
715724nbs = mungeCARdata4stan(data$adj, data$num);
716725N = nbs$N;
@@ -733,16 +742,31 @@ Q_inv = inla.qinv(Q_pert, constr=list(A = matrix(1,1,nbs$N),e=0))
733742#Compute the geometric mean of the variances, which are on the diagonal of Q.inv
734743scaling_factor = exp(mean(log(diag(Q_inv))))
735744
736- scot_stanfit = stan("bym2_predictor_plus_offset.stan", data=list(N,N_edges,node1,node2,y,x,E,scaling_factor), warmup=5000, iter=6000);
737-
738- print(scot_stanfit, pars=c("beta0", "beta1", "rho", "sigma", "log_precision", "logit_rho", "mu[5]", "phi[5]", "theta[5]"), probs=c(0.025, 0.5, 0.975));
745+ data = list(N=N,
746+ N_edges=N_edges,
747+ node1=node1,
748+ node2=node2,
749+ y=y,
750+ x=x,
751+ E=E,
752+ scaling_factor=scaling_factor);
753+
754+ bym2_model = cmdstan_model("bym2_predictor_plus_offset.stan");
755+
756+ bym2_scot_stanfit = bym2_model$sample(
757+ data=data,
758+ parallel_chains=4,
759+ refresh=0);
760+
761+ bym2_scot_stanfit$summary(variables = c("beta0", "beta1",
762+ "sigma", "rho",
763+ "mu[5]","phi[5]","theta[5]"))
739764```
740765
741- To see how this re-parameterization affects the fit, we reprint the results of fitting the Scotland data using the previous version of the BYM model,
742- printing only the parameters and generated quantities shared by these two models:
766+ To see how this re-parameterization affects the fit, we reprint the results of the earlier BYM model fit to the Scotland data, showing only the parameters and generated quantities shared by these two models:
743767
744768``` {r print-fit-scotland-bym }
745- print( bym_scot_stanfit, pars= c("beta0", "beta1", "mu[5]"), probs=c(0.025, 0.5, 0.975 ));
769+ bym_scot_stanfit$summary(variables = c("beta0", "beta1", "mu[5]"));
746770```
747771
748772As a further check, we compare the results of using the Stan implementation of the BYM2 model to fit the Scotland lip cancer dataset with the results obtained by using INLA's implementation of the BYM2 model. The script to run INLA using package R-INLA is in file
@@ -756,8 +780,6 @@ After fitting the model, we print the values for the fixed effects parameters, i
756780x 0.3706808 0.1320332 0.1054408 0.3725290 0.62566048 0.3762751 4.162445e-09
757781```
758782
759-
760-
761783## Bigger data: from 56 counties in Scotland to 1921 census tracts in New York City
762784
763785To demonstrate the scalability of using Stan to compute a spatial ICAR component,
@@ -804,8 +826,7 @@ fits the BYM2 Stan model to the 2001 NYC traffic accident data and saves the res
804826library(maptools);
805827library(spdep);
806828library(rgdal)
807- library(rstan);
808- options(mc.cores = 3);
829+ library(cmdstanr);
809830
810831load("nyc_subset.data.R");
811832
@@ -828,14 +849,28 @@ node2 = nbs$node2;
828849N_edges = nbs$N_edges;
829850scaling_factor = scale_nb_components(nb_nyc_subset)[1];
830851
831- bym2_stan = stan_model("bym2_offset_only.stan");
832- bym2_fit = sampling(bym2_stan, data=list(N,N_edges,node1,node2,y,E,scaling_factor), control = list(adapt_delta = 0.97), chains=3, warmup=7000, iter=8000, save_warmup=FALSE);
852+ data = list(N=N,
853+ N_edges=N_edges,
854+ node1=node1,
855+ node2=node2,
856+ y=y,
857+ E=E,
858+ scaling_factor=scaling_factor);
833859
834- print(bym2_fit, digits=3, pars=c("beta0", "rho", "sigma", "mu[1]", "mu[2]", "mu[3]", "mu[500]", "mu[1000]", "mu[1500]", "mu[1900]", "phi[1]", "phi[2]", "phi[3]", "phi[500]", "phi[1000]", "phi[1500]", "phi[1900]", "theta[1]", "theta[2]", "theta[3]", "theta[500]", "theta[1000]", "theta[1500]", "theta[1900]"), probs=c(0.025, 0.5, 0.975));
860+ bym2_model = cmdstan_model("bym2_offset_only.stan");
861+ bym2_fit = bym2_model$sample(data=data, parallel_chains=4, refresh=0);
862+
863+ bym2_fit$summary(
864+ variables = c(
865+ "beta0", "rho", "sigma",
866+ "mu[1]", "mu[2]", "mu[3]", "mu[500]", "mu[1000]", "mu[1500]", "mu[1900]",
867+ "phi[1]", "phi[2]", "phi[3]", "phi[500]", "phi[1000]", "phi[1500]", "phi[1900]",
868+ "theta[1]", "theta[2]", "theta[3]", "theta[500]", "theta[1000]", "theta[1500]", "theta[1900]"));
835869
836870save(bym2_fit, file="nyc_bym2_fit.data.R");
871+
837872```
838- The Rhat values indicate good convergences, and the n_eff numbers, while low for ` rho ` and ` sigma ` , are sufficient. ICAR models require a large number of warmup iterations; for this model, at least 7000 are required for a good fit. On a 2015 13-inch MacBook pro with 2 CPUs, running 3 chains took for a total of 8000 iterations took 5 hours to fit.
873+ The Rhat values indicate good convergence, and the effective sample sizes (`ess_bulk` and `ess_tail`), while low for `rho` and `sigma`, are sufficient.
839874
840875### Visual comparisons of data and model fits
841876
@@ -1028,7 +1063,8 @@ Funded in part by the National Institute of Child Health and Human Development,
10281063
10291064#### R Packages
10301065
1031- * Statistics: [ RStan] ( http://mc-stan.org/users/interfaces/rstan.html ) , [ RStanArm] ( http://mc-stan.org/users/interfaces/rstanarm.html ) , [ R2OpenBugs] ( https://cran.r-project.org/web/packages/R2OpenBugs ) , OpenBUGS, [ R-INLA] ( http://www.r-inla.org ) .
1066+ * Statistics: [ CmdStanR] ( http://mc-stan.org/cmdstanr ) ,
1067+ [ R2OpenBUGS] ( https://cran.r-project.org/web/packages/R2OpenBUGS ) , OpenBUGS, [ R-INLA] ( http://www.r-inla.org ) .
10321068
10331069* Plots and supporting libraries: [ ggplot2] ( http://ggplot2.org ) , [ ggmap] ( https://cran.r-project.org/web/packages/ggmap ) , [ dplyr] ( https://cran.r-project.org/web/packages/dplyr ) , [ tidy] ( https://cran.r-project.org/web/packages/tidy )
10341070
@@ -1039,12 +1075,12 @@ Funded in part by the National Institute of Child Health and Human Development,
10391075### Licenses
10401076
10411077<small >
1042- ** Code:** Copyright (2018) Columbia University. Released under the
1078+ **Code:** Copyright (2018-2023) Columbia University. Released under the
10431079[ BSD 3-clause license] ( https://opensource.org/licenses/BSD-3-Clause ) .
10441080</small >
10451081
10461082<small >
1047- ** Text:** Copyright (2018) Mitzi Morris. Released under the
1083+ **Text:** Copyright (2018-2023) Mitzi Morris. Released under
10481084the [ CC BY-NC 4.0
10491085license] ( https://creativecommons.org/licenses/by-nc/4.0/ ) .
10501086</small >
0 commit comments