|
| 1 | +## Simple Gibbs Sampler Example |
| 2 | +## Adapted from Darren Wilkinson's post at |
| 3 | +## http://darrenjw.wordpress.com/2010/04/28/mcmc-programming-in-r-python-java-and-c/ |
| 4 | +## |
| 5 | +## Sanjog Misra and Dirk Eddelbuettel, June-July 2011 |
| 6 | +## Updated by Dirk Eddelbuettel, Mar 2020 |
| 7 | + |
| 8 | +suppressMessages({ |
| 9 | + library(Rcpp) |
| 10 | + library(rbenchmark) |
| 11 | +}) |
| 12 | + |
| 13 | + |
| 14 | +## Actual joint density -- the code which follow implements |
| 15 | +## a Gibbs sampler to draw from the following joint density f(x,y) |
| 16 | +fun <- function(x,y) { |
| 17 | + x*x * exp(-x*y*y - y*y + 2*y - 4*x) |
| 18 | +} |
| 19 | + |
| 20 | +## Note that the full conditionals are propotional to |
| 21 | +## f(x|y) = (x^2)*exp(-x*(4+y*y)) : a Gamma density kernel |
| 22 | +## f(y|x) = exp(-0.5*2*(x+1)*(y^2 - 2*y/(x+1)) : Normal Kernel |
| 23 | + |
| 24 | +## There is a small typo in Darrens code. |
| 25 | +## The full conditional for the normal has the wrong variance |
| 26 | +## It should be 1/sqrt(2*(x+1)) not 1/sqrt(1+x) |
| 27 | +## This we can verify ... |
| 28 | +## The actual conditional (say for x=3) can be computed as follows |
| 29 | +## First - Construct the Unnormalized Conditional |
| 30 | +fy.unnorm <- function(y) fun(3,y) |
| 31 | + |
| 32 | +## Then - Find the appropriate Normalizing Constant |
| 33 | +K <- integrate(fy.unnorm,-Inf,Inf) |
| 34 | + |
| 35 | +## Finally - Construct Actual Conditional |
| 36 | +fy <- function(y) fy.unnorm(y)/K$val |
| 37 | + |
| 38 | +## Now - The corresponding Normal should be |
| 39 | +fy.dnorm <- function(y) { |
| 40 | + x <- 3 |
| 41 | + dnorm(y,1/(1+x),sqrt(1/(2*(1+x)))) |
| 42 | +} |
| 43 | + |
| 44 | +## and not ... |
| 45 | +fy.dnorm.wrong <- function(y) { |
| 46 | + x <- 3 |
| 47 | + dnorm(y,1/(1+x),sqrt(1/((1+x)))) |
| 48 | +} |
| 49 | + |
| 50 | +if (interactive()) { |
| 51 | + ## Graphical check |
| 52 | + ## Actual (gray thick line) |
| 53 | + curve(fy,-2,2,col='grey',lwd=5) |
| 54 | + |
| 55 | + ## Correct Normal conditional (blue dotted line) |
| 56 | + curve(fy.dnorm,-2,2,col='blue',add=T,lty=3) |
| 57 | + |
| 58 | + ## Wrong Normal (Red line) |
| 59 | + curve(fy.dnorm.wrong,-2,2,col='red',add=T) |
| 60 | +} |
| 61 | + |
| 62 | +## Here is the actual Gibbs Sampler |
| 63 | +## This is Darren Wilkinsons R code (with the corrected variance) |
| 64 | +## But we are returning only his columns 2 and 3 as the 1:N sequence |
| 65 | +## is never used below |
| 66 | +Rgibbs <- function(N,thin) { |
| 67 | + mat <- matrix(0,ncol=2,nrow=N) |
| 68 | + x <- 0 |
| 69 | + y <- 0 |
| 70 | + for (i in 1:N) { |
| 71 | + for (j in 1:thin) { |
| 72 | + x <- rgamma(1,3,y*y+4) |
| 73 | + y <- rnorm(1,1/(x+1),1/sqrt(2*(x+1))) |
| 74 | + } |
| 75 | + mat[i,] <- c(x,y) |
| 76 | + } |
| 77 | + mat |
| 78 | +} |
| 79 | + |
| 80 | +## Now for the Rcpp version -- Notice how easy it is to code up! |
| 81 | + |
| 82 | +cppFunction("NumericMatrix RcppGibbs(int N, int thn){ |
| 83 | + NumericMatrix mat(N, 2); // Setup storage |
| 84 | + double x = 0, y = 0; // The rest follows the R version |
| 85 | + for (int i = 0; i < N; i++) { |
| 86 | + for (int j = 0; j < thn; j++) { |
| 87 | + x = R::rgamma(3.0,1.0/(y*y+4)); |
| 88 | + y = R::rnorm(1.0/(x+1),1.0/sqrt(2*x+2)); |
| 89 | + } |
| 90 | + mat(i,0) = x; |
| 91 | + mat(i,1) = y; |
| 92 | + } |
| 93 | + return mat; // Return to R |
| 94 | +}") |
| 95 | + |
| 96 | + |
| 97 | +## Use of the sourceCpp() is preferred for users who wish to source external |
| 98 | +## files or specify their headers and Rcpp attributes within their code. |
| 99 | +## Code here is able to easily be extracted and placed into its own C++ file. |
| 100 | + |
| 101 | +## Compile and Load |
| 102 | +sourceCpp(code=" |
| 103 | +#include <RcppGSL.h> |
| 104 | +#include <gsl/gsl_rng.h> |
| 105 | +#include <gsl/gsl_randist.h> |
| 106 | +
|
| 107 | +using namespace Rcpp; // just to be explicit |
| 108 | +
|
| 109 | +// [[Rcpp::depends(RcppGSL)]] |
| 110 | +
|
| 111 | +// [[Rcpp::export]] |
| 112 | +NumericMatrix GSLGibbs(int N, int thin){ |
| 113 | + gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937); |
| 114 | + double x = 0, y = 0; |
| 115 | + NumericMatrix mat(N, 2); |
| 116 | + for (int i = 0; i < N; i++) { |
| 117 | + for (int j = 0; j < thin; j++) { |
| 118 | + x = gsl_ran_gamma(r,3.0,1.0/(y*y+4)); |
| 119 | + y = 1.0/(x+1)+gsl_ran_gaussian(r,1.0/sqrt(2*x+2)); |
| 120 | + } |
| 121 | + mat(i,0) = x; |
| 122 | + mat(i,1) = y; |
| 123 | + } |
| 124 | + gsl_rng_free(r); |
| 125 | +
|
| 126 | + return mat; // Return to R |
| 127 | +}") |
| 128 | + |
| 129 | + |
| 130 | + |
| 131 | +## Now for some tests |
| 132 | +## You can try other values if you like |
| 133 | +## Note that the total number of interations are N*thin! |
| 134 | +Ns <- c(1000,5000,10000,20000) |
| 135 | +thins <- c(10,50,100,200) |
| 136 | +tim_R <- rep(0,4) |
| 137 | +tim_Rgsl <- rep(0,4) |
| 138 | +tim_Rcpp <- rep(0,4) |
| 139 | + |
| 140 | +for (i in seq_along(Ns)) { |
| 141 | + tim_R[i] <- system.time(mat <- Rgibbs(Ns[i],thins[i]))[3] |
| 142 | + tim_Rgsl[i] <- system.time(gslmat <- GSLGibbs(Ns[i],thins[i]))[3] |
| 143 | + tim_Rcpp[i] <- system.time(rcppmat <- RcppGibbs(Ns[i],thins[i]))[3] |
| 144 | + cat("Replication #", i, "complete \n") |
| 145 | +} |
| 146 | + |
| 147 | +## Comparison |
| 148 | +speedup <- round(tim_R/tim_Rcpp,2); |
| 149 | +speedup2 <- round(tim_R/tim_Rgsl,2); |
| 150 | +summtab <- round(rbind(tim_R, tim_Rcpp,tim_Rgsl,speedup,speedup2),3) |
| 151 | +colnames(summtab) <- c("N=1000","N=5000","N=10000","N=20000") |
| 152 | +rownames(summtab) <- c("Elasped Time (R)","Elapsed Time (Rcpp)", "Elapsed Time (Rgsl)", |
| 153 | + "SpeedUp Rcpp", "SpeedUp GSL") |
| 154 | +print(summtab) |
| 155 | + |
| 156 | +## Contour Plots -- based on Darren's example |
| 157 | +if (interactive() && require(KernSmooth)) { |
| 158 | + op <- par(mfrow=c(4,1),mar=c(3,3,3,1)) |
| 159 | + x <- seq(0,4,0.01) |
| 160 | + y <- seq(-2,4,0.01) |
| 161 | + z <- outer(x,y,fun) |
| 162 | + contour(x,y,z,main="Contours of actual distribution",xlim=c(0,2), ylim=c(-2,4)) |
| 163 | + fit <- bkde2D(as.matrix(mat),c(0.1,0.1)) |
| 164 | + contour(drawlabels=T, fit$x1, fit$x2, fit$fhat, xlim=c(0,2), ylim=c(-2,4), |
| 165 | + main=paste("Contours of empirical distribution:",round(tim_R[4],2)," seconds")) |
| 166 | + fitc <- bkde2D(as.matrix(rcppmat),c(0.1,0.1)) |
| 167 | + contour(fitc$x1,fitc$x2,fitc$fhat,xlim=c(0,2), ylim=c(-2,4), |
| 168 | + main=paste("Contours of Rcpp based empirical distribution:",round(tim_Rcpp[4],2)," seconds")) |
| 169 | + fitg <- bkde2D(as.matrix(gslmat),c(0.1,0.1)) |
| 170 | + contour(fitg$x1,fitg$x2,fitg$fhat,xlim=c(0,2), ylim=c(-2,4), |
| 171 | + main=paste("Contours of GSL based empirical distribution:",round(tim_Rgsl[4],2)," seconds")) |
| 172 | + par(op) |
| 173 | +} |
| 174 | + |
| 175 | + |
| 176 | +## also use rbenchmark package |
| 177 | +N <- 20000 |
| 178 | +thn <- 200 |
| 179 | +res <- benchmark(Rgibbs(N, thn), |
| 180 | + RcppGibbs(N, thn), |
| 181 | + GSLGibbs(N, thn), |
| 182 | + columns=c("test", "replications", "elapsed", |
| 183 | + "relative", "user.self", "sys.self"), |
| 184 | + order="relative", |
| 185 | + replications=10) |
| 186 | +print(res) |
| 187 | + |
| 188 | + |
| 189 | +## And we are done |
0 commit comments