Notes and R code for Statistical Rethinking (McElreath), Chapter 13.
(1755 lines, 1464 loc, 52.8 KB)

13. Adventures in Covariance

Essence of the general varying effects strategy: any batch of parameters with exchangeable index values can and probably should be pooled.

Model both the population of intercepts and the population of slopes.

Ordinary varying effects work only with discrete, unordered categories.

13.1 Varying slopes by construction

Rather than having two independent Gaussian distributions of intercepts and of slopes, we can do better by assigning a two-dimensional Gaussian distribution to both the intercepts (First dimension) and the slopes (second dimension).

13.1.1. Simulate the population

# 13.1.1 — Simulate a population of cafes whose intercepts (morning wait)
# and slopes (afternoon difference) are correlated.
library(rethinking)
a <- 3.5 # average morning wait time
b <- (-1) # average difference afternoon wait time
sigma_a <- 1 #std dev in intercepts
sigma_b <- 0.5 #std dev in slopes
rho <- (-0.7) #correlation between intercepts and slopes

# Population mean vector: (intercept, slope).
Mu <- c(a, b)

# Covariance matrix built directly from variances and the covariance term.
cov_ab <- sigma_a * sigma_b*rho
Sigma <- matrix(c(sigma_a^2, cov_ab, cov_ab, sigma_b^2), ncol=2)

# Equivalent construction: Sigma = diag(sigmas) %*% Rho %*% diag(sigmas),
# which separates the scales from the correlation matrix.
sigmas <- c(sigma_a, sigma_b)
Rho <- matrix(c(1,rho,rho,1), nrow=2)
Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas) #matrix multiply
N_cafes <- 20
library(MASS)
set.seed(5)
# One (intercept, slope) pair per cafe, drawn from the bivariate normal.
vary_effects <- mvrnorm(N_cafes, Mu, Sigma)

a_cafe <- vary_effects[,1]
b_cafe <- vary_effects[,2]

plot(a_cafe, b_cafe, col=rangi2,
     xlab="intercepts (a_cafe)", ylab="slopes (b_cafe)")

library(ellipse)

# Overlay contours of the generating distribution at several density levels.
for (l in c(0.1, 0.3, 0.5, 0.8, 0.99))
    lines(ellipse(Sigma, centre=Mu, level=l), col=col.alpha("black", 0.2))

13.1.2. Simulate Observations

# 13.1.2 — Simulate N_visits waits per cafe, alternating morning/afternoon.
set.seed(5)
N_visits <- 10
afternoon <- rep(0:1, N_visits*N_cafes/2)
cafe_id <- rep(1:N_cafes, each=N_visits)

# Expected wait for each visit, plus within-cafe Gaussian noise.
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5 #std dev within cafes
wait <- rnorm(N_visits*N_cafes, mu, sigma)
d <- data.frame(cafe=cafe_id, afternoon=afternoon, wait=wait)

13.1.3. The Varying slopes model

# 13.1.3 — Varying-slopes model: a joint bivariate-normal prior on
# (a_cafe, b_cafe) so intercepts and slopes are pooled together.
set.seed(5)

m13.1 <- map2stan(
    alist(
        wait ~ dnorm(mu, sigma),
        mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
        c(a_cafe, b_cafe)[cafe] ~ dmvnorm2(c(a,b), sigma_cafe, Rho),
        a ~ dnorm(0, 10),
        b ~ dnorm(0, 10),
        sigma_cafe ~ dcauchy(0,2),
        sigma ~ dcauchy(0,2),
        Rho ~ dlkjcorr(2)
    ), data=d, iter=5000, warmup=2000, chains=2)
set.seed(5)
post <- extract.samples(m13.1)
# Posterior density of the intercept/slope correlation.
dens(post$Rho[,1,2])
# compute unpooled estimates directly from data
set.seed(5)
a1 <- sapply(1:N_cafes,
             function(i) mean(wait[cafe_id==i & afternoon==0]))
b1 <- sapply(1:N_cafes,
             function(i) mean(wait[cafe_id==i & afternoon==1])) - a1

                                        # extract posterior means of partially pooled estimates
post <- extract.samples(m13.1)
a2 <- apply(post$a_cafe, 2, mean)
b2 <- apply(post$b_cafe, 2, mean)

                                        #plot both and connect with lines
plot(a1, b1, xlab="intercept", ylab="slope",
     pch=16, col=rangi2, ylim=c(min(b1)-0.1, max(b1)+0.1),
     xlim=c(min(a1)-0.1, max(a1)+0.1))
points(a2, b2, pch=1)
# Each segment connects a cafe's raw estimate to its pooled estimate,
# visualizing the shrinkage toward the population.
for (i in 1:N_cafes) lines(c(a1[i], a2[i]), c(b1[i], b2[i]))

# compute posterior mean bivariate Gaussian
Mu_est <- c(mean(post$a), mean(post$b))
rho_est <- mean(post$Rho[,1,2])
sa_est <- mean(post$sigma_cafe[,1])
sb_est <- mean(post$sigma_cafe[,2])
cov_ab <- sa_est*sb_est*rho_est
Sigma_est <- matrix(c(sa_est^2, cov_ab, cov_ab, sb_est^2), ncol=2)

# draw contours
for (l in c(0.1, 0.3, 0.5, 0.8, 0.99))
    lines(ellipse(Sigma_est, centre=Mu_est, level=l),
          col=col.alpha("black", 0.2))

The angled shrinkage lines represent the negative correlation between intercepts and slopes.

13.2 Example: Admission decisions and gender

# Load the UC Berkeley admissions data and build the variables used by the
# models below.
library(rethinking)
data(UCBadmit)
d <- UCBadmit
# Integer department index (1..K) for use as a varying-effects index.
d$dept_id <- coerce_index(d$dept)
# Gender dummy: 1 for male applicants, 0 otherwise.
d$male <- ifelse(d$applicant.gender == "male", 1, 0)

13.2.1. Varying Intercepts

# 13.2.1 — Varying intercepts by department, with a single common slope
# for the male dummy.
m13.2 <- map2stan(
    alist(
        admit ~ dbinom(applications, p),
        logit(p) <- a_dept[dept_id] + bm*male,
        a_dept[dept_id] ~ dnorm(a, sigma_dept),
        a ~ dnorm(0, 10),
        bm ~ dnorm(0, 1),
        sigma_dept ~ dcauchy(0, 2)
    ),
    data=d, warmup=500, iter=4500, chains=3)
precis(m13.2, depth=2)

13.2.2. Varying effects of being male

# 13.2.2 — Varying intercepts AND varying male slopes per department,
# pooled jointly through a bivariate normal with correlation Rho.
m13.3 <- map2stan(
    alist(
        admit ~ dbinom(applications, p),
        logit(p) <- a_dept[dept_id] + bm_dept[dept_id]*male,
        c(a_dept, bm_dept)[dept_id] ~ dmvnorm2(c(a, bm), sigma_dept, Rho),
        a ~ dnorm(0, 10),
        bm ~ dnorm(0, 1),
        sigma_dept ~ dcauchy(0, 2),
        Rho ~ dlkjcorr(2)
    ),
    data=d, warmup=1000, iter=5000, chains=4, cores=3)


# Coefficient plot of the per-department intercepts and slopes.
plot(precis(m13.3, pars=c("a_dept", "bm_dept"), depth=2))

Look at the estimated correlation between intercepts and slopes, as well as the 2-dimensional shrinkage it induces.

13.2.3. Shrinkage

# 13.2.3 — Posterior density of the intercept/slope correlation in m13.3.
post <- extract.samples(m13.3)
dens(post$Rho[,1,2])

13.2.4. Model comparison

# 13.2.4 — Intercept-only comparison model (gender dropped entirely).
m13.4 <- map2stan(
    alist(
        admit ~ dbinom(applications, p),
        logit(p) <- a_dept[dept_id],
        a_dept[dept_id] ~ dnorm(a, sigma_dept),
        a ~ dnorm(0, 10),
        sigma_dept ~ dcauchy(0, 2)
    ),
    data=d, warmup=500, iter=4500, chains=3)

# WAIC comparison of the three admission models.
compare(m13.2, m13.3, m13.4)

13.2.5. More slopes

13.3. Example: Cross-classified chimpanzees with varying slopes

Non-centered parametrization tends to help with complex varying effects models

# 13.3 — Chimpanzee data: cross-classified varying effects by actor and
# by block, with centered multivariate-normal priors.
library(rethinking)
data(chimpanzees)
d <- chimpanzees
d$recipient <- NULL # drop this column before passing data to map2stan
d$block_id <- d$block

m13.6 <- map2stan(
    alist(
                                        #likelihood
        pulled_left ~ dbinom(1, p),

                                        # linear models
        logit(p) <- A + (BP + BPC*condition)*prosoc_left,
        A <- a + a_actor[actor] + a_block[block_id],
        BP <- bp + bp_actor[actor] + bp_block[block_id],
        BPC <- bpc + bpc_actor[actor] + bpc_block[block_id],

                                        # adaptive priors
        c(a_actor, bp_actor, bpc_actor)[actor] ~ dmvnorm2(0, sigma_actor, Rho_actor),
        c(a_block, bp_block, bpc_block)[block_id] ~ dmvnorm2(0, sigma_block, Rho_block),

                                        #fixed priors
        c(a, bp, bpc) ~ dnorm(0,1),
        sigma_actor ~ dcauchy(0,2),
        sigma_block ~ dcauchy(0,2),
        Rho_actor ~ dlkjcorr(4),
        Rho_block ~ dlkjcorr(4)
    ), data=d, iter=5000, warmup=1000, chains=3, cores=3)

Divergent iterations occur — this is where the non-centered parameterization will help.

# Same model as m13.6 but with non-centered priors (dmvnormNC), which
# reparameterizes the varying effects to improve sampling.
m13.6NC <- map2stan(
    alist(
                                        #likelihood
        pulled_left ~ dbinom(1, p),

                                        # linear models
        logit(p) <- A + (BP + BPC*condition)*prosoc_left,
        A <- a + a_actor[actor] + a_block[block_id],
        BP <- bp + bp_actor[actor] + bp_block[block_id],
        BPC <- bpc + bpc_actor[actor] + bpc_block[block_id],

                                        # adaptive non-centered priors
        c(a_actor, bp_actor, bpc_actor)[actor] ~ dmvnormNC(sigma_actor, Rho_actor),
        c(a_block, bp_block, bpc_block)[block_id] ~ dmvnormNC(sigma_block, Rho_block),

                                        #fixed priors
        c(a, bp, bpc) ~ dnorm(0,1),
        sigma_actor ~ dcauchy(0,2),
        sigma_block ~ dcauchy(0,2),
        Rho_actor ~ dlkjcorr(4),
        Rho_block ~ dlkjcorr(4)
    ), data=d, iter=5000, warmup=1000, chains=3, cores=3)


# Posterior predictions on the probability scale; str() shows dimensions.
p <- link(m13.6NC)
str(p)

13.4. Continuous categories and the Gaussian process

Gaussian process regression is a way to apply the varying effects approach to continuous categories of this kind. It allows us to estimate a unique intercept (or slope) for any age, while still regarding age as a continuous dimension in which similar ages have more similar intercepts (or slopes).

13.4.1. Example: Spatial autocorrelation in Oceanic tools.

# 13.4.1 — Gaussian-process model of spatial autocorrelation in Oceanic
# tool counts.
library(rethinking)

data(islandsDistMatrix)

# Pairwise distance matrix between the 10 societies.
Dmat <- islandsDistMatrix

colnames(Dmat) <- c("Ml","Ti", "SC","Ya","Fi","Tr","Ch","Mn","To","Ha")
round(Dmat, 1)

data(Kline2)
d <- Kline2  # FIX: was garbled as `d <- Klprecis(m13.)ine2`
d$society <- 1:10


# Poisson regression with a GP-distributed varying intercept g[society];
# GPL2 is the squared-exponential kernel with a 0.01 diagonal jitter.
M13.7 <- map2stan(
    alist(
        total_tools ~ dpois(lambda),
        log(lambda) <- a + g[society] + bp*logpop,
        g[society] ~ GPL2(Dmat, etasq, rhosq, 0.01),
        a ~ dnorm(0,10),
        bp ~ dnorm(0,1),
        etasq ~ dcauchy(0,1),
        rhosq ~ dcauchy(0,1)
    ), data=list(
           total_tools=d$total_tools,
           logpop=d$logpop,
           society=d$society,
           Dmat=islandsDistMatrix),
    warmup=2000, iter=1e4, chains=4)
post <- extract.samples(M13.7)

# Posterior-median covariance function of distance...
curve(median(post$etasq)*exp(-median(post$rhosq)*x^2), from=0, to=10,
      xlab="distance", ylab="covariance", ylim=c(0,1), yaxp=c(0,1,4), lwd=2)

# ...with 100 posterior draws overlaid to show uncertainty.
for (i in 1:100)
    curve(post$etasq[i]*exp(-post$rhosq[i]*x^2), add=TRUE,
          col=col.alpha("black", 0.2))


Let’s consider the correlations among societies that are implied by the posterior median.

# compute posterior median covariance among societies
K <- matrix(0, nrow=10, ncol=10)
for (i in 1:10)
    for (j in 1:10)
        K[i,j] <- median(post$etasq) *
            exp(-median(post$rhosq) * islandsDistMatrix[i,j]^2)

# small diagonal jitter, matching the 0.01 used in GPL2 above
diag(K) <- median(post$etasq) + 0.01

                                        #convert to a correlation matrix
Rho <- round(cov2cor(K), 2)
colnames(Rho) <- c("Ml","Ti", "SC","Ya","Fi","Tr","Ch","Mn","To","Ha")
rownames(Rho) <- colnames(Rho)
Rho

# Point sizes scaled by relative log population.
psize <- d$logpop / max(d$logpop)
psize <- exp(psize*1.5)-2


                                        #plot raw data and labels
plot(d$lon2, d$lat, xlab="longitude", ylab="latitude",
     col=rangi2, cex=psize, pch=16, xlim=c(-50,30))
labels <- as.character(d$culture)
text(d$lon2, d$lat, labels=labels, cex=0.7, pos=c(2,4,3,3,4,1,3,2,4,2))

                                        #overlay lines shaded by Rho
# Darker segments indicate stronger posterior-median correlation.
for (i in 1:10)
    for (j in 1:10)
        if (i < j)
            lines(c(d$lon2[i], d$lon2[j]), c(d$lat[i], d$lat[j]),
                    lwd=2, col=col.alpha("black", Rho[i,j]^2))
                                        # compute posterior median relationship, ignoring distance
logpop.seq <- seq(from=6, to=14, length.out=30)
lambda <- sapply(logpop.seq, function(lp) exp(post$a + post$bp*lp))
lambda.median <- apply(lambda, 2, median)
lambda.PI80 <- apply(lambda, 2, PI, prob=.8)

                                        # plot raw data and labels

                                        #plot raw data and labels
plot(d$logpop, d$total_tools, xlab="log pop", ylab="total tools",
     col=rangi2, cex=psize, pch=16)
text(d$logpop, d$total_tools, labels=labels, cex=0.7, pos=c(2,4,3,3,4,1,3,2,4,2))

                                        # display posterior predictions
lines(logpop.seq, lambda.median, lty=2)
lines(logpop.seq, lambda.PI80[1,], lty=2)
lines(logpop.seq, lambda.PI80[2,], lty=2)

                                        #overlay correlations
for (i in 1:10)
    for (j in 1:10)
        if (i < j)
            lines(c(d$logpop[i], d$logpop[j]),
                  c(d$total_tools[i], d$total_tools[j]),
                  lwd=2, col=col.alpha("black", Rho[i,j]^2))


13.6 Practice

13E1

y_i \sim \text{Normal}(\mu_i, \sigma) \\
        \mu_i = \alpha_{\textsc{group}[i]} + \beta_{\textsc{group}[i]} x_i \\
        \Big[\begin{smallmatrix}
               \alpha_{\textsc{group}} \\ 
               \beta_{\textsc{group}}
             \end{smallmatrix}
        \Big] \sim \text{MVNormal}(\Big[\begin{smallmatrix}
                                          \alpha \\ 
                                          \beta
                                        \end{smallmatrix}
                                   \Big], \textbf{S}) \\
        \textbf{S} = \Big(\begin{smallmatrix}
                            \sigma_a & 0 \\ 
                             0       & \sigma_b
                          \end{smallmatrix}
                     \Big) \textbf{R}
                     \Big(\begin{smallmatrix}
                            \sigma_a & 0 \\ 
                             0       & \sigma_b
                          \end{smallmatrix}
                     \Big) \\
        \alpha \sim \text{Normal}(0, 10) \\
        \beta \sim \text{Normal}(0, 1) \\
        \sigma \sim \text{HalfCauchy}(0, 2) \\
        (\sigma_\alpha, \sigma_\beta) \sim \text{HalfCauchy}(0, 2) \\
        \textbf{R} \sim \text{LKJcorr}(2)

13E2

Intercepts → money inherited; slopes → increase in wealth.

More money inherited at the starting position generates a larger increase in wealth — the rich get richer — even though this is clustered for different amounts of money.

13E3

When there is no (or very little) variation between groups, so that strong pooling shrinks the varying effects and reduces the effective number of parameters.

13M1

# 13M1 — Repeat the cafe simulation with rho = 0 and refit the model;
# the posterior for Rho[1,2] should then concentrate near zero.
library(rethinking)
library(ellipse)
library(MASS)

set.seed(66)

a <- 3.5 # average morning wait time
b <- (-1) # average difference afternoon wait time
sigma_a <- 1 #std dev in intercepts
sigma_b <- 0.5 #std dev in slopes
rho <- (0) #correlation between intercepts and slopes

Mu <- c(a, b)

sigmas <- c(sigma_a, sigma_b)
Rho <- matrix(c(1,rho,rho,1), nrow=2)
Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas) 

N_cafes <- 20

vary_effects <- mvrnorm(N_cafes, Mu, Sigma)
a_cafe <- vary_effects[,1]
b_cafe <- vary_effects[,2]

#simulate observations
N_visits <- 10
afternoon <- rep(0:1, N_visits*N_cafes/2)
cafe_id <- rep(1:N_cafes, each=N_visits)
mu <- a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon
sigma <- 0.5 
wait <- rnorm(N_visits*N_cafes, mu, sigma)
d <- data.frame(cafe=cafe_id, afternoon=afternoon, wait=wait)

# Same varying-slopes model as m13.1; note the LKJ prior here is
# dlkjcorr(4), more concentrated toward zero correlation than the
# dlkjcorr(2) used in the chapter.
m.13M1 <- map2stan(
    alist(
        wait ~ dnorm(mu, sigma),
        mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
        c(a_cafe, b_cafe)[cafe] ~ dmvnorm2(c(a,b), sigma_cafe, Rho),
        a ~ dnorm(0, 10),
        b ~ dnorm(0, 10),
        sigma_cafe ~ dcauchy(0,2),
        sigma ~ dcauchy(0,2),
        Rho ~ dlkjcorr(4)
    ), data=d, iter=5000, warmup=2000, chains=2)


plot(a_cafe, b_cafe, col=rangi2,
     xlab="intercepts (a_cafe)", ylab="slopes (b_cafe)")

library(ellipse)

for (l in c(0.1, 0.3, 0.5, 0.8, 0.99))
    lines(ellipse(Sigma, centre=Mu, level=l), col=col.alpha("black", 0.2))

post <- extract.samples(m.13M1)

# Posterior of the correlation; should be centered near the true rho = 0.
dens(post$Rho[,1,2] )

13M2

# 13M2 — Same data, but with INDEPENDENT Gaussian priors on intercepts
# and slopes (no correlation structure), for comparison against m.13M1.
m.13M2 <- map2stan(
    alist(
        wait ~ dnorm(mu, sigma),
        mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
        a_cafe[cafe] ~ dnorm(alpha, sigma_a),
        b_cafe[cafe] ~ dnorm(beta, sigma_b),
        alpha ~ dnorm(0, 10),
        beta ~ dnorm(0, 10),
        sigma ~ dcauchy(0,1),
        sigma_a ~ dcauchy(0,1),
        sigma_b ~ dcauchy(0,1)
    ), data=d, iter=5000, warmup=2000, chains=2)
compare(m.13M1, m.13M2)
post.13M1 <- extract.samples(m.13M1)
post.13M2 <- extract.samples(m.13M2)

# Posterior-mean intercepts/slopes under each model.
a13M1 <- apply(post.13M1$a_cafe, 2, mean)
b13M1 <- apply(post.13M1$b_cafe, 2, mean)

a13M2 <- apply(post.13M2$a_cafe, 2, mean)
b13M2 <- apply(post.13M2$b_cafe, 2, mean)

# Blue filled points: correlated model; open points: independent model;
# segments connect the two estimates for each cafe.
plot(a13M1, b13M1 , xlab="intercept" , ylab="slope" ,
      pch=16, col=rangi2, ylim=c(min(b13M2)-0.5, max(b13M2)+0.5),
      xlim=c(min(a13M1)-0.5, max(a13M1)+0.5 ) )
points(a13M2, b13M2 , pch=1 )
for (i in 1:N_cafes) lines(c(a13M1[i], a13M2[i]), c(b13M1[i], b13M2[i]))

Rho is negative (-0.7), so there is a negative correlation between intercept and slope. Left of the center x, the blue dots (m1) move up, and right of the center they move down.

13M3

# 13M3 — Refit the UCB varying-slopes model, then compare a centered
# (dmvnorm2) vs non-centered (dmvnormNC) parameterization.
data(UCBadmit)
d <- UCBadmit
d$male <- ifelse(d$applicant.gender=="male", 1, 0)
d$dept_id <- coerce_index(d$dept)


# Centered parameterization (same as section 13.2.2).
m13.3 <- map2stan(
    alist(
        admit ~ dbinom(applications, p),
        logit(p) <- a_dept[dept_id] + bm_dept[dept_id]*male,
        c(a_dept, bm_dept)[dept_id] ~ dmvnorm2(c(a, bm), sigma_dept, Rho),
        a ~ dnorm(0, 10),
        bm ~ dnorm(0, 1),
        sigma_dept ~ dcauchy(0, 2),
        Rho ~ dlkjcorr(2)
    ),
    data=d, warmup=1000, iter=5000, chains=4, cores=3)


# Non-centered parameterization of the same model.
m13m3 <- map2stan(
    alist(
        admit ~ dbinom(applications, p),
        logit(p) <- a_dept[dept_id] + bm_dept[dept_id]*male,
        c(a_dept, bm_dept)[dept_id] ~ dmvnormNC(sigma_dept, Rho),
        a ~ dnorm(0, 10),
        bm ~ dnorm(0, 1),
        sigma_dept ~ dcauchy(0, 2),
        Rho ~ dlkjcorr(2)
    ),
    data=d, warmup=1000, iter=5000, chains=4, cores=3)

compare(m13.3, m13m3)

#plot(precis(m13.3, pars=c("a_dept", "bm_dept"), depth=2))

precis(m13.3, depth=2)

precis(m13m3, depth=2)

13M4

# 13M4 — Refit the Gaussian-process tools model for comparison against
# the chapter-10 Poisson models below.
data(islandsDistMatrix)
Dmat <- islandsDistMatrix
colnames(Dmat) <- c("Ml","Ti", "SC","Ya","Fi","Tr","Ch","Mn","To","Ha")
round(Dmat, 1)


data(Kline2)
d <- Kline2
d$society <- 1:10

m13.7 <- map2stan(
    alist(
        total_tools ~ dpois(lambda),
        log(lambda) <- a + g[society] + bp*logpop,
        g[society] ~ GPL2(Dmat, etasq, rhosq, 0.01),
        a ~ dnorm(0,10),
        bp ~ dnorm(0,1),
        etasq ~ dcauchy(0,1),
        rhosq ~ dcauchy(0,1)
    ), data=list(
           total_tools=d$total_tools,
           logpop=d$logpop,
           society=d$society,
           Dmat=islandsDistMatrix),
    warmup=2000, iter=1e4, chains=4)

precis(m13.7, depth=2)

# Contact-rate dummy: 1 = high contact, 0 = low.
d$contact_high <- ifelse(d$contact=="high", 1, 0)

#no interaction
m10.11 <- map2stan(
    alist(
        total_tools ~ dpois(lambda),
        log(lambda) <- a + bp*logpop + bc*contact_high,
        a ~ dnorm(0, 100),
        c(bp, bc) ~ dnorm(0, 1)
    ), data =d)

# no contact rate
m10.12 <- map2stan(
    alist(
        total_tools ~ dpois(lambda),
        log(lambda) <- a + bp*logpop,
        a ~ dnorm(0, 100),
        bp ~ dnorm(0, 1)
    ), data =d)

# no log pop
m10.13 <- map2stan(
    alist(
        total_tools ~ dpois(lambda),
        log(lambda) <- a + bc*contact_high,
        a ~ dnorm(0, 100),
        bc ~ dnorm(0, 1)
    ), data =d)

# intercept only (null model)
m10.14 <- map2stan(
    alist(
        total_tools ~ dpois(lambda),
        log(lambda) <- a, 
        a ~ dnorm(0, 100)
        ), data =d)

# Center log population for the interaction model.
d$logpop_c <- d$logpop - mean(d$logpop)

# full interaction model: log pop x contact rate
m10.10stan.c <- map2stan(
    alist(
        total_tools ~ dpois(lambda),
        log(lambda) <- a + bp*logpop_c + bc*contact_high +
            bcp * logpop_c * contact_high,
        a ~ dnorm(0,10),
        bp ~ dnorm(0,1),
        bc ~ dnorm(0,1),
        bcp ~ dnorm(0,1)
    ), data=d, iter=3000, warmup=1000, chains=4)
# WAIC comparison including the GP model m13.7.
(islands.compare <- compare(m10.11, m10.12, m10.13, m10.14, m10.10stan.c, m13.7, n=1e4))
plot(islands.compare)

13H1

# 13H1 — Bangladesh contraception: varying intercepts by district and
# varying slopes for urban residence.
data(bangladesh)
d <- bangladesh

# Check the raw district codes (they are not guaranteed contiguous).
sort(unique(d$district))

# Build a contiguous 1..N index from the district codes.
d$district_id <- as.integer(as.factor(d$district))
#sort(unique(d$district_id))
# Rename to avoid the dot in the original column name.
d$use_contraception <- d$use.contraception


# Varying effects are centered at 0; the overall intercept a and slope b
# are separate fixed parameters.
m13h1<- map2stan(
    alist(
        use_contraception ~ dbinom(1, p),
        logit(p) <- a + a_district[district_id] + (b + b_urban[district_id])*urban,
        c(a_district, b_urban)[district_id] ~ dmvnorm2(0, sigma_district, Rho),
        a ~ dnorm(0, 10),
        b ~ dnorm(0, 10),
        sigma_district ~ dcauchy(0, 1),
        Rho ~ dlkjcorr(2)
    ), data=d, iter=4000, warmup=1000, chains=4, cores=4) 

posterior.samples <- extract.samples(m13h1)
# Posterior density of the intercept/slope correlation.
dens( posterior.samples$Rho[,1,2] )
plot(precis(m13h1, pars = c("a", "b"), depth = 2))
post <- extract.samples(m13h1)

# Posterior-mean district intercepts and urban slopes.
a13h1 <- apply(post$a_district, 2, mean)
b13h1 <- apply(post$b_urban, 2, mean)

plot(a13h1, b13h1 , xlab="intercept" , ylab="slope" ,
       pch=16, col=rangi2, ylim=c(min(b13h1)-0.5, max(b13h1)+0.5),
       xlim=c(min(a13h1)-0.5, max(a13h1)+0.5 ) )
## points(a13M2, b13M2 , pch=1 )
## for (i in 1:N_cafes) lines(c(a13M1[i], a13M2[i]), c(b13M1[i], b13M2[i]))

There is a negative correlation between the intercepts (districts) and the slopes (urban): urban residence is less influential in districts with higher overall contraception use.

# Counterfactual predictions: every district treated as fully rural,
# then as fully urban.
data.rural <- list(
  urban=rep(0,60),
  district_id=1:60 )

data.urban <- list(
  urban=rep(1,60),
  district_id=1:60 )

predictions.rural <- link(m13h1 , data=data.rural)
predictions.urban <- link(m13h1 , data=data.urban)
# Posterior-mean predicted probability of use per district.
means.rural <- apply(predictions.rural , 2 , mean)
means.urban <- apply(predictions.urban , 2 , mean)
# Points above the dashed identity line mean higher urban use.
plot(means.rural , means.urban , col="red",
      xlim=c(0,1) , ylim=c(0,1) ,
      xlab="rural" , ylab="urban")
abline(a=0,b=1,lty=2)

Urban proportion using contraception is higher

13H2

# 13H2 — Oxford boys' heights: varying intercepts and age slopes per
# subject, pooled through a bivariate normal.
data(Oxboys)
d <- Oxboys

# Standardize height to a z-score.
d$height_c <- (d$height - mean(d$height)) / sd(d$height)

m13h2<- map2stan(
    alist(
        height_c ~ dnorm(mu, sigma),
        mu <- a_subject[Subject] + (b_subject[Subject])*age,
        c(a_subject, b_subject)[Subject] ~ dmvnorm2(c(a, b), sigma_subject, Rho),
        a ~ dnorm(0, 10),
        b ~ dnorm(0, 1),
        sigma ~ dcauchy(0,2),
        sigma_subject ~ dcauchy(0,2),
        Rho ~ dlkjcorr(2)
    ), data=d, iter=4000, warmup=1000, chains=4, cores=4) 

Increased the Cauchy prior scale for the standard deviations.

The intercepts explain more of the variation than the slopes.

13H3

# 13H3 — Plot per-subject posterior means with the implied population
# ellipses overlaid.
post <- extract.samples(m13h2)
a <- apply(post$a_subject, 2, mean)
b <- apply(post$b_subject, 2, mean )
plot(a, b, xlab="intercept" , ylab="slope",
     pch=16 , col=rangi2, xlim=c(-2.5, 2.5), ylim=c(-0.5, 0.5))

# NOTE(review): this averages over ALL subjects' parameters; the
# hyper-means mean(post$a), mean(post$b) may be what was intended —
# confirm, though the two should be close since a_subject is centered
# on (a, b) in m13h2.
Mu_est <- c(mean(post$a_subject), mean(post$b_subject) )
rho_est <- mean(post$Rho[,1,2])
sa_est <- mean(post$sigma_subject[,1])
sb_est <- mean(post$sigma_subject[,2])
cov_ab <- sa_est*sb_est*rho_est
Sigma_est <- matrix(c(sa_est^2,cov_ab,cov_ab,sb_est^2) , ncol=2)

for (l in c(0.1,0.3,0.5,0.8,0.99)) {
  lines(ellipse(Sigma_est,centre=Mu_est,level=l), col=col.alpha("black",0.2))
}

13H4

# 13H4 — Simulate new boys from the estimated population of intercepts
# and slopes, and plot one growth line per simulated boy.
library(MASS)

N_boys <- 100
# One (intercept, slope) pair per simulated boy.
params <- mvrnorm(N_boys, Mu_est, Sigma_est)

age.seq <- seq(from=-10, 10, by=0.1 )

# Empty plotting frame. FIX: axis labels must be set here — lines()
# ignores xlab/ylab (they were previously passed to lines() below).
plot(1, 1,
     xlim=c(-10,10),
     ylim=c(-10,10),
     type='n',
     xlab='age', ylab='height')


for (i in 1:N_boys){
    intercept <- params[i, 1]
    slope <- params[i, 2]
    height <- intercept + age.seq*slope
    lines(age.seq, height, col=col.alpha('blue'))}