Skip to content

Latest commit

 

History

History
204 lines (161 loc) · 6.12 KB

File metadata and controls

204 lines (161 loc) · 6.12 KB

Pieridae host repertoire - parameters

Mariana Braga 09 July, 2021


Script 1 for analyses performed in Braga et al. 2021 Phylogenetic reconstruction of ancestral ecological networks through time for pierid butterflies and their host plants, Ecology Letters.

Parameter Estimates

First we’ll read in the log files from the analyses in RevBayes. They contain all the sampled parameter values during MCMC. The first two chains used the time-calibrated host tree, while the last two used the transformed host tree (all branch lengths = 1).

chain1 <- read.table("./inference/out.2.real.pieridae.2s.log", header = TRUE)[,c(1,5,7:9)]
chain2 <- read.table("./inference/out.3.real.pieridae.2s.log", header = TRUE)[,c(1,5,7:9)]
chain3 <- read.table("./inference/out.3.bl1.pieridae.2s.log", header = TRUE)[,c(1,5,7:9)]
chain4 <- read.table("./inference/out.2.bl1.pieridae.2s.log", header = TRUE)[,c(1,5,7:9)]
colnames(chain1) <- colnames(chain2) <- colnames(chain3) <- colnames(chain4) <- 
  c("generation","clock","beta", "lambda[02]", "lambda[20]")

postt <- filter(chain1, generation >= 20000)
postt2 <- filter(chain2, generation >= 20000)
postb <- filter(chain3, generation >= 20000)
postb2 <- filter(chain4, generation >= 20000)
  • Convergence

We’ll use the Gelman and Rubin’s convergence diagnostic

# Test convergence when measuring anagenetic distance between hosts (with time-calibrated host tree)  
gelman.diag(mcmc.list(as.mcmc(postt), as.mcmc(postt2)))
## Potential scale reduction factors:
## 
##            Point est. Upper C.I.
## generation        NaN        NaN
## clock               1       1.01
## beta                1       1.02
## lambda[02]          1       1.00
## lambda[20]          1       1.00
## 
## Multivariate psrf
## 
## 1.01
# Test convergence when measuring cladogenetic distance between hosts (with transformed host tree)  
gelman.diag(mcmc.list(as.mcmc(postb), as.mcmc(postb2)))
## Potential scale reduction factors:
## 
##            Point est. Upper C.I.
## generation        NaN        NaN
## clock            1.01       1.03
## beta             1.00       1.00
## lambda[02]       1.00       1.00
## lambda[20]       1.00       1.00
## 
## Multivariate psrf
## 
## 1.01

We are good to go.

  • Mean estimates
posterior <- bind_rows(postt,postb) %>% 
  mutate(tree = c(rep("time", 3601),rep("bl1", 3601)))

means <- group_by(posterior, tree) %>% 
  dplyr::select(-generation) %>% 
  summarise_all(mean)
means
## # A tibble: 2 x 5
##   tree   clock  beta `lambda[02]` `lambda[20]`
##   <chr>  <dbl> <dbl>        <dbl>        <dbl>
## 1 bl1   0.0186  1.48       0.0268        0.973
## 2 time  0.0200  2.10       0.0347        0.965
  • Density plots
# slow chunk! Read RDS file below
# calculate prior distributions  
nrow = 1e6

priors <- tibble(
  generation = 1:nrow,
  clock = rexp(nrow, rate = 10),
  beta = rexp(nrow, rate = 1),
  lambda = rdirichlet(nrow,c(1,1))[,1],
  tree = rep("prior", nrow)
)

kd_beta <- kdensity(x = priors$beta[1:5000], kernel='gamma', support=c(0,Inf), bw = 0.05)
kd_clock <- kdensity(x = priors$clock[1:5000], kernel='gamma', support=c(0,Inf), bw = 0.01)
kd_lambda <- kdensity(x = priors$lambda, kernel='gamma', support=c(0,Inf), bw = 0.01)

x10 = seq(0,10,0.002)
x1 = seq(0,1,0.0002)


plot_priors <- tibble(
  param = c(rep("beta", length(x1)), rep("clock", length(x1)), rep("lambda", length(x1))),
  x = c(x10, x1, x1),
  y = c(kd_beta(x10), kd_clock(x1), kd_lambda(x1))
) 
# fast solution: read prior distributions from file
plot_priors <- readRDS("./inference/priors.rds")

all_post <- mutate(posterior, beta = case_when(is.na(beta) ~ 0, TRUE ~ beta),
                   tree = factor(tree, levels = c("time","bl1")))

pal <- c("#08519c","#6baed6")

ggbeta <- ggplot(all_post, aes(beta)) +
  geom_line(aes(x,y), data = filter(plot_priors, param == "beta")) +
  geom_density(aes(group = tree, fill = tree, col = tree), alpha = 0.7) +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  xlim(c(0,7)) +
  theme_bw()

ggclock <- ggplot(all_post, aes(clock)) +
  geom_line(aes(x,y), data = filter(plot_priors, param == "clock")) +
  geom_density(aes(group = tree, fill = tree, col = tree), alpha = 0.7) +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  xlim(c(0,0.1)) +
  theme_bw()

gg02 <- ggplot(all_post, aes(`lambda[02]`)) +
  geom_line(aes(x,y), data = filter(plot_priors, param == "lambda")) +
  geom_density(aes(group = tree, fill = tree, col = tree), alpha = 0.7) +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  xlim(c(0,0.1)) +
  theme_bw()

gg20 <- ggplot(all_post, aes(`lambda[20]`)) +
  geom_line(aes(x,y), data = filter(plot_priors, param == "lambda")) +
  geom_density(aes(group = tree, fill = tree, col = tree), alpha = 0.7) +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  xlim(c(0.9,1)) +
  theme_bw()
ggclock + ggbeta + gg02 + gg20 + plot_layout(ncol = 4, guides = 'collect')

Bayes factor

With Bayes factors we check which model has more support, the full model, where beta > 0, or the simplified model where beta = 0.

d_prior <- dexp(x=0, rate=1)

kd_beta_time <- kdensity(x = filter(posterior, tree == "time") %>% pull(beta), 
                         kernel='gamma', 
                         support=c(0,Inf), 
                         bw = 0.02)
kd_beta_bl1 <- kdensity(x = filter(posterior, tree == "bl1") %>% pull(beta), 
                         kernel='gamma', 
                         support=c(0,Inf), 
                         bw = 0.02)
max_time = kd_beta_time(0)
max_bl1 = kd_beta_bl1(0)

(BF_time <- d_prior/max_time)
## [1] 7.029484
(BF_bl1 <- d_prior/max_bl1)
## [1] 65545525

According to the calculated Bayes factors, both analyses support the full model, where beta > 0.

Now we are ready to move on to Character history inference.