fixing fig sizes in inference

leem44 · leem44 · commit e754989a6ea4 · 2021-10-26T23:31:03.000-07:00
diff --git a/inference.Rmd b/inference.Rmd
@@ -287,7 +287,7 @@ We have created this particular example
 such that we *do* have access to the full population, which lets us visualize the 
 sampling distribution directly for learning purposes.
 
-```{r 11-example-proportions7, echo = TRUE, message = FALSE, warning = FALSE,fig.cap = "Sampling distribution of the sample proportion for sample size 40.", fig.retina = 2, out.width = "100%"}
+```{r 11-example-proportions7, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Sampling distribution of the sample proportion for sample size 40.", fig.height = 3.3, fig.width = 4.2}
 sampling_distribution <- ggplot(sample_estimates, aes(x = sample_proportion)) +
   geom_histogram(fill = "dodgerblue3", color = "lightgrey", bins = 12) +
   ylab("Count") +
@@ -335,7 +335,7 @@ We can visualize the population distribution of the price per night with a histo
 options(pillar.sigfig = 5)
 ```
 
-```{r 11-example-means2, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Population distribution of price per night (Canadian dollars) for all Airbnb listings in Vancouver, Canada.", fig.retina = 2, out.width = "100%"}
+```{r 11-example-means2, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Population distribution of price per night (Canadian dollars) for all Airbnb listings in Vancouver, Canada.", fig.height = 3.5, fig.width = 4.5}
 population_distribution <- ggplot(airbnb, aes(x = price)) +
   geom_histogram(fill = "dodgerblue3", color = "lightgrey") +
   ylab("Count") + 
@@ -380,7 +380,7 @@ We can create a histogram to visualize the distribution of observations in the
 sample (Figure \@ref(fig:11-example-means-sample-hist)), and calculate the mean
 of our sample.
 
-```{r 11-example-means-sample-hist, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Distribution of price per night (Canadian dollars) for sample of 40 Airbnb listings.", fig.retina = 2, out.width = "100%"}
+```{r 11-example-means-sample-hist, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Distribution of price per night (Canadian dollars) for sample of 40 Airbnb listings.", fig.height = 3.5, fig.width = 4.5}
 sample_distribution <- ggplot(one_sample, aes(price)) +
   geom_histogram(fill = "dodgerblue3", color = "lightgrey") +
   ylab("Count") + 
@@ -422,7 +422,7 @@ samples
 Now we can calculate the sample mean for each replicate and plot the sampling
 distribution of sample means for samples of size 40.
 
-```{r 11-example-means4, echo = TRUE, message = FALSE, warning = FALSE, fig.cap= "Sampling distribution of the sample means for sample size of 40.", fig.retina = 2, out.width = "100%"}
+```{r 11-example-means4, echo = TRUE, message = FALSE, warning = FALSE, fig.cap= "Sampling distribution of the sample means for sample size of 40.", fig.height = 3.5, fig.width = 4.5}
 sample_estimates <- samples |>
   group_by(replicate) |>
   summarize(sample_mean = mean(price))
@@ -468,15 +468,15 @@ Notice that the mean of the sample means is \$`r round(mean(sample_estimates$sam
 was \$`r round(mean(airbnb$price),2)`. 
 -->
 
-```{r 11-example-means5, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Comparison of population distribution, sample distribution, and sampling distribution."}
+```{r 11-example-means5, echo = FALSE, message = FALSE, warning = FALSE, fig.height = 5.5, fig.width = 4, fig.cap = "Comparison of population distribution, sample distribution, and sampling distribution."}
 grid.arrange(population_distribution +
   ggtitle("Population") +
   xlim(min(airbnb$price), 600),
 sample_distribution +
   ggtitle("Sample (n = 40)") +
   xlim(min(airbnb$price), 600),
 sampling_distribution_40 +
-  ggtitle("Sampling distribution of the mean for samples of size 40") +
+  ggtitle("Sampling distribution of the mean \n for samples of size 40") +
   xlim(min(airbnb$price), 600),
 nrow = 3
 )
@@ -664,7 +664,7 @@ see that the sample’s distribution looks like that of the population for a
 large enough sample.
 
 
-```{r 11-example-bootstrapping0, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Comparison of samples of different sizes from the population."}
+```{r 11-example-bootstrapping0, echo = FALSE, message = FALSE, warning = FALSE, fig.height = 7, fig.cap = "Comparison of samples of different sizes from the population."}
 sample_10 <- airbnb |>
   rep_sample_n(10)
 sample_distribution_10 <- ggplot(sample_10, aes(price)) +
@@ -773,7 +773,7 @@ one_sample <- one_sample |>
   ungroup() |> select(-replicate)
 ```
 
-```{r 11-bootstrapping1, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Histogram of price per night (Canadian dollars) for one sample of size 40.", fig.retina = 2, out.width = "100%"}
+```{r 11-bootstrapping1, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Histogram of price per night (Canadian dollars) for one sample of size 40.", fig.height = 3.5, fig.width = 4.5}
 one_sample
 
 one_sample_dist <- ggplot(one_sample, aes(price)) +
@@ -799,7 +799,7 @@ we change the argument for `replace` from its default value of `FALSE` to `TRUE`
 \index{bootstrap!in R}
 \index{rep\_sample\_n!bootstrap}
 
-```{r 11-bootstrapping3, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Bootstrap distribution.", fig.retina = 2, out.width = "100%"}
+```{r 11-bootstrapping3, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Bootstrap distribution.", fig.height = 3.5, fig.width = 4.5}
 boot1 <- one_sample |>
   rep_sample_n(size = 40, replace = TRUE, reps = 1)
 boot1_dist <- ggplot(boot1, aes(price)) +
@@ -865,7 +865,7 @@ generate a bootstrap distribution of our point estimates. The bootstrap
 distribution (Figure \@ref(fig:11-bootstrapping5)) suggests how we might expect
 our point estimate to behave if we took another sample.
 
-```{r 11-bootstrapping5, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Distribution of the bootstrap sample means.", out.width = "100%"}
+```{r 11-bootstrapping5, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Distribution of the bootstrap sample means.", fig.height = 3.5, fig.width = 4.5}
 boot20000_means <- boot20000 |>
   group_by(replicate) |>
   summarize(mean = mean(price))
@@ -884,7 +884,7 @@ boot_est_dist
 Let's compare the bootstrap distribution&mdash;which we construct by taking many samples from our original sample of size 40&mdash;with 
 the true sampling distribution&mdash;which corresponds to taking many samples from the population.
 
-```{r 11-bootstrapping6, echo = F, message = FALSE, warning = FALSE, fig.cap = "Comparison of the distribution of the bootstrap sample means and sampling distribution.", out.height = "70%"}
+```{r 11-bootstrapping6, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Comparison of the distribution of the bootstrap sample means and sampling distribution.", fig.height = 3.5}
 samples <- rep_sample_n(airbnb, size = 40, reps = 20000)
 
 sample_estimates <- samples |>
@@ -1125,11 +1125,11 @@ the middle 95\% of the sample mean prices in the bootstrap distribution. We can
 visualize the interval on our distribution in Figure
 \@ref(fig:11-bootstrapping9). 
 
-```{r 11-bootstrapping9, echo = F, message = FALSE, warning = FALSE, fig.cap = "Distribution of the bootstrap sample means with percentile lower and upper bounds.", out.width = "100%"}
+```{r 11-bootstrapping9, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Distribution of the bootstrap sample means with percentile lower and upper bounds.", fig.height = 4, fig.width = 6.5}
 boot_est_dist +
   geom_vline(xintercept = bounds, col = "#E69F00", size = 2, linetype = 2) +
   annotate("text",
-    x = bounds[1], max_count(boot_est_dist), hjust = 0.5, vjust = 2,
+    x = bounds[1], max_count(boot_est_dist), hjust = 0.6, vjust = 2,
     label = paste("2.5th percentile =", round(bounds[1], 2))
   ) +
   annotate("text",