Skip to content

Commit e754989

Browse files
committed
fixing fig sizes in inference
1 parent 6edfb12 commit e754989

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

inference.Rmd

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ We have created this particular example
287287
such that we *do* have access to the full population, which lets us visualize the
288288
sampling distribution directly for learning purposes.
289289

290-
```{r 11-example-proportions7, echo = TRUE, message = FALSE, warning = FALSE,fig.cap = "Sampling distribution of the sample proportion for sample size 40.", fig.retina = 2, out.width = "100%"}
290+
```{r 11-example-proportions7, echo = TRUE, message = FALSE, warning = FALSE,fig.cap = "Sampling distribution of the sample proportion for sample size 40.", fig.height = 3.3, fig.width = 4.2}
291291
sampling_distribution <- ggplot(sample_estimates, aes(x = sample_proportion)) +
292292
geom_histogram(fill = "dodgerblue3", color = "lightgrey", bins = 12) +
293293
ylab("Count") +
@@ -335,7 +335,7 @@ We can visualize the population distribution of the price per night with a histo
335335
options(pillar.sigfig = 5)
336336
```
337337

338-
```{r 11-example-means2, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Population distribution of price per night (Canadian dollars) for all Airbnb listings in Vancouver, Canada.", fig.retina = 2, out.width = "100%"}
338+
```{r 11-example-means2, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Population distribution of price per night (Canadian dollars) for all Airbnb listings in Vancouver, Canada.", fig.height = 3.5, fig.width = 4.5}
339339
population_distribution <- ggplot(airbnb, aes(x = price)) +
340340
geom_histogram(fill = "dodgerblue3", color = "lightgrey") +
341341
ylab("Count") +
@@ -380,7 +380,7 @@ We can create a histogram to visualize the distribution of observations in the
380380
sample (Figure \@ref(fig:11-example-means-sample-hist)), and calculate the mean
381381
of our sample.
382382

383-
```{r 11-example-means-sample-hist, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Distribution of price per night (Canadian dollars) for sample of 40 Airbnb listings.", fig.retina = 2, out.width = "100%"}
383+
```{r 11-example-means-sample-hist, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Distribution of price per night (Canadian dollars) for sample of 40 Airbnb listings.", fig.height = 3.5, fig.width = 4.5}
384384
sample_distribution <- ggplot(one_sample, aes(price)) +
385385
geom_histogram(fill = "dodgerblue3", color = "lightgrey") +
386386
ylab("Count") +
@@ -422,7 +422,7 @@ samples
422422
Now we can calculate the sample mean for each replicate and plot the sampling
423423
distribution of sample means for samples of size 40.
424424

425-
```{r 11-example-means4, echo = TRUE, message = FALSE, warning = FALSE, fig.cap= "Sampling distribution of the sample means for sample size of 40.", fig.retina = 2, out.width = "100%"}
425+
```{r 11-example-means4, echo = TRUE, message = FALSE, warning = FALSE, fig.cap= "Sampling distribution of the sample means for sample size of 40.", fig.height = 3.5, fig.width = 4.5}
426426
sample_estimates <- samples |>
427427
group_by(replicate) |>
428428
summarize(sample_mean = mean(price))
@@ -468,15 +468,15 @@ Notice that the mean of the sample means is \$`r round(mean(sample_estimates$sam
468468
was \$`r round(mean(airbnb$price),2)`.
469469
-->
470470

471-
```{r 11-example-means5, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Comparison of population distribution, sample distribution, and sampling distribution."}
471+
```{r 11-example-means5, echo = FALSE, message = FALSE, warning = FALSE, fig.height = 5.5, fig.width = 4, fig.cap = "Comparison of population distribution, sample distribution, and sampling distribution."}
472472
grid.arrange(population_distribution +
473473
ggtitle("Population") +
474474
xlim(min(airbnb$price), 600),
475475
sample_distribution +
476476
ggtitle("Sample (n = 40)") +
477477
xlim(min(airbnb$price), 600),
478478
sampling_distribution_40 +
479-
ggtitle("Sampling distribution of the mean for samples of size 40") +
479+
ggtitle("Sampling distribution of the mean \n for samples of size 40") +
480480
xlim(min(airbnb$price), 600),
481481
nrow = 3
482482
)
@@ -664,7 +664,7 @@ see that the sample’s distribution looks like that of the population for a
664664
large enough sample.
665665

666666

667-
```{r 11-example-bootstrapping0, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Comparison of samples of different sizes from the population."}
667+
```{r 11-example-bootstrapping0, echo = FALSE, message = FALSE, warning = FALSE, fig.height = 7, fig.cap = "Comparison of samples of different sizes from the population."}
668668
sample_10 <- airbnb |>
669669
rep_sample_n(10)
670670
sample_distribution_10 <- ggplot(sample_10, aes(price)) +
@@ -773,7 +773,7 @@ one_sample <- one_sample |>
773773
ungroup() |> select(-replicate)
774774
```
775775

776-
```{r 11-bootstrapping1, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Histogram of price per night (Canadian dollars) for one sample of size 40.", fig.retina = 2, out.width = "100%"}
776+
```{r 11-bootstrapping1, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Histogram of price per night (Canadian dollars) for one sample of size 40.", fig.height = 3.5, fig.width = 4.5}
777777
one_sample
778778
779779
one_sample_dist <- ggplot(one_sample, aes(price)) +
@@ -799,7 +799,7 @@ we change the argument for `replace` from its default value of `FALSE` to `TRUE`
799799
\index{bootstrap!in R}
800800
\index{rep\_sample\_n!bootstrap}
801801

802-
```{r 11-bootstrapping3, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Bootstrap distribution.", fig.retina = 2, out.width = "100%"}
802+
```{r 11-bootstrapping3, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Bootstrap distribution.", fig.height = 3.5, fig.width = 4.5}
803803
boot1 <- one_sample |>
804804
rep_sample_n(size = 40, replace = TRUE, reps = 1)
805805
boot1_dist <- ggplot(boot1, aes(price)) +
@@ -865,7 +865,7 @@ generate a bootstrap distribution of our point estimates. The bootstrap
865865
distribution (Figure \@ref(fig:11-bootstrapping5)) suggests how we might expect
866866
our point estimate to behave if we took another sample.
867867

868-
```{r 11-bootstrapping5, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Distribution of the bootstrap sample means.", out.width = "100%"}
868+
```{r 11-bootstrapping5, echo = TRUE, message = FALSE, warning = FALSE, fig.cap = "Distribution of the bootstrap sample means.", fig.height = 3.5, fig.width = 4.5}
869869
boot20000_means <- boot20000 |>
870870
group_by(replicate) |>
871871
summarize(mean = mean(price))
@@ -884,7 +884,7 @@ boot_est_dist
884884
Let's compare the bootstrap distribution&mdash;which we construct by taking many samples from our original sample of size 40&mdash;with
885885
the true sampling distribution&mdash;which corresponds to taking many samples from the population.
886886

887-
```{r 11-bootstrapping6, echo = F, message = FALSE, warning = FALSE, fig.cap = "Comparison of the distribution of the bootstrap sample means and sampling distribution.", out.height = "70%"}
887+
```{r 11-bootstrapping6, echo = F, message = FALSE, warning = FALSE, fig.cap = "Comparison of the distribution of the bootstrap sample means and sampling distribution.", fig.height = 3.5}
888888
samples <- rep_sample_n(airbnb, size = 40, reps = 20000)
889889
890890
sample_estimates <- samples |>
@@ -1125,11 +1125,11 @@ the middle 95\% of the sample mean prices in the bootstrap distribution. We can
11251125
visualize the interval on our distribution in Figure
11261126
\@ref(fig:11-bootstrapping9).
11271127

1128-
```{r 11-bootstrapping9, echo = F, message = FALSE, warning = FALSE, fig.cap = "Distribution of the bootstrap sample means with percentile lower and upper bounds.", out.width = "100%"}
1128+
```{r 11-bootstrapping9, echo = F, message = FALSE, warning = FALSE, fig.cap = "Distribution of the bootstrap sample means with percentile lower and upper bounds.", fig.height=4, fig.width = 6.5}
11291129
boot_est_dist +
11301130
geom_vline(xintercept = bounds, col = "#E69F00", size = 2, linetype = 2) +
11311131
annotate("text",
1132-
x = bounds[1], max_count(boot_est_dist), hjust = 0.5, vjust = 2,
1132+
x = bounds[1], max_count(boot_est_dist), hjust = 0.6, vjust = 2,
11331133
label = paste("2.5th percentile =", round(bounds[1], 2))
11341134
) +
11351135
annotate("text",

0 commit comments

Comments
 (0)