@@ -216,7 +216,6 @@ read_csv("data/mauna_loa.csv") |>
216
216
mutate(date_measured = ym(date_measured)) |>
217
217
select(-date_decimal) |>
218
218
filter(ppm > 0, date_measured > date("1980/01/01")) |>
219
- #filter(ppm > 0, date_measured > interval(ymd("1980/01/01"), ymd("2021-01-01"))) |>
220
219
write_csv("data/mauna_loa_data.csv")
221
220
```
222
221
@@ -291,6 +290,7 @@ knitr::include_graphics("img/ggplot_function_scatter.jpeg")
291
290
``` {r 03-data-co2-scatter, warning=FALSE, message=FALSE, fig.height = 4, fig.width = 6, fig.cap = "Scatter plot of atmospheric concentration of CO$_{2}$ over time"}
292
291
co2_scatter <- ggplot(co2_df, aes(x = date_measured, y = ppm)) +
293
292
geom_point()
293
+
294
294
co2_scatter
295
295
```
296
296
@@ -321,6 +321,7 @@ with just the default arguments:
321
321
``` {r 03-data-co2-line, warning=FALSE, message=FALSE, fig.cap = "Line plot of atmospheric concentration of CO$_{2}$ over time"}
322
322
co2_line <- ggplot(co2_df, aes(x = date_measured, y = ppm)) +
323
323
geom_line()
324
+
324
325
co2_line
325
326
```
326
327
@@ -393,6 +394,7 @@ co2_line <- ggplot(co2_df, aes(x = date_measured, y = ppm)) +
393
394
ylab("Atmospheric CO2 (ppm)") +
394
395
xlim(c(date("1990-01-01"), date("1993-12-01"))) +
395
396
theme(text = element_text(size = 16))
397
+
396
398
co2_line
397
399
```
398
400
@@ -455,6 +457,7 @@ The result is shown in Figure \@ref(fig:03-data-faithful-scatter).
455
457
``` {r 03-data-faithful-scatter, warning=FALSE, message=FALSE, fig.cap = "Scatter plot of waiting time and eruption time"}
456
458
faithful_scatter <- ggplot(faithful, aes(x = waiting, y = eruptions)) +
457
459
geom_point()
460
+
458
461
faithful_scatter
459
462
```
460
463
@@ -470,6 +473,7 @@ faithful_scatter <- ggplot(faithful, aes(x = waiting, y = eruptions)) +
470
473
geom_point() +
471
474
labs(x = "Waiting Time (mins)", y = "Eruption Duration (mins)") +
472
475
theme(text = element_text(size = 16))
476
+
473
477
faithful_scatter
474
478
```
475
479
@@ -517,12 +521,18 @@ ggplot(can_lang, aes(x = most_at_home, y = mother_tongue)) +
517
521
y = "Mother tongue \n (number of Canadian residents)") +
518
522
theme(text = element_text(size = 14))
519
523
```
524
+
520
525
``` {r mother-tongue-hidden-summaries, echo = FALSE, warning = FALSE, message = FALSE}
521
526
numlang_speakers <- can_lang |>
522
527
select(mother_tongue) |>
523
- summarize(maxsp = max(mother_tongue), minsp = min(mother_tongue))
524
- maxlang_speakers <- numlang_speakers |> pull(maxsp)
525
- minlang_speakers <- numlang_speakers |> pull(minsp)
528
+ summarize(maxsp = max(mother_tongue),
529
+ minsp = min(mother_tongue))
530
+
531
+ maxlang_speakers <- numlang_speakers |>
532
+ pull(maxsp)
533
+
534
+ minlang_speakers <- numlang_speakers |>
535
+ pull(minsp)
526
536
```
527
537
528
538
Okay! The axes and labels in Figure \@ref(fig:03-mother-tongue-vs-most-at-home-labs) are
@@ -584,6 +594,7 @@ ggplot(can_lang, aes(x = most_at_home, y = mother_tongue)) +
584
594
english_mother_tongue <- can_lang |>
585
595
filter(language == "English") |>
586
596
pull(mother_tongue)
597
+
587
598
census_popn <- 35151728
588
599
```
589
600
@@ -614,6 +625,7 @@ can_lang <- can_lang |>
614
625
mother_tongue_percent = (mother_tongue / 35151728)*100,
615
626
most_at_home_percent = (most_at_home / 35151728)*100
616
627
)
628
+
617
629
can_lang |>
618
630
select(mother_tongue_percent, most_at_home_percent)
619
631
```
@@ -721,7 +733,9 @@ visual redundancy—i.e., conveying the same information with both scatter p
721
733
further improve the clarity and accessibility of your visualization.
722
734
723
735
```{r scatter-color-by-category-palette, fig.width=7.75, fig.height=4, warning=FALSE, fig.cap = "Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home colored by language category with color-blind friendly colors"}
724
- ggplot(can_lang, aes(x = most_at_home_percent, y = mother_tongue_percent, color = category, shape = category)) +
736
+ ggplot(can_lang, aes(x = most_at_home_percent,
737
+ y = mother_tongue_percent,
738
+ color = category, shape = category)) +
725
739
geom_point() +
726
740
labs(x = "Language spoken most at home \n (percentage of Canadian residents)",
727
741
y = "Mother tongue \n (percentage of Canadian residents)") +
@@ -808,8 +822,11 @@ The `islands.csv` data set \index{Island landmasses} contains a list of Earth's
808
822
islands_df <- read_csv("data/islands.csv")
809
823
continents <- c("Africa", "Antarctica", "Asia", "Australia",
810
824
"Europe", "North America", "South America")
825
+
811
826
islands_df <- mutate(islands_df,
812
- landmass_type = ifelse(landmass %in% continents, "Continent", "Other"))
827
+ landmass_type = ifelse(landmass %in% continents,
828
+ "Continent", "Other"))
829
+
813
830
write_csv(islands_df, "data/islands.csv")
814
831
```
815
832
@@ -838,6 +855,7 @@ shown in Figure \@ref(fig:03-data-islands-bar).
838
855
``` {r 03-data-islands-bar, warning=FALSE, message=FALSE, fig.cap = "Bar plot of all Earth's landmasses' size with squished labels"}
839
856
islands_bar <- ggplot(islands_df, aes(x = landmass, y = size)) +
840
857
geom_bar(stat = "identity")
858
+
841
859
islands_bar
842
860
```
843
861
@@ -857,6 +875,7 @@ swapping the `x` and `y` variables:
857
875
islands_top12 <- slice_max(islands_df, order_by = size, n = 12)
858
876
islands_bar <- ggplot(islands_top12, aes(x = size, y = landmass)) +
859
877
geom_bar(stat = "identity")
878
+
860
879
islands_bar
861
880
```
862
881
@@ -903,6 +922,7 @@ islands_bar <- ggplot(islands_top12,
903
922
geom_bar(stat = "identity") +
904
923
labs(x = "Size (1000 square mi)", y = "Landmass", fill = "Type") +
905
924
theme(text = element_text(size = 16))
925
+
906
926
islands_bar
907
927
```
908
928
@@ -961,6 +981,7 @@ let's use the default arguments just to see how things look.
961
981
``` {r 03-data-morley-hist, warning=FALSE, message=FALSE, fig.cap = "Histogram of Michelson's speed of light data"}
962
982
morley_hist <- ggplot(morley, aes(x = Speed)) +
963
983
geom_histogram()
984
+
964
985
morley_hist
965
986
```
966
987
@@ -991,6 +1012,7 @@ while *horizontal lines* are used to denote quantities on the *vertical axis*.
991
1012
morley_hist <- ggplot(morley, aes(x = Speed)) +
992
1013
geom_histogram() +
993
1014
geom_vline(xintercept = 792.458, linetype = "dashed", size = 1)
1015
+
994
1016
morley_hist
995
1017
```
996
1018
@@ -1019,6 +1041,7 @@ when they are colored by another categorical variable).
1019
1041
morley_hist <- ggplot(morley, aes(x = Speed, fill = Expt)) +
1020
1042
geom_histogram(alpha = 0.5, position = "identity") +
1021
1043
geom_vline(xintercept = 792.458, linetype = "dashed", size = 1.0)
1044
+
1022
1045
morley_hist
1023
1046
```
1024
1047
@@ -1042,6 +1065,7 @@ and the color will be mapped discretely.
1042
1065
morley_hist <- ggplot(morley, aes(x = Speed, fill = as_factor(Expt))) +
1043
1066
geom_histogram(alpha = 0.5, position = "identity") +
1044
1067
geom_vline(xintercept = 792.458, linetype = "dashed", size = 1.0)
1068
+
1045
1069
morley_hist
1046
1070
```
1047
1071
@@ -1081,6 +1105,7 @@ morley_hist <- ggplot(morley, aes(x = Speed, fill = as_factor(Expt))) +
1081
1105
geom_histogram() +
1082
1106
facet_grid(rows = vars(Expt)) +
1083
1107
geom_vline(xintercept = 792.458, linetype = "dashed", size = 1.0)
1108
+
1084
1109
morley_hist
1085
1110
```
1086
1111
@@ -1101,13 +1126,21 @@ To answer this question, we'll use the `mutate` function to transform our data i
1101
1126
\index{ggplot!labs}\index{ggplot!theme}
1102
1127
1103
1128
``` {r 03-data-morley-hist-5, warning=FALSE, message=FALSE, fig.height = 7, fig.cap = "Histogram of relative accuracy split vertically by experiment with clearer axes and labels"}
1104
- morley_rel <- mutate(morley, relative_accuracy = 100 * ((299000 + Speed) - 299792.458) / (299792.458))
1105
- morley_hist <- ggplot(morley_rel, aes(x = relative_accuracy, fill = as_factor(Expt))) +
1129
+ morley_rel <- mutate(morley,
1130
+ relative_accuracy = 100 *
1131
+ ((299000 + Speed) - 299792.458) / (299792.458))
1132
+
1133
+ morley_hist <- ggplot(morley_rel,
1134
+ aes(x = relative_accuracy,
1135
+ fill = as_factor(Expt))) +
1106
1136
geom_histogram() +
1107
1137
facet_grid(rows = vars(Expt)) +
1108
1138
geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
1109
- labs(x = "Relative Accuracy (%)", y = "# Measurements", fill = "Experiment ID") +
1139
+ labs(x = "Relative Accuracy (%)",
1140
+ y = "# Measurements",
1141
+ fill = "Experiment ID") +
1110
1142
theme(text = element_text(size = 14))
1143
+
1111
1144
morley_hist
1112
1145
```
1113
1146
@@ -1149,35 +1182,51 @@ and the binwidth of 0.01 are effective for helping answer our question.
1149
1182
On the other hand, the bin widths of 0.001 and 0.1 are too small and too big, respectively.
1150
1183
1151
1184
``` {r 03-data-morley-hist-binwidth, echo = FALSE, warning = FALSE, message = FALSE, fig.height = 10, fig.cap = "Effect of varying bin width on histograms."}
1152
- morley_hist_default <- ggplot(morley_rel, aes(x = relative_accuracy, fill = as_factor(Expt))) +
1185
+ morley_hist_default <- ggplot(morley_rel,
1186
+ aes(x = relative_accuracy,
1187
+ fill = as_factor(Expt))) +
1153
1188
geom_histogram() +
1154
1189
facet_grid(rows = vars(Expt)) +
1155
1190
geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
1156
- labs(x = "Relative Accuracy (%)", y = "# Measurements", fill = "Experiment ID") +
1191
+ labs(x = "Relative Accuracy (%)",
1192
+ y = "# Measurements",
1193
+ fill = "Experiment ID") +
1157
1194
theme(legend.position = "none") +
1158
1195
ggtitle("Default bin width (bins = 30)")
1159
1196
1160
- morley_hist_big <- ggplot(morley_rel, aes(x = relative_accuracy, fill = as_factor(Expt))) +
1197
+ morley_hist_big <- ggplot(morley_rel,
1198
+ aes(x = relative_accuracy,
1199
+ fill = as_factor(Expt))) +
1161
1200
geom_histogram(binwidth = 0.1) +
1162
1201
facet_grid(rows = vars(Expt)) +
1163
1202
geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
1164
- labs(x = "Relative Accuracy (%)", y = "# Measurements", fill = "Experiment ID") +
1203
+ labs(x = "Relative Accuracy (%)",
1204
+ y = "# Measurements",
1205
+ fill = "Experiment ID") +
1165
1206
theme(legend.position = "none") +
1166
1207
ggtitle( "binwidth = 0.1")
1167
1208
1168
- morley_hist_med <- ggplot(morley_rel, aes(x = relative_accuracy, fill = as_factor(Expt))) +
1209
+ morley_hist_med <- ggplot(morley_rel,
1210
+ aes(x = relative_accuracy,
1211
+ fill = as_factor(Expt))) +
1169
1212
geom_histogram(binwidth = 0.01) +
1170
1213
facet_grid(rows = vars(Expt)) +
1171
1214
geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
1172
- labs(x = "Relative Accuracy (%)", y = "# Measurements", fill = "Experiment ID") +
1215
+ labs(x = "Relative Accuracy (%)",
1216
+ y = "# Measurements",
1217
+ fill = "Experiment ID") +
1173
1218
theme(legend.position = "none") +
1174
1219
ggtitle("binwidth = 0.01")
1175
1220
1176
- morley_hist_small <- ggplot(morley_rel, aes(x = relative_accuracy, fill = as_factor(Expt))) +
1221
+ morley_hist_small <- ggplot(morley_rel,
1222
+ aes(x = relative_accuracy,
1223
+ fill = as_factor(Expt))) +
1177
1224
geom_histogram(binwidth = 0.001) +
1178
1225
facet_grid(rows = vars(Expt)) +
1179
1226
geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
1180
- labs(x = "Relative Accuracy (%)", y = "# Measurements", fill = "Experiment ID") +
1227
+ labs(x = "Relative Accuracy (%)",
1228
+ y = "# Measurements",
1229
+ fill = "Experiment ID") +
1181
1230
theme(legend.position = "none") +
1182
1231
ggtitle("binwidth = 0.001")
1183
1232
@@ -1200,7 +1249,8 @@ we can use the `+` operator to add a title layer with the `ggtitle` function.
1200
1249
1201
1250
``` {r 03-data-morley-hist-addlayer, warning = FALSE, message = FALSE, fig.height = 7, fig.cap = "Histogram of relative accuracy split vertically by experiment with a descriptive title highlighting the take home message of the visualization."}
1202
1251
morley_hist_title <- morley_hist +
1203
- ggtitle("Michelson's speed of light experiments \n were accurate to about 0.05%")
1252
+ ggtitle("Speed of light experiments \n were accurate to about 0.05%")
1253
+
1204
1254
morley_hist_title
1205
1255
```
1206
1256
@@ -1369,13 +1419,19 @@ file_sizes <- tibble(`Image type` = c("Bitmap / Raster",
1369
1419
"Bitmap / Raster",
1370
1420
"Vector / Scalable Graphics"),
1371
1421
`File type` = c("PNG", "JPG", "BMP", "TIFF", "SVG"),
1372
- `Image size` = c(paste(round(file.info("img/faithful_plot.png")["size"] / 1000000, 2), "MB"),
1373
- paste(round(file.info("img/faithful_plot.jpg")["size"] / 1000000, 2), "MB"),
1374
- paste(round(file.info("img/faithful_plot.bmp")["size"] / 1000000, 2), "MB"),
1375
- paste(round(file.info("img/faithful_plot.tiff")["size"] / 1000000, 2), "MB"),
1376
- paste(round(file.info("img/faithful_plot.svg")["size"] / 1000000, 2), "MB")))
1422
+ `Image size` = c(paste(round(file.info("img/faithful_plot.png")["size"]
1423
+ / 1000000, 2), "MB"),
1424
+ paste(round(file.info("img/faithful_plot.jpg")["size"]
1425
+ / 1000000, 2), "MB"),
1426
+ paste(round(file.info("img/faithful_plot.bmp")["size"]
1427
+ / 1000000, 2), "MB"),
1428
+ paste(round(file.info("img/faithful_plot.tiff")["size"]
1429
+ / 1000000, 2), "MB"),
1430
+ paste(round(file.info("img/faithful_plot.svg")["size"]
1431
+ / 1000000, 2), "MB")))
1377
1432
kable(file_sizes,
1378
- caption = "File sizes of `faithful_plot` when saved as different file formats.")
1433
+ caption = paste0("File sizes of `faithful_plot`",
1434
+ " when saved as different file formats.")) # paste0 adds no separator, so the space lives in the string
1379
1435
```
1380
1436
1381
1437
Take a look at the file sizes in Table \@ref(tab:filesizes).
0 commit comments