@@ -5,6 +5,7 @@ library(gridExtra)
5
5
library(cowplot)
6
6
library(stringr)
7
7
library(knitr)
8
+ library(ggplot2)
8
9
9
10
knitr::opts_chunk$set(fig.align = "center")
10
11
@@ -24,6 +25,9 @@ print_tidymodels <- function(tidymodels_object) {
24
25
}
25
26
}
26
27
}
28
+
29
+ theme_update(axis.title = element_text(size = 12)) # modify axis label size in plots
30
+
27
31
```
28
32
29
33
## Overview
@@ -227,7 +231,8 @@ perim_concav <- cancer |>
227
231
geom_point(alpha = 0.5) +
228
232
labs(color = "Diagnosis") +
229
233
scale_color_manual(labels = c("Malignant", "Benign"),
230
- values = c("orange2", "steelblue2"))
234
+ values = c("orange2", "steelblue2")) +
235
+ theme(text = element_text(size = 12))
231
236
232
237
perim_concav
233
238
```
@@ -782,7 +787,8 @@ as shown in Figure \@ref(fig:06-find-k).
782
787
accuracy_vs_k <- ggplot(accuracies, aes(x = neighbors, y = mean)) +
783
788
geom_point() +
784
789
geom_line() +
785
- labs(x = "Neighbors", y = "Accuracy Estimate")
790
+ labs(x = "Neighbors", y = "Accuracy Estimate") +
791
+ theme(text = element_text(size = 12))
786
792
787
793
accuracy_vs_k
788
794
```
@@ -839,7 +845,8 @@ accuracies <- knn_results |>
839
845
accuracy_vs_k_lots <- ggplot(accuracies, aes(x = neighbors, y = mean)) +
840
846
geom_point() +
841
847
geom_line() +
842
- labs(x = "Neighbors", y = "Accuracy Estimate")
848
+ labs(x = "Neighbors", y = "Accuracy Estimate") +
849
+ theme(text = element_text(size = 12))
843
850
844
851
accuracy_vs_k_lots
845
852
```
@@ -919,7 +926,7 @@ for (i in 1:length(ks)) {
919
926
ggtitle(paste("K = ", ks[[i]])) +
920
927
scale_color_manual(labels = c("Malignant", "Benign"),
921
928
values = c("orange2", "steelblue2")) +
922
- theme(text = element_text(size = 18))
929
+ theme(text = element_text(size = 18), axis.title=element_text(size=18))
923
930
}
924
931
925
932
p_no_legend <- lapply(plots, function(x) x + theme(legend.position = "none"))
@@ -1029,7 +1036,7 @@ variables there are, the more (random) influence they have, and the more they
1029
1036
corrupt the set of nearest neighbors that vote on the class of the new
1030
1037
observation to predict.
1031
1038
1032
- ``` {r 06-performance-irrelevant-features, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "60 %", fig.cap = "Effect of inclusion of irrelevant predictors."}
1039
+ ``` {r 06-performance-irrelevant-features, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "65 %", fig.cap = "Effect of inclusion of irrelevant predictors."}
1033
1040
# get accuracies after including k irrelevant features
1034
1041
ks <- c(0, 5, 10, 15, 20, 40)
1035
1042
fixedaccs <- list()
@@ -1103,7 +1110,7 @@ plt_irrelevant_accuracies <- ggplot(res) +
1103
1110
geom_line(mapping = aes(x=ks, y=accs)) +
1104
1111
labs(x = "Number of Irrelevant Predictors",
1105
1112
y = "Model Accuracy Estimate") +
1106
- theme(text = element_text(size = 18))
1113
+ theme(text = element_text(size = 18), axis.title=element_text(size=18))
1107
1114
1108
1115
plt_irrelevant_accuracies
1109
1116
```
@@ -1119,12 +1126,12 @@ variables, the number of neighbors does not increase smoothly; but the general t
1119
1126
Figure \@ref(fig:06-fixed-irrelevant-features) corroborates
1120
1127
this evidence; if we fix the number of neighbors to $K=3$, the accuracy falls off more quickly.
1121
1128
1122
- ``` {r 06-neighbors-irrelevant-features, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "60 %", fig.cap = "Tuned number of neighbors for varying number of irrelevant predictors."}
1129
+ ``` {r 06-neighbors-irrelevant-features, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "65 %", fig.cap = "Tuned number of neighbors for varying number of irrelevant predictors."}
1123
1130
plt_irrelevant_nghbrs <- ggplot(res) +
1124
1131
geom_line(mapping = aes(x=ks, y=nghbrs)) +
1125
1132
labs(x = "Number of Irrelevant Predictors",
1126
1133
y = "Number of neighbors") +
1127
- theme(text = element_text(size = 18))
1134
+ theme(text = element_text(size = 18), axis.title=element_text(size=18))
1128
1135
1129
1136
plt_irrelevant_nghbrs
1130
1137
```
@@ -1138,7 +1145,7 @@ plt_irrelevant_nghbrs <- ggplot(res_tmp) +
1138
1145
geom_line(mapping = aes(x=ks, y=accuracy, color=Type)) +
1139
1146
labs(x = "Number of Irrelevant Predictors", y = "Accuracy") +
1140
1147
scale_color_discrete(labels= c("Tuned K", "K = 3")) +
1141
- theme(text = element_text(size = 16))
1148
+ theme(text = element_text(size = 17), axis.title=element_text(size=17))
1142
1149
1143
1150
plt_irrelevant_nghbrs
1144
1151
```
@@ -1366,12 +1373,12 @@ where the elbow occurs, and whether adding a variable provides a meaningful incr
1366
1373
> part of tuning your classifier, you *cannot use your test data* for this
1367
1374
> process!
1368
1375
1369
- ``` {r 06-fwdsel-3, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "60 %", fig.cap = "Estimated accuracy versus the number of predictors for the sequence of models built using forward selection."}
1376
+ ``` {r 06-fwdsel-3, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "65 %", fig.cap = "Estimated accuracy versus the number of predictors for the sequence of models built using forward selection."}
1370
1377
fwd_sel_accuracies_plot <- accuracies |>
1371
1378
ggplot(aes(x = size, y = accuracy)) +
1372
1379
geom_line() +
1373
1380
labs(x = "Number of Predictors", y = "Estimated Accuracy") +
1374
- theme(text = element_text(size = 18))
1381
+ theme(text = element_text(size = 20), axis.title=element_text(size=20))
1375
1382
1376
1383
fwd_sel_accuracies_plot
1377
1384
```
0 commit comments