@@ -5,6 +5,7 @@ library(gridExtra)
5
5
library(cowplot)
6
6
library(stringr)
7
7
library(knitr)
8
+ library(ggplot2)
8
9
9
10
knitr::opts_chunk$set(fig.align = "center")
10
11
@@ -24,6 +25,9 @@ print_tidymodels <- function(tidymodels_object) {
24
25
}
25
26
}
26
27
}
28
+
29
+ theme_update(axis.title = element_text(size = 12)) # modify axis label size in plots
30
+
27
31
```
28
32
29
33
## Overview
@@ -227,7 +231,8 @@ perim_concav <- cancer |>
227
231
geom_point(alpha = 0.5) +
228
232
labs(color = "Diagnosis") +
229
233
scale_color_manual(labels = c("Malignant", "Benign"),
230
- values = c("orange2", "steelblue2"))
234
+ values = c("orange2", "steelblue2")) +
235
+ theme(text = element_text(size = 12))
231
236
232
237
perim_concav
233
238
```
@@ -782,7 +787,8 @@ as shown in Figure \@ref(fig:06-find-k).
782
787
accuracy_vs_k <- ggplot(accuracies, aes(x = neighbors, y = mean)) +
783
788
geom_point() +
784
789
geom_line() +
785
- labs(x = "Neighbors", y = "Accuracy Estimate")
790
+ labs(x = "Neighbors", y = "Accuracy Estimate") +
791
+ theme(text = element_text(size = 12))
786
792
787
793
accuracy_vs_k
788
794
```
@@ -839,7 +845,8 @@ accuracies <- knn_results |>
839
845
accuracy_vs_k_lots <- ggplot(accuracies, aes(x = neighbors, y = mean)) +
840
846
geom_point() +
841
847
geom_line() +
842
- labs(x = "Neighbors", y = "Accuracy Estimate")
848
+ labs(x = "Neighbors", y = "Accuracy Estimate") +
849
+ theme(text = element_text(size = 12))
843
850
844
851
accuracy_vs_k_lots
845
852
```
@@ -919,7 +926,7 @@ for (i in 1:length(ks)) {
919
926
ggtitle(paste("K = ", ks[[i]])) +
920
927
scale_color_manual(labels = c("Malignant", "Benign"),
921
928
values = c("orange2", "steelblue2")) +
922
- theme(text = element_text(size = 18))
929
+ theme(text = element_text(size = 18), axis.title=element_text(size=18))
923
930
}
924
931
925
932
p_no_legend <- lapply(plots, function(x) x + theme(legend.position = "none"))
@@ -1029,7 +1036,7 @@ variables there are, the more (random) influence they have, and the more they
1029
1036
corrupt the set of nearest neighbors that vote on the class of the new
1030
1037
observation to predict.
1031
1038
1032
- ``` {r 06-performance-irrelevant-features, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "60 %", fig.cap = "Effect of inclusion of irrelevant predictors."}
1039
+ ``` {r 06-performance-irrelevant-features, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "65 %", fig.cap = "Effect of inclusion of irrelevant predictors."}
1033
1040
# get accuracies after including k irrelevant features
1034
1041
ks <- c(0, 5, 10, 15, 20, 40)
1035
1042
fixedaccs <- list()
@@ -1103,7 +1110,7 @@ plt_irrelevant_accuracies <- ggplot(res) +
1103
1110
geom_line(mapping = aes(x=ks, y=accs)) +
1104
1111
labs(x = "Number of Irrelevant Predictors",
1105
1112
y = "Model Accuracy Estimate") +
1106
- theme(text = element_text(size = 18))
1113
+ theme(text = element_text(size = 18), axis.title=element_text(size=18))
1107
1114
1108
1115
plt_irrelevant_accuracies
1109
1116
```
@@ -1119,12 +1126,12 @@ variables, the number of neighbors does not increase smoothly; but the general t
1119
1126
Figure \@ref(fig:06-fixed-irrelevant-features) corroborates
1120
1127
this evidence; if we fix the number of neighbors to $K=3$, the accuracy falls off more quickly.
1121
1128
1122
- ``` {r 06-neighbors-irrelevant-features, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "60 %", fig.cap = "Tuned number of neighbors for varying number of irrelevant predictors."}
1129
+ ``` {r 06-neighbors-irrelevant-features, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "65 %", fig.cap = "Tuned number of neighbors for varying number of irrelevant predictors."}
1123
1130
plt_irrelevant_nghbrs <- ggplot(res) +
1124
1131
geom_line(mapping = aes(x=ks, y=nghbrs)) +
1125
1132
labs(x = "Number of Irrelevant Predictors",
1126
1133
y = "Number of neighbors") +
1127
- theme(text = element_text(size = 18))
1134
+ theme(text = element_text(size = 18), axis.title=element_text(size=18))
1128
1135
1129
1136
plt_irrelevant_nghbrs
1130
1137
```
@@ -1138,7 +1145,7 @@ plt_irrelevant_nghbrs <- ggplot(res_tmp) +
1138
1145
geom_line(mapping = aes(x=ks, y=accuracy, color=Type)) +
1139
1146
labs(x = "Number of Irrelevant Predictors", y = "Accuracy") +
1140
1147
scale_color_discrete(labels= c("Tuned K", "K = 3")) +
1141
- theme(text = element_text(size = 16))
1148
+ theme(text = element_text(size = 17), axis.title=element_text(size=17))
1142
1149
1143
1150
plt_irrelevant_nghbrs
1144
1151
```
@@ -1366,12 +1373,12 @@ where the elbow occurs, and whether adding a variable provides a meaningful incr
1366
1373
> part of tuning your classifier, you *cannot use your test data* for this
1367
1374
> process!
1368
1375
1369
- ``` {r 06-fwdsel-3, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "60 %", fig.cap = "Estimated accuracy versus the number of predictors for the sequence of models built using forward selection."}
1376
+ ``` {r 06-fwdsel-3, echo = FALSE, warning = FALSE, fig.retina = 2, out.width = "65 %", fig.cap = "Estimated accuracy versus the number of predictors for the sequence of models built using forward selection."}
1370
1377
fwd_sel_accuracies_plot <- accuracies |>
1371
1378
ggplot(aes(x = size, y = accuracy)) +
1372
1379
geom_line() +
1373
1380
labs(x = "Number of Predictors", y = "Estimated Accuracy") +
1374
- theme(text = element_text(size = 18))
1381
+ theme(text = element_text(size = 20), axis.title=element_text(size=20))
1375
1382
1376
1383
fwd_sel_accuracies_plot
1377
1384
```
0 commit comments