Commit 9975fb5
fixed figure captions for captions with underscores and merged dev
2 parents 3ceb826 + dad07d8 commit 9975fb5

File tree

8 files changed (+131, -41 lines)

classification1.Rmd

Lines changed: 8 additions & 2 deletions

````diff
@@ -11,6 +11,7 @@ knitr::opts_chunk$set(echo = TRUE,
 options(knitr.table.format = function() {
   if (knitr::is_latex_output()) 'latex' else 'pandoc'
 })
+reticulate::use_miniconda('r-reticulate')
 ```

 ## Overview
@@ -572,7 +573,7 @@ Based on $K=5$ nearest neighbors with these three predictors we would classify t
 Figure \@ref(fig:05-more) shows what the data look like when we visualize them
 as a 3-dimensional scatter with lines from the new observation to its five nearest neighbors.

-```{r 05-more, echo = FALSE, message = FALSE, fig.cap = "3D scatter plot of the standardized symmetry, concavity, and perimeter variables. Note that in general we recommend against using 3D visualizations; here we show the data in 3D only to illustrate what higher dimensions and nearest neighbors look like, for learning purposes.", fig.retina=2, out.width="80%"}
+```{r 05-more, echo = FALSE, message = FALSE, fig.cap = "3D scatter plot of the standardized symmetry, concavity, and perimeter variables. Note that in general we recommend against using 3D visualizations; here we show the data in 3D only to illustrate what higher dimensions and nearest neighbors look like, for learning purposes.", fig.retina=2, out.width="100%"}
 attrs <- c("Perimeter", "Concavity", "Symmetry")

 # create new scaled obs and get NNs
@@ -602,7 +603,7 @@ plot_3d <- scaled_cancer_3 |>
     z = ~Symmetry,
     color = ~Class,
     opacity = 0.4,
-    size = 150,
+    size = 2,
     colors = c("orange2", "steelblue2", "red"),
     symbol = ~Class, symbols = c('circle','circle','diamond'))

@@ -641,6 +642,11 @@ plot_3d <- plot_3d %>%
 if(!is_latex_output()){
   plot_3d
 } else {
+  # scene = list(camera = list(eye = list(x=2, y=2, z = 1.5)))
+  # plot_3d <- plot_3d %>% layout(scene = scene)
+  # save_image(plot_3d, "img/plot3d_knn_classification.png", scale = 10)
+  # cannot adjust size of points in this plot for pdf
+  # so using a screenshot for now instead
   knitr::include_graphics("img/plot3d_knn_classification.png")
 }
 ```
````

clustering.Rmd

Lines changed: 15 additions & 5 deletions

````diff
@@ -254,7 +254,9 @@ In the first cluster from the example, there are `r nrow(clus1)` data points. Th
 (`r paste("flipper_length_standardized =", round(mean(clus1$flipper_length_standardized),2))` and `r paste("bill_length_standardized =", round(mean(clus1$bill_length_standardized),2))`) highlighted
 in Figure \@ref(fig:10-toy-example-clus1-center).

-```{r 10-toy-example-clus1-center, echo = FALSE, warning = FALSE, fig.height = 4, fig.width = 4.35, fig.cap = "Cluster 1 from the `penguin_data` data set example. Observations are in blue, with the cluster center highlighted in red."}
+(ref:10-toy-example-clus1-center) Cluster 1 from the `penguin_data` data set example. Observations are in blue, with the cluster center highlighted in red.
+
+```{r 10-toy-example-clus1-center, echo = FALSE, warning = FALSE, fig.height = 4, fig.width = 4.35, fig.cap = "(ref:10-toy-example-clus1-center)"}
 base <- ggplot(data, aes(x = flipper_length_standardized, y = bill_length_standardized)) +
   geom_point() +
   xlab("Flipper Length (standardized)") +
@@ -299,7 +301,9 @@ S^2 = \left((x_1 - \mu_x)^2 + (y_1 - \mu_y)^2\right) + \left((x_2 - \mu_x)^2 + (

 These distances are denoted by lines in Figure \@ref(fig:10-toy-example-clus1-dists) for the first cluster of the penguin data example.

-```{r 10-toy-example-clus1-dists, echo = FALSE, warning = FALSE, fig.height = 4, fig.width = 4.35, fig.cap = "Cluster 1 from the `penguin_data` data set example. Observations are in blue, with the cluster center highlighted in red. The distances from the observations to the cluster center are represented as black lines."}
+(ref:10-toy-example-clus1-dists) Cluster 1 from the `penguin_data` data set example. Observations are in blue, with the cluster center highlighted in red. The distances from the observations to the cluster center are represented as black lines.
+
+```{r 10-toy-example-clus1-dists, echo = FALSE, warning = FALSE, fig.height = 4, fig.width = 4.35, fig.cap = "(ref:10-toy-example-clus1-dists)"}
 base <- ggplot(clus1) +
   geom_point(aes(y = bill_length_standardized,
                  x = flipper_length_standardized),
@@ -336,7 +340,9 @@ this means adding up all the squared distances for the 18 observations.
 These distances are denoted by black lines in
 Figure \@ref(fig:10-toy-example-all-clus-dists).

-```{r 10-toy-example-all-clus-dists, echo = FALSE, warning = FALSE, fig.height = 4, fig.width = 5, fig.cap = "All clusters from the `penguin_data` data set example. Observations are in orange, blue, and yellow with the cluster center highlighted in red. The distances from the observations to each of the respective cluster centers are represented as black lines."}
+(ref:10-toy-example-all-clus-dists) All clusters from the `penguin_data` data set example. Observations are in orange, blue, and yellow with the cluster center highlighted in red. The distances from the observations to each of the respective cluster centers are represented as black lines.
+
+```{r 10-toy-example-all-clus-dists, echo = FALSE, warning = FALSE, fig.height = 4, fig.width = 5, fig.cap = "(ref:10-toy-example-all-clus-dists)"}


 all_clusters_base <- data |>
@@ -431,7 +437,9 @@ There each row corresponds to an iteration,
 where the left column depicts the center update,
 and the right column depicts the reassignment of data to clusters.

-```{r 10-toy-kmeans-iter, echo = FALSE, warning = FALSE, fig.height = 16, fig.width = 8, fig.cap = "First four iterations of K-means clustering on the `penguin_data` example data set. Each row corresponds to an iteration, where the left column depicts the center update, and the right column depicts the reassignment of data to clusters. Cluster centers are indicated by larger points that are outlined in black."}
+(ref:10-toy-kmeans-iter) First four iterations of K-means clustering on the `penguin_data` example data set. Each row corresponds to an iteration, where the left column depicts the center update, and the right column depicts the reassignment of data to clusters. Cluster centers are indicated by larger points that are outlined in black.
+
+```{r 10-toy-kmeans-iter, echo = FALSE, warning = FALSE, fig.height = 16, fig.width = 8, fig.cap = "(ref:10-toy-kmeans-iter)"}
 list_plot_cntrs <- vector(mode = "list", length = 4)
 list_plot_lbls <- vector(mode = "list", length = 4)

@@ -557,7 +565,9 @@ plt_lbl

 Figure \@ref(fig:10-toy-kmeans-bad-iter) shows what the iterations of K-means would look like with the unlucky random initialization shown in Figure \@ref(fig:10-toy-kmeans-bad-init).

-```{r 10-toy-kmeans-bad-iter, echo = FALSE, warning = FALSE, fig.height = 20, fig.width = 8, fig.cap = "First five iterations of K-means clustering on the `penguin_data` example data set with a poor random initialization. Each row corresponds to an iteration, where the left column depicts the center update, and the right column depicts the reassignment of data to clusters. Cluster centers are indicated by larger points that are outlined in black."}
+(ref:10-toy-kmeans-bad-iter) First five iterations of K-means clustering on the `penguin_data` example data set with a poor random initialization. Each row corresponds to an iteration, where the left column depicts the center update, and the right column depicts the reassignment of data to clusters. Cluster centers are indicated by larger points that are outlined in black.
+
+```{r 10-toy-kmeans-bad-iter, echo = FALSE, warning = FALSE, fig.height = 20, fig.width = 8, fig.cap = "(ref:10-toy-kmeans-bad-iter)"}
 list_plot_cntrs <- vector(mode = "list", length = 5)
 list_plot_lbls <- vector(mode = "list", length = 5)
````

img/generate-pat_01.png (120 KB)

img/generate-pat_02.png (402 KB)

img/generate-pat_03.png (202 KB)

regression1.Rmd

Lines changed: 8 additions & 4 deletions

````diff
@@ -5,6 +5,7 @@ library(knitr)
 library(plotly)

 knitr::opts_chunk$set(fig.align = "center")
+reticulate::use_miniconda('r-reticulate')
 ```

 ## Overview
@@ -759,7 +760,7 @@ Figure \@ref(fig:07-knn-mult-viz) visualizes the model's predictions overlaid on
 time the predictions are a surface in 3D space, instead of a line in 2D space, as we have 2
 predictors instead of 1.

-```{r 07-knn-mult-viz, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "KNN regression model’s predictions represented as a surface in 3D space overlaid on top of the data using three predictors (price, house size, and the number of bedrooms). Note that in general we recommend against using 3D visualizations; here we use a 3D visualization only to illustrate what the surface of predictions looks like for learning purposes.", out.width="80%"}
+```{r 07-knn-mult-viz, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "KNN regression model’s predictions represented as a surface in 3D space overlaid on top of the data using three predictors (price, house size, and the number of bedrooms). Note that in general we recommend against using 3D visualizations; here we use a 3D visualization only to illustrate what the surface of predictions looks like for learning purposes.", out.width="100%"}
 xvals <- seq(from = min(sacramento_train$sqft),
              to = max(sacramento_train$sqft),
              length = 50)
@@ -780,12 +781,12 @@ plot_3d <- plot_ly() |>
     x = ~sqft,
     y = ~beds,
     z = ~price,
-    marker = list(size = 5, opacity = 0.4, color = "red")
+    marker = list(size = 2, opacity = 0.4, color = "red")
   ) |>
   layout(scene = list(
-    xaxis = list(title = "House size (square feet)"),
+    xaxis = list(title = "Size (sq ft)"),
     zaxis = list(title = "Price (USD)"),
-    yaxis = list(title = "Number of bedrooms")
+    yaxis = list(title = "Bedrooms")
   )) |>
   add_surface(
     x = ~xvals,
@@ -797,6 +798,9 @@ plot_3d <- plot_ly() |>
 if(!is_latex_output()){
   plot_3d
 } else {
+  scene = list(camera = list(eye = list(x = -2.1, y = -2.2, z = 0.75)))
+  plot_3d <- plot_3d |> layout(scene = scene)
+  save_image(plot_3d, "img/plot3d_knn_regression.png", scale = 10)
   knitr::include_graphics("img/plot3d_knn_regression.png")
 }
 ```
````
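The `else` branch added in the last hunk follows a common bookdown pattern for interactive plotly figures: show the live htmlwidget in HTML output and fall back to a static image for PDF (LaTeX) output, since htmlwidgets cannot be embedded in PDF. Note that `plotly::save_image()` relies on the kaleido Python backend, which is presumably why these commits also add `reticulate::use_miniconda('r-reticulate')` to the setup chunks. A minimal sketch of the fallback, with the file path illustrative:

```r
if (!knitr::is_latex_output()) {
  plot_3d  # interactive figure for HTML output
} else {
  # static export requires the kaleido backend via reticulate;
  # typically generated with plotly::save_image(plot_3d, "img/plot3d_example.png")
  knitr::include_graphics("img/plot3d_example.png")
}
```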

regression2.Rmd

Lines changed: 8 additions & 4 deletions

````diff
@@ -5,6 +5,7 @@ library(knitr)
 library(plotly)

 knitr::opts_chunk$set(fig.align = "center")
+reticulate::use_miniconda('r-reticulate')
 ```

 ## Overview
@@ -453,7 +454,7 @@ is `r format(round(lm_mult_test_results %>% filter(.metric == 'rmse') %>% pull(.
 In the case of two predictors, we can plot the predictions made by our linear regression creates a *plane* of best fit, as
 shown in Figure \@ref(fig:08-3DlinReg).

-```{r 08-3DlinReg, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Linear regression plane of best fit overlaid on top of the data (using price, house size, and number of bedrooms as predictors). Note that in general we recommend against using 3D visualizations; here we use a 3D visualization only to illustrate what the regression plane looks like for learning purposes.", out.width="80%"}
+```{r 08-3DlinReg, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Linear regression plane of best fit overlaid on top of the data (using price, house size, and number of bedrooms as predictors). Note that in general we recommend against using 3D visualizations; here we use a 3D visualization only to illustrate what the regression plane looks like for learning purposes.", out.width="100%"}
 xvals <- seq(from = min(sacramento_train$sqft),
              to = max(sacramento_train$sqft),
              length = 50)
@@ -474,12 +475,12 @@ plot_3d <- plot_ly() |>
     x = ~sqft,
     y = ~beds,
     z = ~price,
-    marker = list(size = 5, opacity = 0.4, color = "red")
+    marker = list(size = 2, opacity = 0.4, color = "red")
   ) |>
   layout(scene = list(
-    xaxis = list(title = "House size (square feet)"),
+    xaxis = list(title = "Size (sq ft)"),
     zaxis = list(title = "Price (USD)"),
-    yaxis = list(title = "Number of bedrooms")
+    yaxis = list(title = "Bedrooms")
   )) |>
   add_surface(
     x = ~xvals,
@@ -491,6 +492,9 @@ plot_3d <- plot_ly() |>
 if(!is_latex_output()){
   plot_3d
 } else {
+  scene = list(camera = list(eye = list(x = -2.1, y = -2.2, z = 0.75)))
+  plot_3d <- plot_3d %>% layout(scene = scene)
+  save_image(plot_3d, "img/plot3d_linear_regression.png", scale = 10)
   knitr::include_graphics("img/plot3d_linear_regression.png")
 }
 ```
````
