Merge pull request #506 from UBC-DSCI/deprecation-fixes

trevorcampbell · web-flow · commit 18cb5f7b6316 · 2023-07-13T11:40:13.000-07:00
Deprecation fixes
diff --git a/source/classification1.Rmd b/source/classification1.Rmd
@@ -10,9 +10,7 @@ library(ggplot2)
 
 knitr::opts_chunk$set(echo = TRUE, 
                       fig.align = "center")
-options(knitr.table.format = function() {
-  if (knitr::is_latex_output()) 'latex' else 'pandoc'
-})
+options(knitr.table.format = ifelse(knitr::is_latex_output(), 'latex', 'html'))
 
 cleanup_and_print <- function(output){
    for (i in seq_along(output)) {
@@ -360,7 +358,7 @@ perim_concav_with_new_point +
     y = new_point[2],
     xend = pull(neighbors[1, attrs[1]]),
     yend = pull(neighbors[1, attrs[2]])
-  ), color = "black", size = 0.5)
+  ), color = "black", linewidth = 0.5, show.legend = FALSE)
 ```
 
 ```{r 05-knn-3, echo = FALSE}
@@ -407,7 +405,7 @@ perim_concav_with_new_point2 +
     y = new_point[2],
     xend = pull(neighbors[1, attrs[1]]),
     yend = pull(neighbors[1, attrs[2]])
-  ), color = "black", size = 0.5)
+  ), color = "black", linewidth = 0.5, show.legend = FALSE)
 ```
 
 To improve the prediction we can consider several
@@ -424,17 +422,17 @@ perim_concav_with_new_point2 +
     x = new_point[1], y = new_point[2],
     xend = pull(neighbors[1, attrs[1]]),
     yend = pull(neighbors[1, attrs[2]])
-  ), color = "black", size = 0.5) +
+  ), color = "black", linewidth = 0.5, show.legend = FALSE) +
   geom_segment(aes(
     x = new_point[1], y = new_point[2],
     xend = pull(neighbors[2, attrs[1]]),
     yend = pull(neighbors[2, attrs[2]])
-  ), color = "black", size = 0.5) +
+  ), color = "black", linewidth = 0.5, show.legend = FALSE) +
   geom_segment(aes(
     x = new_point[1], y = new_point[2],
     xend = pull(neighbors[3, attrs[1]]),
     yend = pull(neighbors[3, attrs[2]])
-  ), color = "black", size = 0.5)
+  ), color = "black", linewidth = 0.5, show.legend = FALSE)
 ```
 
 Here we chose the $K=3$ nearest observations, but there is nothing special
@@ -987,17 +985,17 @@ unscaled <- ggplot(unscaled_cancer, aes(x = Area,
     x = unlist(new_obs[1]), y = unlist(new_obs[2]),
     xend = unlist(neighbors[1, attrs[1]]),
     yend = unlist(neighbors[1, attrs[2]])
-  ), color = "black", size = 0.5) +
+  ), color = "black", linewidth = 0.5, show.legend = FALSE) +
   geom_segment(aes(
     x = unlist(new_obs[1]), y = unlist(new_obs[2]),
     xend = unlist(neighbors[2, attrs[1]]),
     yend = unlist(neighbors[2, attrs[2]])
-  ), color = "black", size = 0.5) +
+  ), color = "black", linewidth = 0.5, show.legend = FALSE) +
   geom_segment(aes(
     x = unlist(new_obs[1]), y = unlist(new_obs[2]),
     xend = unlist(neighbors[3, attrs[1]]),
     yend = unlist(neighbors[3, attrs[2]])
-  ), color = "black", size = 0.5)
+  ), color = "black", linewidth = 0.5, show.legend = FALSE)
 
 # create new scaled obs and get NNs
 new_obs_scaled <- tibble(Area = -0.72, Smoothness = 2.8, Class = "unknown")
@@ -1032,17 +1030,17 @@ scaled <- ggplot(scaled_cancer, aes(x = Area,
     x = unlist(new_obs_scaled[1]), y = unlist(new_obs_scaled[2]),
     xend = unlist(neighbors_scaled[1, attrs[1]]),
     yend = unlist(neighbors_scaled[1, attrs[2]])
-  ), color = "black", size = 0.5) +
+  ), color = "black", linewidth = 0.5, show.legend = FALSE) +
   geom_segment(aes(
     x = unlist(new_obs_scaled[1]), y = unlist(new_obs_scaled[2]),
     xend = unlist(neighbors_scaled[2, attrs[1]]),
     yend = unlist(neighbors_scaled[2, attrs[2]])
-  ), color = "black", size = 0.5) +
+  ), color = "black", linewidth = 0.5, show.legend = FALSE) +
   geom_segment(aes(
     x = unlist(new_obs_scaled[1]), y = unlist(new_obs_scaled[2]),
     xend = unlist(neighbors_scaled[3, attrs[1]]),
     yend = unlist(neighbors_scaled[3, attrs[2]])
-  ), color = "black", size = 0.5)
+  ), color = "black", linewidth = 0.5, show.legend = FALSE)
 
 ggarrange(unscaled, scaled, ncol = 2, common.legend = TRUE, legend = "bottom")
 
@@ -1070,17 +1068,17 @@ ggplot(unscaled_cancer, aes(x = Area,
     x = unlist(new_obs[1]), y = unlist(new_obs[2]),
     xend = unlist(neighbors[1, attrs[1]]),
     yend = unlist(neighbors[1, attrs[2]])
-  ), color = "black") +
+  ), color = "black", show.legend = FALSE) +
   geom_segment(aes(
     x = unlist(new_obs[1]), y = unlist(new_obs[2]),
     xend = unlist(neighbors[2, attrs[1]]),
     yend = unlist(neighbors[2, attrs[2]])
-  ), color = "black") +
+  ), color = "black", show.legend = FALSE) +
   geom_segment(aes(
     x = unlist(new_obs[1]), y = unlist(new_obs[2]),
     xend = unlist(neighbors[3, attrs[1]]),
     yend = unlist(neighbors[3, attrs[2]])
-  ), color = "black") +  
+  ), color = "black", show.legend = FALSE) +  
    facet_zoom(x = ( Area > 380 & Area < 420) , 
               y = (Smoothness > 0.08 & Smoothness < 0.14), zoom.size = 2) + 
     theme_bw() + 
@@ -1185,7 +1183,7 @@ for (i in 1:7) {
       x = new_point[1],
       y = new_point[2],
       xend = pull(neighbors[i, attrs[1]]),
-      yend = pull(neighbors[i, attrs[2]]), color = clr
+      yend = pull(neighbors[i, attrs[2]]), color = clr, show.legend = FALSE
     )
 }
 rare_plot + geom_point(aes(x = new_point[1], y = new_point[2]),
diff --git a/source/regression2.Rmd b/source/regression2.Rmd
@@ -263,16 +263,16 @@ hidden_print(lm_fit)
 > the best fit coefficients are usually easier to interpret afterward.
 
 Our coefficients are 
-(intercept) $\beta_0=$ `r format(round(pull(tidy(pull_workflow_fit(lm_fit)), estimate)[1]), scientific=FALSE)`
-and (slope) $\beta_1=$ `r format(round(pull(tidy(pull_workflow_fit(lm_fit)), estimate)[2]), scientific=FALSE)`.
+(intercept) $\beta_0=$ `r format(round(pull(tidy(extract_fit_parsnip(lm_fit)), estimate)[1]), scientific=FALSE)`
+and (slope) $\beta_1=$ `r format(round(pull(tidy(extract_fit_parsnip(lm_fit)), estimate)[2]), scientific=FALSE)`.
 This means that the equation of the line of best fit is
 
-$$\text{house sale price} = `r format(round(pull(tidy(pull_workflow_fit(lm_fit)), estimate)[1]), scientific=FALSE)` + `r format(round(pull(tidy(pull_workflow_fit(lm_fit)), estimate)[2]), scientific=FALSE)`\cdot (\text{house size}).$$
+$$\text{house sale price} = `r format(round(pull(tidy(extract_fit_parsnip(lm_fit)), estimate)[1]), scientific=FALSE)` + `r format(round(pull(tidy(extract_fit_parsnip(lm_fit)), estimate)[2]), scientific=FALSE)`\cdot (\text{house size}).$$
 
 In other words, the model predicts that houses 
-start at \$`r format(round(pull(tidy(pull_workflow_fit(lm_fit)), estimate)[1]), big.mark=",", nsmall=0, scientific=FALSE)` for 0 square feet, and that
+start at \$`r format(round(pull(tidy(extract_fit_parsnip(lm_fit)), estimate)[1]), big.mark=",", nsmall=0, scientific=FALSE)` for 0 square feet, and that
 every extra square foot increases the cost of 
-the house by \$`r format(round(pull(tidy(pull_workflow_fit(lm_fit)), estimate)[2]), scientific=FALSE)`. Finally, 
+the house by \$`r format(round(pull(tidy(extract_fit_parsnip(lm_fit)), estimate)[2]), scientific=FALSE)`. Finally, 
 we predict on the test data set to assess how well our model does:
 
 ```{r 08-assessFinal}
@@ -314,12 +314,12 @@ lm_plot_final
 
 We can extract the coefficients from our model by accessing the
 fit object that is output by the `fit` \index{tidymodels!fit} function; we first have to extract
-it from the workflow using the `pull_workflow_fit` function, and then apply
+it from the workflow using the `extract_fit_parsnip` function, and then apply
 the `tidy` function to convert the result into a data frame:
 
 ```{r 08-lm-get-coeffs}
 coeffs <- lm_fit |>
-             pull_workflow_fit() |>
+             extract_fit_parsnip() |>
              tidy()
 
 coeffs
@@ -550,7 +550,7 @@ as shown below:
 
 ```{r 08-lm-multi-get-coeffs}
 mcoeffs <- mlm_fit |>
-             pull_workflow_fit() |>
+             extract_fit_parsnip() |>
              tidy()
 
 mcoeffs
@@ -728,7 +728,7 @@ lm_fit1 <- workflow() |>
   add_model(lm_spec) |>
   fit(data = sacramento_train)
 
-coeffs <- tidy(pull_workflow_fit(lm_fit1))
+coeffs <- tidy(extract_fit_parsnip(lm_fit1))
 
 icept1 <- format(round(coeffs |> 
                          filter(term == "(Intercept)") |>  
@@ -750,7 +750,7 @@ lm_fit2 <- workflow() |>
   add_model(lm_spec) |>
   fit(data = sacramento_train)
 
-coeffs <- tidy(pull_workflow_fit(lm_fit2))
+coeffs <- tidy(extract_fit_parsnip(lm_fit2))
 icept2 <- format(round(coeffs |> 
                          filter(term == "(Intercept)") |> 
                          pull(estimate)), 
@@ -771,7 +771,7 @@ lm_fit3 <- workflow() |>
   add_model(lm_spec) |>
   fit(data = sacramento_train)
 
-coeffs <- tidy(pull_workflow_fit(lm_fit3))
+coeffs <- tidy(extract_fit_parsnip(lm_fit3))
 icept3 <- format(round(coeffs |> 
                          filter(term == "(Intercept)") |> 
                          pull(estimate)), 
diff --git a/source/viz.Rmd b/source/viz.Rmd
@@ -9,6 +9,8 @@ library(magick)
 
 
 knitr::opts_chunk$set(fig.align = "center")
+
+options(knitr.table.format = ifelse(knitr::is_latex_output(), "latex", "html"))
 ```
 
 ## Overview 
@@ -1055,7 +1057,7 @@ We would also like to fine tune this vertical line,
 styling it so that it is dashed and 1 point in thickness.
 A point is a measurement unit commonly used with fonts, 
 and 1 point is about 0.353 mm. 
-We do this by setting `linetype = "dashed"` and `size = 1`, respectively. 
+We do this by setting `linetype = "dashed"` and `linewidth = 1`, respectively. 
 There is a similar function, `geom_hline`, 
 that is used for plotting horizontal lines. 
 Note that 
@@ -1065,7 +1067,7 @@ while *horizontal lines* are used to denote quantities on the *vertical axis*.
 ```{r 03-data-morley-hist-2, warning=FALSE,  fig.height = 2.75, fig.width = 4.5, fig.align = "center", fig.pos = "H", out.extra="", message=FALSE,fig.cap = "Histogram of Michelson's speed of light data with vertical line indicating true speed of light."}
 morley_hist <- ggplot(morley, aes(x = Speed)) +
   geom_histogram() +
-  geom_vline(xintercept = 792.458, linetype = "dashed", size = 1)
+  geom_vline(xintercept = 792.458, linetype = "dashed", linewidth = 1)
 
 morley_hist
 ```
@@ -1094,7 +1096,7 @@ when they are colored by another categorical variable).
 ```{r 03-data-morley-hist-3, warning=FALSE, message=FALSE,  fig.height = 2.75, fig.width = 4.5, fig.align = "center", fig.pos = "H", out.extra="", fig.cap = "Histogram of Michelson's speed of light data where an attempt is made to color the bars by experiment."}
 morley_hist <- ggplot(morley, aes(x = Speed, fill = Expt)) +
   geom_histogram(alpha = 0.5, position = "identity") +
-  geom_vline(xintercept = 792.458, linetype = "dashed", size = 1.0)
+  geom_vline(xintercept = 792.458, linetype = "dashed", linewidth = 1.0)
 
 morley_hist
 ```
@@ -1119,7 +1121,7 @@ and the color will be mapped discretely.
 ```{r 03-data-morley-hist-with-factor, warning=FALSE, message=FALSE,  fig.height = 2.75, fig.width = 5, fig.pos = "H", out.extra="", fig.align = "center", fig.cap = "Histogram of Michelson's speed of light data colored by experiment as factor."}
 morley_hist <- ggplot(morley, aes(x = Speed, fill = as_factor(Expt))) +
   geom_histogram(alpha = 0.5, position = "identity") +
-  geom_vline(xintercept = 792.458, linetype = "dashed", size = 1.0)
+  geom_vline(xintercept = 792.458, linetype = "dashed", linewidth = 1.0)
 
 morley_hist
 ```
@@ -1158,7 +1160,7 @@ in the context of the data frame.
 morley_hist <- ggplot(morley, aes(x = Speed, fill = as_factor(Expt))) +
   geom_histogram() +
   facet_grid(rows = vars(Expt)) +
-  geom_vline(xintercept = 792.458, linetype = "dashed", size = 1.0)
+  geom_vline(xintercept = 792.458, linetype = "dashed", linewidth = 1.0)
 
 morley_hist
 ```
@@ -1189,7 +1191,7 @@ morley_hist <- ggplot(morley_rel,
                           fill = as_factor(Expt))) +
   geom_histogram() +
   facet_grid(rows = vars(Expt)) +
-  geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
+  geom_vline(xintercept = 0, linetype = "dashed", linewidth = 1.0) +
   labs(x = "Relative Accuracy (%)", 
        y = "# Measurements", 
        fill = "Experiment ID") +
@@ -1243,7 +1245,7 @@ morley_hist_default <- ggplot(morley_rel,
                                   fill = as_factor(Expt))) +
   geom_histogram() +
   facet_grid(rows = vars(Expt)) +
-  geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
+  geom_vline(xintercept = 0, linetype = "dashed", linewidth = 1.0) +
   labs(x = "Relative Accuracy (%)", 
        y = "# Measurements", 
        fill = "Experiment ID") +
@@ -1256,7 +1258,7 @@ morley_hist_big <- ggplot(morley_rel,
                               fill = as_factor(Expt))) +
   geom_histogram(binwidth = 0.1) +
   facet_grid(rows = vars(Expt)) +
-  geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
+  geom_vline(xintercept = 0, linetype = "dashed", linewidth = 1.0) +
   labs(x = "Relative Accuracy (%)", 
        y = "# Measurements", 
        fill = "Experiment ID") +
@@ -1269,7 +1271,7 @@ morley_hist_med <- ggplot(morley_rel,
                               fill = as_factor(Expt))) +
   geom_histogram(binwidth = 0.01) +
   facet_grid(rows = vars(Expt)) +
-  geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
+  geom_vline(xintercept = 0, linetype = "dashed", linewidth = 1.0) +
   labs(x = "Relative Accuracy (%)", 
        y = "# Measurements", 
        fill = "Experiment ID") +
@@ -1282,7 +1284,7 @@ morley_hist_small <- ggplot(morley_rel,
                                 fill = as_factor(Expt))) +
   geom_histogram(binwidth = 0.001) +
   facet_grid(rows = vars(Expt)) +
-  geom_vline(xintercept = 0, linetype = "dashed", size = 1.0) +
+  geom_vline(xintercept = 0, linetype = "dashed", linewidth = 1.0) +
   labs(x = "Relative Accuracy (%)", 
        y = "# Measurements", 
        fill = "Experiment ID") +
@@ -1491,7 +1493,7 @@ file_sizes <- tibble(`Image type` = c("Raster",
                                     / 1000000, 2), "MB"),
                         paste(round(file.info("img/viz/faithful_plot.svg")["size"] 
                                     / 1000000, 2), "MB")))
-kable(file_sizes,
+kable(file_sizes, booktabs = TRUE,
       caption = "File sizes of the scatter plot of the Old Faithful data set when saved as different file formats.") |>
   kable_styling(latex_options = "hold_position")
 ```
diff --git a/source/wrangling.Rmd b/source/wrangling.Rmd
@@ -1374,16 +1374,20 @@ region_lang |>
 > also return `NA`s when we apply them to columns that 
 > contain `NA`s in the data frame.  \index{missing data}
 > 
-> To avoid this, again we need to add the argument `na.rm = TRUE`,
-> but in this case we need to use it a little bit differently.
-> In this case, we need to add a `,` and then `na.rm = TRUE`,
-> after specifying the function we want `summarize` + `across` to apply, 
-> as illustrated below:
+> To resolve this issue, again we need to add the argument `na.rm = TRUE`.
+> But in this case we need to use it a little bit differently:
+> we write a `~`, and then call the summary function
+> with the first argument `.x` and the second argument `na.rm = TRUE`.
+> For example, returning to the `max` function from the previous example, we would write:
 > 
 > ``` {r}
 > region_lang_na |>
->   summarize(across(mother_tongue:lang_known, max, na.rm = TRUE))
+>   summarize(across(mother_tongue:lang_known, ~ max(.x, na.rm = TRUE)))
 > ```
+> The meaning of this unusual syntax is a bit beyond the scope of this book,
+> but interested readers can look up *anonymous functions* in the `purrr` 
+> package from the `tidyverse`.
+
 
 #### `map` for calculating summary statistics on many columns {-}