Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ export(brulee_logistic_reg)
export(brulee_mlp)
export(brulee_multinomial_reg)
export(coef)
export(guess_brulee_device)
export(matrix_to_dataset)
export(schedule_cyclic)
export(schedule_decay_expo)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

* Fixed a bug where SGD was always being used as the optimizer (#61).

* Added many more activation functions for `brulee_mlp()` (#74).

* Enabled GPUs for computations (#).

* Rewrote the unit test suite due to irreproducibility issues across operating systems (#75).

# brulee 0.2.0

* Several learning rate schedulers were added to the modeling functions (#12).
Expand Down
7 changes: 7 additions & 0 deletions R/activation.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ allowed_activation <-
#' Activation functions for neural networks in brulee
#'
#' @return A character vector of values.
#' @seealso [torch::nn_celu()], [torch::nn_elu()], [torch::nn_gelu()],
#' [torch::nn_hardshrink()], [torch::nn_hardsigmoid()], [torch::nn_hardtanh()],
#' [torch::nn_leaky_relu()], [torch::nn_identity()], [torch::nn_log_sigmoid()],
#' [torch::nn_relu()], [torch::nn_relu6()], [torch::nn_rrelu()], [torch::nn_selu()],
#' [torch::nn_sigmoid()], [torch::nn_silu()], [torch::nn_softplus()],
#' [torch::nn_softshrink()], [torch::nn_softsign()], [torch::nn_tanh()],
#' [torch::nn_tanhshrink()]
#' @export
brulee_activations <- function() {
allowed_activation
Expand Down
6 changes: 3 additions & 3 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,14 @@ check_logical <- function(x, single = TRUE, fn = NULL) {
}


check_class_weights <- function(wts, lvls, xtab, fn) {
check_class_weights <- function(wts, lvls, xtab, fn, device = NULL) {
if (length(lvls) == 0) {
return(NULL)
}

if (is.null(wts)) {
wts <- rep(1, length(lvls))
return(torch::torch_tensor(wts))
return(torch::torch_tensor(wts, device = device))
}
if (!is.numeric(wts)) {
msg <- paste(format_msg(fn, "class_weights"), "to a numeric vector")
Expand Down Expand Up @@ -218,5 +218,5 @@ check_class_weights <- function(wts, lvls, xtab, fn) {
}


torch::torch_tensor(wts)
torch::torch_tensor(wts, device = device)
}
11 changes: 7 additions & 4 deletions R/convert_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
#' @param x A numeric matrix of predictors.
#' @param y A vector. For regression, `y` is numeric. For classification, it
#' is a factor.
#' @param device A character string to denote which processor to use with
#' possible values: `"cpu"`, `"cuda"`, `"mps"`, and `"auto"`. The last value
#' uses [guess_brulee_device()] to make the determination.
#' @return An R6 index sampler object with classes "training_set",
#' "dataset", and "R6".
#' @details Missing values should be removed before passing data to this function.
Expand All @@ -14,13 +17,13 @@
#' matrix_to_dataset(as.matrix(mtcars[, -1]), mtcars$mpg)
#' }
#' @export
matrix_to_dataset <- function(x, y) {
x <- torch::torch_tensor(x)
matrix_to_dataset <- function(x, y, device = "cpu") {
x <- torch::torch_tensor(x, device = device)
if (is.factor(y)) {
y <- as.numeric(y)
y <- torch::torch_tensor(y, dtype = torch_long())
y <- torch::torch_tensor(y, dtype = torch_long(), device = device)
} else {
y <- torch::torch_tensor(y)
y <- torch::torch_tensor(y, device = device)
}
torch::tensor_dataset(x = x, y = y)
}
Expand Down
17 changes: 17 additions & 0 deletions R/device.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#' Determine an appropriate computational device for torch
#'
#' Uses \pkg{torch} functions to determine if there is a GPU available for use.
#' Preference order is Apple Metal (`"mps"`), then NVIDIA CUDA (`"cuda"`),
#' falling back to the CPU when no accelerator is detected.
#' @return A character string, one of: `"cpu"`, `"cuda"`, or `"mps"`.
#' @examples
#' guess_brulee_device()
#' @export
guess_brulee_device <- function() {
  # Return the first available backend; no assignment needed since each
  # branch is the function's value.
  if (torch::backends_mps_is_available()) {
    "mps"
  } else if (torch::cuda_is_available()) {
    "cuda"
  } else {
    "cpu"
  }
}
32 changes: 26 additions & 6 deletions R/linear_reg-fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
#' The zeroing out of parameters is a specific feature of the optimization
#' method used in those packages.
#'
#' If GPU computing is requested via the `device` argument, note that torch
#' can't set the random number seeds in the GPU.
#' @seealso [predict.brulee_linear_reg()], [coef.brulee_linear_reg()],
#' [autoplot.brulee_linear_reg()]
#'
Expand Down Expand Up @@ -89,7 +91,7 @@
#' set.seed(1)
#' brulee_linear_reg(x = as.matrix(ames_train[, c("Longitude", "Latitude")]),
#' y = ames_train$Sale_Price,
#' penalty = 0.10, epochs = 1, batch_size = 64)
#' penalty = 0.10, epochs = 10)
#'
#' # Using recipe
#' library(recipes)
Expand All @@ -110,8 +112,7 @@
#' step_normalize(all_numeric_predictors())
#'
#' set.seed(2)
#' fit <- brulee_linear_reg(ames_rec, data = ames_train,
#' epochs = 5, batch_size = 32)
#' fit <- brulee_linear_reg(ames_rec, data = ames_train, epochs = 5)
#' fit
#'
#' autoplot(fit)
Expand Down Expand Up @@ -161,6 +162,7 @@ brulee_linear_reg.data.frame <-
momentum = 0.0,
batch_size = NULL,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {
processed <- hardhat::mold(x, y)
Expand All @@ -176,6 +178,7 @@ brulee_linear_reg.data.frame <-
momentum = momentum,
batch_size = batch_size,
stop_iter = stop_iter,
device = device,
verbose = verbose,
...
)
Expand All @@ -196,6 +199,7 @@ brulee_linear_reg.matrix <- function(x,
momentum = 0.0,
batch_size = NULL,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {
processed <- hardhat::mold(x, y)
Expand All @@ -211,6 +215,7 @@ brulee_linear_reg.matrix <- function(x,
validation = validation,
batch_size = batch_size,
stop_iter = stop_iter,
device = device,
verbose = verbose,
...
)
Expand All @@ -232,6 +237,7 @@ brulee_linear_reg.formula <-
momentum = 0.0,
batch_size = NULL,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {
processed <- hardhat::mold(formula, data)
Expand All @@ -247,6 +253,7 @@ brulee_linear_reg.formula <-
validation = validation,
batch_size = batch_size,
stop_iter = stop_iter,
device = device,
verbose = verbose,
...
)
Expand All @@ -268,6 +275,7 @@ brulee_linear_reg.recipe <-
momentum = 0.0,
batch_size = NULL,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {
processed <- hardhat::mold(x, data)
Expand All @@ -283,6 +291,7 @@ brulee_linear_reg.recipe <-
validation = validation,
batch_size = batch_size,
stop_iter = stop_iter,
device = device,
verbose = verbose,
...
)
Expand All @@ -293,7 +302,8 @@ brulee_linear_reg.recipe <-

brulee_linear_reg_bridge <- function(processed, epochs, optimizer,
learn_rate, momentum, penalty, mixture, dropout,
validation, batch_size, stop_iter, verbose, ...) {
validation, batch_size, stop_iter, device,
verbose, ...) {
if(!torch::torch_is_installed()) {
cli::cli_abort("The torch backend has not been installed; use `torch::install_torch()`.")
}
Expand All @@ -318,6 +328,13 @@ brulee_linear_reg_bridge <- function(processed, epochs, optimizer,
check_double(learn_rate, single = TRUE, 0, incl = c(FALSE, TRUE), fn = f_nm)
check_logical(verbose, single = TRUE, fn = f_nm)

# ------------------------------------------------------------------------------

device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps"))
if (device == "auto") {
device <- guess_brulee_device()
}

## -----------------------------------------------------------------------------

predictors <- processed$predictors
Expand Down Expand Up @@ -353,6 +370,7 @@ brulee_linear_reg_bridge <- function(processed, epochs, optimizer,
validation = validation,
batch_size = batch_size,
stop_iter = stop_iter,
device = device,
verbose = verbose
)

Expand Down Expand Up @@ -413,6 +431,7 @@ linear_reg_fit_imp <-
learn_rate = 1,
momentum = 0.0,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {

Expand Down Expand Up @@ -465,17 +484,18 @@ linear_reg_fit_imp <-

## ---------------------------------------------------------------------------
# Convert to index sampler and data loader
ds <- brulee::matrix_to_dataset(x, y)
ds <- brulee::matrix_to_dataset(x, y, device = device)
dl <- torch::dataloader(ds, batch_size = batch_size)

if (validation > 0) {
ds_val <- brulee::matrix_to_dataset(x_val, y_val)
ds_val <- brulee::matrix_to_dataset(x_val, y_val, device = device)
dl_val <- torch::dataloader(ds_val)
}

## ---------------------------------------------------------------------------
# Initialize model and optimizer
model <- linear_reg_module(ncol(x))
model$to(device = device)
loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture)
optimizer_obj <- set_optimizer(optimizer, model, learn_rate, momentum)

Expand Down
26 changes: 23 additions & 3 deletions R/logistic_reg-fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
#' The zeroing out of parameters is a specific feature of the optimization
#' method used in those packages.
#'
#' If GPU computing is requested via the `device` argument, note that torch
#' can't set the random number seeds in the GPU.
#' @seealso [predict.brulee_logistic_reg()], [coef.brulee_logistic_reg()],
#' [autoplot.brulee_logistic_reg()]
#'
Expand Down Expand Up @@ -149,6 +151,7 @@ brulee_logistic_reg.data.frame <-
batch_size = NULL,
class_weights = NULL,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {
processed <- hardhat::mold(x, y)
Expand All @@ -165,6 +168,7 @@ brulee_logistic_reg.data.frame <-
batch_size = batch_size,
class_weights = class_weights,
stop_iter = stop_iter,
device = device,
verbose = verbose,
...
)
Expand All @@ -186,6 +190,7 @@ brulee_logistic_reg.matrix <- function(x,
batch_size = NULL,
class_weights = NULL,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {
processed <- hardhat::mold(x, y)
Expand All @@ -202,6 +207,7 @@ brulee_logistic_reg.matrix <- function(x,
batch_size = batch_size,
class_weights = class_weights,
stop_iter = stop_iter,
device = device,
verbose = verbose,
...
)
Expand All @@ -224,6 +230,7 @@ brulee_logistic_reg.formula <-
batch_size = NULL,
class_weights = NULL,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {
processed <- hardhat::mold(formula, data)
Expand All @@ -240,6 +247,7 @@ brulee_logistic_reg.formula <-
batch_size = batch_size,
class_weights = class_weights,
stop_iter = stop_iter,
device = device,
verbose = verbose,
...
)
Expand All @@ -262,6 +270,7 @@ brulee_logistic_reg.recipe <-
batch_size = NULL,
class_weights = NULL,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {
processed <- hardhat::mold(x, data)
Expand All @@ -278,6 +287,7 @@ brulee_logistic_reg.recipe <-
batch_size = batch_size,
class_weights = class_weights,
stop_iter = stop_iter,
device = device,
verbose = verbose,
...
)
Expand All @@ -288,7 +298,7 @@ brulee_logistic_reg.recipe <-

brulee_logistic_reg_bridge <- function(processed, epochs, optimizer,
learn_rate, momentum, penalty, mixture, class_weights,
validation, batch_size, stop_iter, verbose, ...) {
validation, batch_size, stop_iter, device, verbose, ...) {
if(!torch::torch_is_installed()) {
cli::cli_abort("The torch backend has not been installed; use `torch::install_torch()`.")
}
Expand All @@ -312,6 +322,13 @@ brulee_logistic_reg_bridge <- function(processed, epochs, optimizer,
check_double(learn_rate, single = TRUE, 0, incl = c(FALSE, TRUE), fn = f_nm)
check_logical(verbose, single = TRUE, fn = f_nm)

# ------------------------------------------------------------------------------

device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps"))
if (device == "auto") {
device <- guess_brulee_device()
}

## -----------------------------------------------------------------------------

predictors <- processed$predictors
Expand Down Expand Up @@ -362,6 +379,7 @@ brulee_logistic_reg_bridge <- function(processed, epochs, optimizer,
batch_size = batch_size,
class_weights = class_weights,
stop_iter = stop_iter,
device = device,
verbose = verbose
)

Expand Down Expand Up @@ -423,6 +441,7 @@ logistic_reg_fit_imp <-
momentum = 0.0,
class_weights = NULL,
stop_iter = 5,
device = "cpu",
verbose = FALSE,
...) {

Expand Down Expand Up @@ -475,17 +494,18 @@ logistic_reg_fit_imp <-

## ---------------------------------------------------------------------------
# Convert to index sampler and data loader
ds <- brulee::matrix_to_dataset(x, y)
ds <- brulee::matrix_to_dataset(x, y, device = device)
dl <- torch::dataloader(ds, batch_size = batch_size)

if (validation > 0) {
ds_val <- brulee::matrix_to_dataset(x_val, y_val)
ds_val <- brulee::matrix_to_dataset(x_val, y_val, device = device)
dl_val <- torch::dataloader(ds_val)
}

## ---------------------------------------------------------------------------
# Initialize model and optimizer
model <- logistic_module(ncol(x), y_dim)
model$to(device = device)
loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture)
optimizer_obj <- set_optimizer(optimizer, model, learn_rate, momentum)

Expand Down
Loading