From 65d21b6bd34f80ce0f93bff55f98129746b7eeb6 Mon Sep 17 00:00:00 2001
From: topepo <mxkuhn@gmail.com>
Date: Wed, 1 Nov 2023 17:10:08 -0400
Subject: [PATCH 1/6] initial setup for MLP models

---
 R/checks.R       |  6 +++---
 R/convert_data.R |  8 ++++----
 R/device.R       | 16 ++++++++++++++++
 R/mlp-fit.R      | 21 ++++++++++++++++++---
 4 files changed, 41 insertions(+), 10 deletions(-)
 create mode 100644 R/device.R

diff --git a/R/checks.R b/R/checks.R
index e4c6b32..61c2417 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -177,14 +177,14 @@ check_logical <- function(x, single = TRUE, fn = NULL) {
 }
 
 
-check_class_weights <- function(wts, lvls, xtab, fn) {
+check_class_weights <- function(wts, lvls, xtab, fn, device = NULL) {
   if (length(lvls) == 0) {
     return(NULL)
   }
 
   if (is.null(wts)) {
     wts <- rep(1, length(lvls))
-    return(torch::torch_tensor(wts))
+    return(torch::torch_tensor(wts, device = device))
   }
   if (!is.numeric(wts)) {
     msg <- paste(format_msg(fn, "class_weights"), "to a numeric vector")
@@ -217,5 +217,5 @@ check_class_weights <- function(wts, lvls, xtab, fn) {
   }
 
 
-  torch::torch_tensor(wts)
+  torch::torch_tensor(wts, device = device)
 }
diff --git a/R/convert_data.R b/R/convert_data.R
index 90cc0b8..c5f5e3d 100644
--- a/R/convert_data.R
+++ b/R/convert_data.R
@@ -14,13 +14,13 @@
 #'   matrix_to_dataset(as.matrix(mtcars[, -1]), mtcars$mpg)
 #' }
 #' @export
-matrix_to_dataset <- function(x, y) {
-  x <- torch::torch_tensor(x)
+matrix_to_dataset <- function(x, y, device) {
+  x <- torch::torch_tensor(x, device = device)
   if (is.factor(y)) {
     y <- as.numeric(y)
-    y <- torch::torch_tensor(y, dtype = torch_long())
+    y <- torch::torch_tensor(y, dtype = torch_long(), device = device)
   } else {
-    y <- torch::torch_tensor(y)
+    y <- torch::torch_tensor(y, device = device)
   }
   torch::tensor_dataset(x = x, y = y)
 }
diff --git a/R/device.R b/R/device.R
new file mode 100644
index 0000000..3bbd3c5
--- /dev/null
+++ b/R/device.R
@@ -0,0 +1,16 @@
+#' Determine an appropriate computational device for torch
+#' Uses \pkg{torch} functions to determine if there is a GPU available for use.
+#' @return A character string, one of: `"cpu"`, `"cuda"`, or `"mps"`.
+#' @examplesI
+#' guess_brulee_device()
+#' @export
+guess_brulee_device <- function() {
+ if (torch::backends_mps_is_available()) {
+  dev <- "mps"
+ } else if (torch::cuda_is_available()) {
+  dev <- "cuda"
+ } else {
+  dev <- "cpu"
+ }
+ dev
+}
diff --git a/R/mlp-fit.R b/R/mlp-fit.R
index 3ccbd45..bf3fc57 100644
--- a/R/mlp-fit.R
+++ b/R/mlp-fit.R
@@ -248,6 +248,7 @@ brulee_mlp.data.frame <-
            batch_size = NULL,
            class_weights = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
     processed <- hardhat::mold(x, y)
@@ -268,6 +269,7 @@ brulee_mlp.data.frame <-
       batch_size = batch_size,
       class_weights = class_weights,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -293,6 +295,7 @@ brulee_mlp.matrix <- function(x,
                               batch_size = NULL,
                               class_weights = NULL,
                               stop_iter = 5,
+                              device = "cpu",
                               verbose = FALSE,
                               ...) {
   processed <- hardhat::mold(x, y)
@@ -313,6 +316,7 @@ brulee_mlp.matrix <- function(x,
     batch_size = batch_size,
     class_weights = class_weights,
     stop_iter = stop_iter,
+    device = device,
     verbose = verbose,
     ...
   )
@@ -339,6 +343,7 @@ brulee_mlp.formula <-
            batch_size = NULL,
            class_weights = NULL,
            stop_iter = 5,
+           device = device,
            verbose = FALSE,
            ...) {
     processed <- hardhat::mold(formula, data)
@@ -359,6 +364,7 @@ brulee_mlp.formula <-
       batch_size = batch_size,
       class_weights = class_weights,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -385,6 +391,7 @@ brulee_mlp.recipe <-
            batch_size = NULL,
            class_weights = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
     processed <- hardhat::mold(x, data)
@@ -405,6 +412,7 @@ brulee_mlp.recipe <-
       batch_size = batch_size,
       class_weights = class_weights,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -416,7 +424,7 @@ brulee_mlp.recipe <-
 brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation,
                               learn_rate, rate_schedule, momentum, penalty,
                               mixture, dropout, class_weights, validation, optimizer,
-                              batch_size, stop_iter, verbose, ...) {
+                              batch_size, stop_iter, device, verbose, ...) {
   if(!torch::torch_is_installed()) {
     rlang::abort("The torch backend has not been installed; use `torch::install_torch()`.")
   }
@@ -457,7 +465,11 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation,
   check_logical(verbose, single = TRUE, fn = f_nm)
   check_character(activation, single = FALSE, fn = f_nm)
 
+  # ------------------------------------------------------------------------------
 
+  if (is.null(device)) {
+   device <- guess_brulee_device()
+  }
 
   ## -----------------------------------------------------------------------------
 
@@ -483,7 +495,7 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation,
 
   lvls <- levels(outcome)
   xtab <- table(outcome)
-  class_weights <- check_class_weights(class_weights, lvls, xtab, f_nm)
+  class_weights <- check_class_weights(class_weights, lvls, xtab, f_nm, device = device)
 
   ## -----------------------------------------------------------------------------
 
@@ -505,6 +517,7 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation,
       batch_size = batch_size,
       class_weights = class_weights,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -577,6 +590,7 @@ mlp_fit_imp <-
            activation = "relu",
            class_weights = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
 
@@ -643,7 +657,7 @@ mlp_fit_imp <-
 
     ## ---------------------------------------------------------------------------
     # Convert to index sampler and data loader
-    ds <- brulee::matrix_to_dataset(x, y)
+    ds <- brulee::matrix_to_dataset(x, y, device = device)
     dl <- torch::dataloader(ds, batch_size = batch_size)
 
     if (validation > 0) {
@@ -654,6 +668,7 @@ mlp_fit_imp <-
     ## ---------------------------------------------------------------------------
     # Initialize model and optimizer
     model <- mlp_module(ncol(x), hidden_units, activation, dropout, y_dim)
+    model$to(device = device)
     loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture)
 
     # Set the optimizer

From fe3c827dbc1cba31fedfa91d19a24f7449c36e62 Mon Sep 17 00:00:00 2001
From: topepo <mxkuhn@gmail.com>
Date: Thu, 2 Nov 2023 07:29:34 -0400
Subject: [PATCH 2/6] initial support for GPU for #55

---
 NAMESPACE                  |  1 +
 R/convert_data.R           |  2 +-
 R/device.R                 |  3 ++-
 R/mlp-fit.R                |  9 +++++++--
 man/brulee_mlp.Rd          |  7 +++++++
 man/guess_brulee_device.Rd | 17 +++++++++++++++++
 man/matrix_to_dataset.Rd   |  2 +-
 7 files changed, 36 insertions(+), 5 deletions(-)
 create mode 100644 man/guess_brulee_device.Rd

diff --git a/NAMESPACE b/NAMESPACE
index e28f00d..52107df 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -47,6 +47,7 @@ export(brulee_logistic_reg)
 export(brulee_mlp)
 export(brulee_multinomial_reg)
 export(coef)
+export(guess_brulee_device)
 export(matrix_to_dataset)
 export(schedule_cyclic)
 export(schedule_decay_expo)
diff --git a/R/convert_data.R b/R/convert_data.R
index c5f5e3d..f4434e2 100644
--- a/R/convert_data.R
+++ b/R/convert_data.R
@@ -14,7 +14,7 @@
 #'   matrix_to_dataset(as.matrix(mtcars[, -1]), mtcars$mpg)
 #' }
 #' @export
-matrix_to_dataset <- function(x, y, device) {
+matrix_to_dataset <- function(x, y, device = "cpu") {
   x <- torch::torch_tensor(x, device = device)
   if (is.factor(y)) {
     y <- as.numeric(y)
diff --git a/R/device.R b/R/device.R
index 3bbd3c5..47a9c8a 100644
--- a/R/device.R
+++ b/R/device.R
@@ -1,7 +1,8 @@
 #' Determine an appropriate computational device for torch
+#'
 #' Uses \pkg{torch} functions to determine if there is a GPU available for use.
 #' @return A character string, one of: `"cpu"`, `"cuda"`, or `"mps"`.
-#' @examplesI
+#' @examples
 #' guess_brulee_device()
 #' @export
 guess_brulee_device <- function() {
diff --git a/R/mlp-fit.R b/R/mlp-fit.R
index bf3fc57..f6b8d7d 100644
--- a/R/mlp-fit.R
+++ b/R/mlp-fit.R
@@ -65,6 +65,9 @@
 #' @param stop_iter A non-negative integer for how many iterations with no
 #' improvement before stopping.
 #' @param verbose A logical that prints out the iteration history.
+#' @param device A character string or `NULL` (if you want it to guess). Possible
+#' values are `"cpu"`, `"cuda"`, `"mps"`, `"auto"`. The last value uses
+#' [guess_brulee_device()].
 #' @param ... Options to pass to the learning rate schedulers via
 #' [set_learn_rate()]. For example, the `reduction` or `steps` arguments to
 #' [schedule_step()] could be passed here.
@@ -465,9 +468,11 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation,
   check_logical(verbose, single = TRUE, fn = f_nm)
   check_character(activation, single = FALSE, fn = f_nm)
 
+
   # ------------------------------------------------------------------------------
 
-  if (is.null(device)) {
+  device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps"))
+  if (device == "auto") {
    device <- guess_brulee_device()
   }
 
@@ -661,7 +666,7 @@ mlp_fit_imp <-
     dl <- torch::dataloader(ds, batch_size = batch_size)
 
     if (validation > 0) {
-      ds_val <- brulee::matrix_to_dataset(x_val, y_val)
+      ds_val <- brulee::matrix_to_dataset(x_val, y_val, device = device)
       dl_val <- torch::dataloader(ds_val)
     }
 
diff --git a/man/brulee_mlp.Rd b/man/brulee_mlp.Rd
index d73a870..26d37c3 100644
--- a/man/brulee_mlp.Rd
+++ b/man/brulee_mlp.Rd
@@ -30,6 +30,7 @@ brulee_mlp(x, ...)
   batch_size = NULL,
   class_weights = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -51,6 +52,7 @@ brulee_mlp(x, ...)
   batch_size = NULL,
   class_weights = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -72,6 +74,7 @@ brulee_mlp(x, ...)
   batch_size = NULL,
   class_weights = NULL,
   stop_iter = 5,
+  device = device,
   verbose = FALSE,
   ...
 )
@@ -93,6 +96,7 @@ brulee_mlp(x, ...)
   batch_size = NULL,
   class_weights = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -174,6 +178,9 @@ and all other classes receive a weight of one.
 \item{stop_iter}{A non-negative integer for how many iterations with no
 improvement before stopping.}
 
+\item{device}{A character string or \code{NULL} (if you want it to guess). Possible
+values are \code{"cpu"}, \code{"cuda"}, or \code{"mps"}. See \code{\link[=guess_brulee_device]{guess_brulee_device()}}.}
+
 \item{verbose}{A logical that prints out the iteration history.}
 
 \item{formula}{A formula specifying the outcome term(s) on the left-hand side,
diff --git a/man/guess_brulee_device.Rd b/man/guess_brulee_device.Rd
new file mode 100644
index 0000000..2e7d9ef
--- /dev/null
+++ b/man/guess_brulee_device.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/device.R
+\name{guess_brulee_device}
+\alias{guess_brulee_device}
+\title{Determine an appropriate computational device for torch}
+\usage{
+guess_brulee_device()
+}
+\value{
+A character string, one of: \code{"cpu"}, \code{"cuda"}, or \code{"mps"}.
+}
+\description{
+Uses \pkg{torch} functions to determine if there is a GPU available for use.
+}
+\examples{
+guess_brulee_device()
+}
diff --git a/man/matrix_to_dataset.Rd b/man/matrix_to_dataset.Rd
index 7fb2fdf..6350b30 100644
--- a/man/matrix_to_dataset.Rd
+++ b/man/matrix_to_dataset.Rd
@@ -4,7 +4,7 @@
 \alias{matrix_to_dataset}
 \title{Convert data to torch format}
 \usage{
-matrix_to_dataset(x, y)
+matrix_to_dataset(x, y, device)
 }
 \arguments{
 \item{x}{A numeric matrix of predictors.}

From b3b836f4d00e825bc4d21eb389999a0f9bb10077 Mon Sep 17 00:00:00 2001
From: topepo <mxkuhn@gmail.com>
Date: Thu, 2 Nov 2023 08:14:53 -0400
Subject: [PATCH 3/6] linear and logistic regression

---
 R/linear_reg-fit.R         | 25 ++++++++++++++++++++++---
 R/logistic_reg-fit.R       | 24 +++++++++++++++++++++---
 R/mlp-fit.R                |  7 +++----
 man/brulee_linear_reg.Rd   |  8 ++++++++
 man/brulee_logistic_reg.Rd |  8 ++++++++
 man/brulee_mlp.Rd          |  5 +++--
 man/matrix_to_dataset.Rd   |  2 +-
 7 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/R/linear_reg-fit.R b/R/linear_reg-fit.R
index 39af2f1..628724b 100644
--- a/R/linear_reg-fit.R
+++ b/R/linear_reg-fit.R
@@ -161,6 +161,7 @@ brulee_linear_reg.data.frame <-
            momentum = 0.0,
            batch_size = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
     processed <- hardhat::mold(x, y)
@@ -176,6 +177,7 @@ brulee_linear_reg.data.frame <-
       momentum = momentum,
       batch_size = batch_size,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -196,6 +198,7 @@ brulee_linear_reg.matrix <- function(x,
                                      momentum = 0.0,
                                      batch_size = NULL,
                                      stop_iter = 5,
+                                     device = "cpu",
                                      verbose = FALSE,
                                      ...) {
   processed <- hardhat::mold(x, y)
@@ -211,6 +214,7 @@ brulee_linear_reg.matrix <- function(x,
     validation = validation,
     batch_size = batch_size,
     stop_iter = stop_iter,
+    device = device,
     verbose = verbose,
     ...
   )
@@ -232,6 +236,7 @@ brulee_linear_reg.formula <-
            momentum = 0.0,
            batch_size = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
     processed <- hardhat::mold(formula, data)
@@ -247,6 +252,7 @@ brulee_linear_reg.formula <-
       validation = validation,
       batch_size = batch_size,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -268,6 +274,7 @@ brulee_linear_reg.recipe <-
            momentum = 0.0,
            batch_size = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
     processed <- hardhat::mold(x, data)
@@ -283,6 +290,7 @@ brulee_linear_reg.recipe <-
       validation = validation,
       batch_size = batch_size,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -293,7 +301,8 @@ brulee_linear_reg.recipe <-
 
 brulee_linear_reg_bridge <- function(processed, epochs, optimizer,
                                      learn_rate, momentum, penalty, mixture, dropout,
-                                     validation, batch_size, stop_iter, verbose, ...) {
+                                     validation, batch_size, stop_iter, device,
+                                     verbose, ...) {
   if(!torch::torch_is_installed()) {
     rlang::abort("The torch backend has not been installed; use `torch::install_torch()`.")
   }
@@ -318,6 +327,13 @@ brulee_linear_reg_bridge <- function(processed, epochs, optimizer,
   check_double(learn_rate, single = TRUE, 0, incl = c(FALSE, TRUE), fn = f_nm)
   check_logical(verbose, single = TRUE, fn = f_nm)
 
+  # ------------------------------------------------------------------------------
+
+  device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps"))
+  if (device == "auto") {
+   device <- guess_brulee_device()
+  }
+
   ## -----------------------------------------------------------------------------
 
   predictors <- processed$predictors
@@ -353,6 +369,7 @@ brulee_linear_reg_bridge <- function(processed, epochs, optimizer,
       validation = validation,
       batch_size = batch_size,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose
     )
 
@@ -413,6 +430,7 @@ linear_reg_fit_imp <-
            learn_rate = 1,
            momentum = 0.0,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
 
@@ -460,17 +478,18 @@ linear_reg_fit_imp <-
 
     ## ---------------------------------------------------------------------------
     # Convert to index sampler and data loader
-    ds <- brulee::matrix_to_dataset(x, y)
+    ds <- brulee::matrix_to_dataset(x, y, device = device)
     dl <- torch::dataloader(ds, batch_size = batch_size)
 
     if (validation > 0) {
-      ds_val <- brulee::matrix_to_dataset(x_val, y_val)
+      ds_val <- brulee::matrix_to_dataset(x_val, y_val, device = device)
       dl_val <- torch::dataloader(ds_val)
     }
 
     ## ---------------------------------------------------------------------------
     # Initialize model and optimizer
     model <- linear_reg_module(ncol(x))
+    model$to(device = device)
     loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture)
 
     # Write a optim wrapper
diff --git a/R/logistic_reg-fit.R b/R/logistic_reg-fit.R
index 859bd36..e88dcc4 100644
--- a/R/logistic_reg-fit.R
+++ b/R/logistic_reg-fit.R
@@ -149,6 +149,7 @@ brulee_logistic_reg.data.frame <-
            batch_size = NULL,
            class_weights = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
     processed <- hardhat::mold(x, y)
@@ -165,6 +166,7 @@ brulee_logistic_reg.data.frame <-
       batch_size = batch_size,
       class_weights = class_weights,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -186,6 +188,7 @@ brulee_logistic_reg.matrix <- function(x,
                                        batch_size = NULL,
                                        class_weights = NULL,
                                        stop_iter = 5,
+                                       device = "cpu",
                                        verbose = FALSE,
                                        ...) {
   processed <- hardhat::mold(x, y)
@@ -202,6 +205,7 @@ brulee_logistic_reg.matrix <- function(x,
     batch_size = batch_size,
     class_weights = class_weights,
     stop_iter = stop_iter,
+    device = device,
     verbose = verbose,
     ...
   )
@@ -224,6 +228,7 @@ brulee_logistic_reg.formula <-
            batch_size = NULL,
            class_weights = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
     processed <- hardhat::mold(formula, data)
@@ -240,6 +245,7 @@ brulee_logistic_reg.formula <-
       batch_size = batch_size,
       class_weights = class_weights,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -262,6 +268,7 @@ brulee_logistic_reg.recipe <-
            batch_size = NULL,
            class_weights = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
     processed <- hardhat::mold(x, data)
@@ -278,6 +285,7 @@ brulee_logistic_reg.recipe <-
       batch_size = batch_size,
       class_weights = class_weights,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose,
       ...
     )
@@ -288,7 +296,7 @@ brulee_logistic_reg.recipe <-
 
 brulee_logistic_reg_bridge <- function(processed, epochs, optimizer,
                                        learn_rate, momentum, penalty, mixture, class_weights,
-                                       validation, batch_size, stop_iter, verbose, ...) {
+                                       validation, batch_size, stop_iter, device, verbose, ...) {
   if(!torch::torch_is_installed()) {
     rlang::abort("The torch backend has not been installed; use `torch::install_torch()`.")
   }
@@ -312,6 +320,13 @@ brulee_logistic_reg_bridge <- function(processed, epochs, optimizer,
   check_double(learn_rate, single = TRUE, 0, incl = c(FALSE, TRUE), fn = f_nm)
   check_logical(verbose, single = TRUE, fn = f_nm)
 
+  # ------------------------------------------------------------------------------
+
+  device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps"))
+  if (device == "auto") {
+   device <- guess_brulee_device()
+  }
+
   ## -----------------------------------------------------------------------------
 
   predictors <- processed$predictors
@@ -362,6 +377,7 @@ brulee_logistic_reg_bridge <- function(processed, epochs, optimizer,
       batch_size = batch_size,
       class_weights = class_weights,
       stop_iter = stop_iter,
+      device = device,
       verbose = verbose
     )
 
@@ -423,6 +439,7 @@ logistic_reg_fit_imp <-
            momentum = 0.0,
            class_weights = NULL,
            stop_iter = 5,
+           device = "cpu",
            verbose = FALSE,
            ...) {
 
@@ -471,17 +488,18 @@ logistic_reg_fit_imp <-
 
     ## ---------------------------------------------------------------------------
     # Convert to index sampler and data loader
-    ds <- brulee::matrix_to_dataset(x, y)
+    ds <- brulee::matrix_to_dataset(x, y, device = device)
     dl <- torch::dataloader(ds, batch_size = batch_size)
 
     if (validation > 0) {
-      ds_val <- brulee::matrix_to_dataset(x_val, y_val)
+      ds_val <- brulee::matrix_to_dataset(x_val, y_val, device = device)
       dl_val <- torch::dataloader(ds_val)
     }
 
     ## ---------------------------------------------------------------------------
     # Initialize model and optimizer
     model <- logistic_module(ncol(x), y_dim)
+    model$to(device = device)
     loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture)
 
     # Write a optim wrapper
diff --git a/R/mlp-fit.R b/R/mlp-fit.R
index f6b8d7d..454143f 100644
--- a/R/mlp-fit.R
+++ b/R/mlp-fit.R
@@ -65,9 +65,9 @@
 #' @param stop_iter A non-negative integer for how many iterations with no
 #' improvement before stopping.
 #' @param verbose A logical that prints out the iteration history.
-#' @param device A character string or `NULL` (if you want it to guess). Possible
-#' values are `"cpu"`, `"cuda"`, `"mps"`, `"auto"`. The last value uses
-#' [guess_brulee_device()].
+#' @param device A character string to denote which processor to use with
+#' possibles values: `"cpu"`, `"cuda"`, `"mps"`, and `"auto"`. The last value
+#' uses [guess_brulee_device()] to make the determination.
 #' @param ... Options to pass to the learning rate schedulers via
 #' [set_learn_rate()]. For example, the `reduction` or `steps` arguments to
 #' [schedule_step()] could be passed here.
@@ -468,7 +468,6 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation,
   check_logical(verbose, single = TRUE, fn = f_nm)
   check_character(activation, single = FALSE, fn = f_nm)
 
-
   # ------------------------------------------------------------------------------
 
   device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps"))
diff --git a/man/brulee_linear_reg.Rd b/man/brulee_linear_reg.Rd
index bef2b6f..5324b20 100644
--- a/man/brulee_linear_reg.Rd
+++ b/man/brulee_linear_reg.Rd
@@ -25,6 +25,7 @@ brulee_linear_reg(x, ...)
   momentum = 0,
   batch_size = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -41,6 +42,7 @@ brulee_linear_reg(x, ...)
   momentum = 0,
   batch_size = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -57,6 +59,7 @@ brulee_linear_reg(x, ...)
   momentum = 0,
   batch_size = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -73,6 +76,7 @@ brulee_linear_reg(x, ...)
   momentum = 0,
   batch_size = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -127,6 +131,10 @@ batch. (\code{optimizer = "SGD"} only)}
 \item{stop_iter}{A non-negative integer for how many iterations with no
 improvement before stopping.}
 
+\item{device}{A character string to denote which processor to use with
+possibles values: \code{"cpu"}, \code{"cuda"}, \code{"mps"}, and \code{"auto"}. The last value
+uses \code{\link[=guess_brulee_device]{guess_brulee_device()}} to make the determination.}
+
 \item{verbose}{A logical that prints out the iteration history.}
 
 \item{formula}{A formula specifying the outcome term(s) on the left-hand side,
diff --git a/man/brulee_logistic_reg.Rd b/man/brulee_logistic_reg.Rd
index 315dbc7..728d6c7 100644
--- a/man/brulee_logistic_reg.Rd
+++ b/man/brulee_logistic_reg.Rd
@@ -26,6 +26,7 @@ brulee_logistic_reg(x, ...)
   batch_size = NULL,
   class_weights = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -43,6 +44,7 @@ brulee_logistic_reg(x, ...)
   batch_size = NULL,
   class_weights = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -60,6 +62,7 @@ brulee_logistic_reg(x, ...)
   batch_size = NULL,
   class_weights = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -77,6 +80,7 @@ brulee_logistic_reg(x, ...)
   batch_size = NULL,
   class_weights = NULL,
   stop_iter = 5,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
@@ -142,6 +146,10 @@ and all other classes receive a weight of one.
 \item{stop_iter}{A non-negative integer for how many iterations with no
 improvement before stopping.}
 
+\item{device}{A character string to denote which processor to use with
+possibles values: \code{"cpu"}, \code{"cuda"}, \code{"mps"}, and \code{"auto"}. The last value
+uses \code{\link[=guess_brulee_device]{guess_brulee_device()}} to make the determination.}
+
 \item{verbose}{A logical that prints out the iteration history.}
 
 \item{formula}{A formula specifying the outcome term(s) on the left-hand side,
diff --git a/man/brulee_mlp.Rd b/man/brulee_mlp.Rd
index 26d37c3..f14ed0c 100644
--- a/man/brulee_mlp.Rd
+++ b/man/brulee_mlp.Rd
@@ -178,8 +178,9 @@ and all other classes receive a weight of one.
 \item{stop_iter}{A non-negative integer for how many iterations with no
 improvement before stopping.}
 
-\item{device}{A character string or \code{NULL} (if you want it to guess). Possible
-values are \code{"cpu"}, \code{"cuda"}, or \code{"mps"}. See \code{\link[=guess_brulee_device]{guess_brulee_device()}}.}
+\item{device}{A character string to denote which processor to use with
+possibles values: \code{"cpu"}, \code{"cuda"}, \code{"mps"}, and \code{"auto"}. The last value
+uses \code{\link[=guess_brulee_device]{guess_brulee_device()}} to make the determination.}
 
 \item{verbose}{A logical that prints out the iteration history.}
 
diff --git a/man/matrix_to_dataset.Rd b/man/matrix_to_dataset.Rd
index 6350b30..b8b2dc1 100644
--- a/man/matrix_to_dataset.Rd
+++ b/man/matrix_to_dataset.Rd
@@ -4,7 +4,7 @@
 \alias{matrix_to_dataset}
 \title{Convert data to torch format}
 \usage{
-matrix_to_dataset(x, y, device)
+matrix_to_dataset(x, y, device = "cpu")
 }
 \arguments{
 \item{x}{A numeric matrix of predictors.}

From b00f68a21cdbfb34411fa1d004ace7eb1440aee5 Mon Sep 17 00:00:00 2001
From: topepo <mxkuhn@gmail.com>
Date: Thu, 2 Nov 2023 10:26:51 -0400
Subject: [PATCH 4/6] added docs

---
 R/linear_reg-fit.R         | 2 ++
 R/logistic_reg-fit.R       | 2 ++
 R/mlp-fit.R                | 3 +++
 man/brulee_linear_reg.Rd   | 3 +++
 man/brulee_logistic_reg.Rd | 3 +++
 man/brulee_mlp.Rd          | 3 +++
 6 files changed, 16 insertions(+)

diff --git a/R/linear_reg-fit.R b/R/linear_reg-fit.R
index 628724b..4e746ad 100644
--- a/R/linear_reg-fit.R
+++ b/R/linear_reg-fit.R
@@ -53,6 +53,8 @@
 #' The zeroing out of parameters is a specific feature the optimization method
 #' used in those packages.
 #'
+#' If GPU computing is requested via the `device` argument, note that torch
+#' can't set the random number seeds in the GPU.
 #' @seealso [predict.brulee_linear_reg()], [coef.brulee_linear_reg()],
 #' [autoplot.brulee_linear_reg()]
 #'
diff --git a/R/logistic_reg-fit.R b/R/logistic_reg-fit.R
index e88dcc4..4a76a67 100644
--- a/R/logistic_reg-fit.R
+++ b/R/logistic_reg-fit.R
@@ -57,6 +57,8 @@
 #' The zeroing out of parameters is a specific feature the optimization method
 #' used in those packages.
 #'
+#' If GPU computing is requested via the `device` argument, note that torch
+#' can't set the random number seeds in the GPU.
 #' @seealso [predict.brulee_logistic_reg()], [coef.brulee_logistic_reg()],
 #' [autoplot.brulee_logistic_reg()]
 #'
diff --git a/R/mlp-fit.R b/R/mlp-fit.R
index 454143f..04942b2 100644
--- a/R/mlp-fit.R
+++ b/R/mlp-fit.R
@@ -102,6 +102,9 @@
 #' The zeroing out of parameters is a specific feature the optimization method
 #' used in those packages.
 #'
+#' If GPU computing is requested via the `device` argument, note that torch
+#' can't set the random number seeds in the GPU.
+#'
 #' ## Learning Rates
 #'
 #' The learning rate can be set to constant (the default) or dynamically set
diff --git a/man/brulee_linear_reg.Rd b/man/brulee_linear_reg.Rd
index 5324b20..c951dcf 100644
--- a/man/brulee_linear_reg.Rd
+++ b/man/brulee_linear_reg.Rd
@@ -189,6 +189,9 @@ The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force
 parameters to be strictly zero (as it does in packages such as \pkg{glmnet}).
 The zeroing out of parameters is a specific feature the optimization method
 used in those packages.
+
+If GPU computing is requested via the \code{device} argument, note that torch
+can't set the random number seeds in the GPU.
 }
 \examples{
 \donttest{
diff --git a/man/brulee_logistic_reg.Rd b/man/brulee_logistic_reg.Rd
index 728d6c7..57ccb6d 100644
--- a/man/brulee_logistic_reg.Rd
+++ b/man/brulee_logistic_reg.Rd
@@ -200,6 +200,9 @@ The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force
 parameters to be strictly zero (as it does in packages such as \pkg{glmnet}).
 The zeroing out of parameters is a specific feature the optimization method
 used in those packages.
+
+If GPU computing is requested via the \code{device} argument, note that torch
+can't set the random number seeds in the GPU.
 }
 \examples{
 \donttest{
diff --git a/man/brulee_mlp.Rd b/man/brulee_mlp.Rd
index f14ed0c..cecfd07 100644
--- a/man/brulee_mlp.Rd
+++ b/man/brulee_mlp.Rd
@@ -239,6 +239,9 @@ The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force
 parameters to be strictly zero (as it does in packages such as \pkg{glmnet}).
 The zeroing out of parameters is a specific feature the optimization method
 used in those packages.
+
+If GPU computing is requested via the \code{device} argument, note that torch
+can't set the random number seeds in the GPU.
 \subsection{Learning Rates}{
 
 The learning rate can be set to constant (the default) or dynamically set

From 69f7628b3129daef51634b84193459befccaef95 Mon Sep 17 00:00:00 2001
From: Max Kuhn <mxkuhn@gmail.com>
Date: Sun, 5 Nov 2023 11:41:10 -0500
Subject: [PATCH 5/6] gpu tests

---
 tests/testthat/test-gpu.R | 67 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 tests/testthat/test-gpu.R

diff --git a/tests/testthat/test-gpu.R b/tests/testthat/test-gpu.R
new file mode 100644
index 0000000..975f8ff
--- /dev/null
+++ b/tests/testthat/test-gpu.R
@@ -0,0 +1,67 @@
+test_that('device type - mac ARM', {
+
+ skip_if_not(torch::torch_is_installed())
+ skip_on_os(c("windows", "linux", "solaris"))
+ skip_on_os("mac", arch = "x86_64")
+ expect_snapshot(guess_brulee_device())
+})
+
+test_that('device type - cpu', {
+
+ skip_if_not(torch::torch_is_installed())
+ skip_on_os("mac", arch = "aarch64")
+ expect_snapshot(guess_brulee_device())
+})
+
+
+test_that('linear regression on gpu', {
+
+ skip_if_not(torch::torch_is_installed())
+ skip_on_os(c("windows", "linux", "solaris"))
+ skip_on_os("mac", arch = "x86_64")
+
+ skip_if_not_installed("modeldata")
+
+ set.seed(591)
+ tr <- sim_regression(1000)
+
+ expect_error(
+  fit <- brulee_linear_reg(outcome ~ ., data = tr, device = "mps"),
+  regex = NA
+ )
+})
+
+test_that('logistic regression on gpu', {
+
+ skip_if_not(torch::torch_is_installed())
+ skip_on_os(c("windows", "linux", "solaris"))
+ skip_on_os("mac", arch = "x86_64")
+
+ skip_if_not_installed("modeldata")
+
+ set.seed(591)
+ tr <- sim_classification(1000)
+
+ expect_error(
+  fit <- brulee_logistic_reg(class ~ ., data = tr, device = "mps"),
+  regex = NA
+ )
+})
+
+test_that('mlp on gpu', {
+
+ skip_if_not(torch::torch_is_installed())
+ skip_on_os(c("windows", "linux", "solaris"))
+ skip_on_os("mac", arch = "x86_64")
+
+ skip_if_not_installed("modeldata")
+
+ set.seed(591)
+ tr <- sim_regression(1000)
+
+ expect_error(
+  fit <- brulee_mlp(outcome ~ ., data = tr, device = "mps"),
+  regex = NA
+ )
+})
+

From b46122beeb1fd80bb5aa76a930b2c5fc0e80f45d Mon Sep 17 00:00:00 2001
From: Max Kuhn <mxkuhn@gmail.com>
Date: Sun, 5 Nov 2023 12:35:11 -0500
Subject: [PATCH 6/6] documentation updates

---
 NEWS.md                   |  6 ++++++
 R/activation.R            |  7 +++++++
 R/convert_data.R          |  3 +++
 R/linear_reg-fit.R        |  5 ++---
 README.Rmd                | 16 ++++++++++++----
 README.md                 | 20 ++++++++++++++------
 inst/WORDLIST             |  3 ++-
 man/brulee_activations.Rd |  9 +++++++++
 man/brulee_linear_reg.Rd  |  5 ++---
 man/brulee_mlp.Rd         |  2 +-
 man/matrix_to_dataset.Rd  |  4 ++++
 11 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 6d4cf27..2d259e0 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,12 @@
 
 * Fixed a bug where SGD always being used as the optimizer (#61). 
 
+* Added many more activation functions for `brulee_mlp()` (#74).
+
+* Enabled GPUs for computations (#).
+
+* Rewrote the unit test suite due to irreproducibility issues across operating system (#75).
+
 # brulee 0.2.0
 
 * Several learning rate schedulers were added to the modeling functions (#12).
diff --git a/R/activation.R b/R/activation.R
index c650af8..8bdf561 100644
--- a/R/activation.R
+++ b/R/activation.R
@@ -7,6 +7,13 @@ allowed_activation <-
 #' Activation functions for neural networks in brulee
 #'
 #' @return A character vector of values.
+#' @seealso [torch::nn_celu()], [torch::nn_elu()], [torch::nn_gelu()],
+#' [torch::nn_hardshrink()], [torch::nn_hardsigmoid()], [torch::nn_hardtanh()],
+#' [torch::nn_leaky_relu()], [torch::nn_identity()], [torch::nn_log_sigmoid()],
+#' [torch::nn_relu()], [torch::nn_relu6()], [torch::nn_rrelu()], [torch::nn_selu()],
+#' [torch::nn_sigmoid()], [torch::nn_silu()], [torch::nn_softplus()],
+#' [torch::nn_softshrink()], [torch::nn_softsign()], [torch::nn_tanh()],
+#' [torch::nn_tanhshrink()]
 #' @export
 brulee_activations <- function() {
  allowed_activation
diff --git a/R/convert_data.R b/R/convert_data.R
index 1b38709..35536a5 100644
--- a/R/convert_data.R
+++ b/R/convert_data.R
@@ -6,6 +6,9 @@
 #' @param x A numeric matrix of predictors.
 #' @param y A vector. If regression than `y` is numeric. For classification, it
 #'  is a factor.
+#' @param device A character string to denote which processor to use with
+#' possibles values: `"cpu"`, `"cuda"`, `"mps"`, and `"auto"`. The last value
+#' uses [guess_brulee_device()] to make the determination.
 #' @return An R6 index sampler object with classes "training_set",
 #'  "dataset", and "R6".
 #' @details Missing values should be removed before passing data to this function.
diff --git a/R/linear_reg-fit.R b/R/linear_reg-fit.R
index 6bcaa04..38b85e9 100644
--- a/R/linear_reg-fit.R
+++ b/R/linear_reg-fit.R
@@ -91,7 +91,7 @@
 #'  set.seed(1)
 #'  brulee_linear_reg(x = as.matrix(ames_train[, c("Longitude", "Latitude")]),
 #'                     y = ames_train$Sale_Price,
-#'                     penalty = 0.10, epochs = 1, batch_size = 64)
+#'                     penalty = 0.10, epochs = 10)
 #'
 #'  # Using recipe
 #'  library(recipes)
@@ -112,8 +112,7 @@
 #'     step_normalize(all_numeric_predictors())
 #'
 #'  set.seed(2)
-#'  fit <- brulee_linear_reg(ames_rec, data = ames_train,
-#'                            epochs = 5, batch_size = 32)
+#'  fit <- brulee_linear_reg(ames_rec, data = ames_train, epochs = 5)
 #'  fit
 #'
 #'  autoplot(fit)
diff --git a/README.Rmd b/README.Rmd
index 9acf4e6..530834d 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -28,6 +28,14 @@ The R `brulee` package contains several basic modeling functions that use the `t
 * [logistic regression](https://brulee.tidymodels.org/reference/brulee_logistic_reg.html)
 * [multinomial regression](https://brulee.tidymodels.org/reference/brulee_multinomial_reg.html)
 
+Some interesting features: 
+
+ * Early stopping based on an internal validation set. 
+ * L1 and L2 penalization, also dropout. 
+ * GPU utilization for models. 
+ * Class-specific weights for cost-sensitive learning. 
+ * Data frame, matrix, formula, and recipe interfaces for models. 
+
 
 ## Installation
 
@@ -59,8 +67,9 @@ library(yardstick)
 
 data(bivariate, package = "modeldata")
 set.seed(20)
-nn_log_biv <- brulee_mlp(Class ~ log(A) + log(B), data = bivariate_train, 
-                         epochs = 150, hidden_units = 3)
+nn_log_biv <- brulee_mlp(Class ~ log(A) + log(B), 
+                         data = bivariate_train, 
+                         hidden_units = 3)
 
 # We use the tidymodels semantics to always return a tibble when predicting
 predict(nn_log_biv, bivariate_test, type = "prob") %>% 
@@ -80,9 +89,8 @@ rec <-
 
 set.seed(20)
 nn_rec_biv <- brulee_mlp(rec, data = bivariate_train, 
-                         epochs = 150, hidden_units = 3)
+                         hidden_units = 3)
 
-# A little better
 predict(nn_rec_biv, bivariate_test, type = "prob") %>% 
   bind_cols(bivariate_test) %>% 
   roc_auc(Class, .pred_One)
diff --git a/README.md b/README.md
index f4d8736..d89cebd 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,14 @@ use the `torch` package infrastructure, such as:
 - [multinomial
   regression](https://brulee.tidymodels.org/reference/brulee_multinomial_reg.html)
 
+Some interesting features:
+
+- Early stopping based on an internal validation set.
+- L1 and L2 penalization, also dropout.
+- GPU utilization for models.
+- Class-specific weights for cost-sensitive learning.
+- Data frame, matrix, formula, and recipe interfaces for models.
+
 ## Installation
 
 You can install the released version of brulee from
@@ -53,8 +61,9 @@ library(yardstick)
 
 data(bivariate, package = "modeldata")
 set.seed(20)
-nn_log_biv <- brulee_mlp(Class ~ log(A) + log(B), data = bivariate_train, 
-                         epochs = 150, hidden_units = 3)
+nn_log_biv <- brulee_mlp(Class ~ log(A) + log(B), 
+                         data = bivariate_train, 
+                         hidden_units = 3)
 
 # We use the tidymodels semantics to always return a tibble when predicting
 predict(nn_log_biv, bivariate_test, type = "prob") %>% 
@@ -63,7 +72,7 @@ predict(nn_log_biv, bivariate_test, type = "prob") %>%
 #> # A tibble: 1 × 3
 #>   .metric .estimator .estimate
 #>   <chr>   <chr>          <dbl>
-#> 1 roc_auc binary         0.410
+#> 1 roc_auc binary         0.838
 ```
 
 A recipe can also be used if the data require some sort of preprocessing
@@ -79,16 +88,15 @@ rec <-
 
 set.seed(20)
 nn_rec_biv <- brulee_mlp(rec, data = bivariate_train, 
-                         epochs = 150, hidden_units = 3)
+                         hidden_units = 3)
 
-# A little better
 predict(nn_rec_biv, bivariate_test, type = "prob") %>% 
   bind_cols(bivariate_test) %>% 
   roc_auc(Class, .pred_One)
 #> # A tibble: 1 × 3
 #>   .metric .estimator .estimate
 #>   <chr>   <chr>          <dbl>
-#> 1 roc_auc binary         0.708
+#> 1 roc_auc binary         0.866
 ```
 
 ## Code of Conduct
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 56eb8d0..4da6301 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -1,13 +1,14 @@
 CMD
 Codecov
+GPUs
 LBFGS
 Lifecycle
 ORCID
 PBC
 SGD
-elu
 extensibility
 funder
+irreproducibility
 mlp
 multilayer
 perceptrons
diff --git a/man/brulee_activations.Rd b/man/brulee_activations.Rd
index ac2cf93..078ce06 100644
--- a/man/brulee_activations.Rd
+++ b/man/brulee_activations.Rd
@@ -12,3 +12,12 @@ A character vector of values.
 \description{
 Activation functions for neural networks in brulee
 }
+\seealso{
+\code{\link[torch:nn_celu]{torch::nn_celu()}}, \code{\link[torch:nn_elu]{torch::nn_elu()}}, \code{\link[torch:nn_gelu]{torch::nn_gelu()}},
+\code{\link[torch:nn_hardshrink]{torch::nn_hardshrink()}}, \code{\link[torch:nn_hardsigmoid]{torch::nn_hardsigmoid()}}, \code{\link[torch:nn_hardtanh]{torch::nn_hardtanh()}},
+\code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_identity]{torch::nn_identity()}}, \code{\link[torch:nn_log_sigmoid]{torch::nn_log_sigmoid()}},
+\code{\link[torch:nn_relu]{torch::nn_relu()}}, \code{\link[torch:nn_relu6]{torch::nn_relu6()}}, \code{\link[torch:nn_rrelu]{torch::nn_rrelu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}},
+\code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}, \code{\link[torch:nn_silu]{torch::nn_silu()}}, \code{\link[torch:nn_softplus]{torch::nn_softplus()}},
+\code{\link[torch:nn_softshrink]{torch::nn_softshrink()}}, \code{\link[torch:nn_softsign]{torch::nn_softsign()}}, \code{\link[torch:nn_tanh]{torch::nn_tanh()}},
+\code{\link[torch:nn_tanhshrink]{torch::nn_tanhshrink()}}
+}
diff --git a/man/brulee_linear_reg.Rd b/man/brulee_linear_reg.Rd
index c951dcf..237d94a 100644
--- a/man/brulee_linear_reg.Rd
+++ b/man/brulee_linear_reg.Rd
@@ -213,7 +213,7 @@ if (torch::torch_is_installed()) {
  set.seed(1)
  brulee_linear_reg(x = as.matrix(ames_train[, c("Longitude", "Latitude")]),
                     y = ames_train$Sale_Price,
-                    penalty = 0.10, epochs = 1, batch_size = 64)
+                    penalty = 0.10, epochs = 10)
 
  # Using recipe
  library(recipes)
@@ -234,8 +234,7 @@ if (torch::torch_is_installed()) {
     step_normalize(all_numeric_predictors())
 
  set.seed(2)
- fit <- brulee_linear_reg(ames_rec, data = ames_train,
-                           epochs = 5, batch_size = 32)
+ fit <- brulee_linear_reg(ames_rec, data = ames_train, epochs = 5)
  fit
 
  autoplot(fit)
diff --git a/man/brulee_mlp.Rd b/man/brulee_mlp.Rd
index 14a6917..a4a3318 100644
--- a/man/brulee_mlp.Rd
+++ b/man/brulee_mlp.Rd
@@ -74,7 +74,7 @@ brulee_mlp(x, ...)
   batch_size = NULL,
   class_weights = NULL,
   stop_iter = 5,
-  device = device,
+  device = "cpu",
   verbose = FALSE,
   ...
 )
diff --git a/man/matrix_to_dataset.Rd b/man/matrix_to_dataset.Rd
index b8b2dc1..6f0ab61 100644
--- a/man/matrix_to_dataset.Rd
+++ b/man/matrix_to_dataset.Rd
@@ -11,6 +11,10 @@ matrix_to_dataset(x, y, device = "cpu")
 
 \item{y}{A vector. If regression than \code{y} is numeric. For classification, it
 is a factor.}
+
+\item{device}{A character string to denote which processor to use with
+possibles values: \code{"cpu"}, \code{"cuda"}, \code{"mps"}, and \code{"auto"}. The last value
+uses \code{\link[=guess_brulee_device]{guess_brulee_device()}} to make the determination.}
 }
 \value{
 An R6 index sampler object with classes "training_set",