From 65d21b6bd34f80ce0f93bff55f98129746b7eeb6 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 1 Nov 2023 17:10:08 -0400 Subject: [PATCH 1/6] initial setup for MLP models --- R/checks.R | 6 +++--- R/convert_data.R | 8 ++++---- R/device.R | 16 ++++++++++++++++ R/mlp-fit.R | 21 ++++++++++++++++++--- 4 files changed, 41 insertions(+), 10 deletions(-) create mode 100644 R/device.R diff --git a/R/checks.R b/R/checks.R index e4c6b32..61c2417 100644 --- a/R/checks.R +++ b/R/checks.R @@ -177,14 +177,14 @@ check_logical <- function(x, single = TRUE, fn = NULL) { } -check_class_weights <- function(wts, lvls, xtab, fn) { +check_class_weights <- function(wts, lvls, xtab, fn, device = NULL) { if (length(lvls) == 0) { return(NULL) } if (is.null(wts)) { wts <- rep(1, length(lvls)) - return(torch::torch_tensor(wts)) + return(torch::torch_tensor(wts, device = device)) } if (!is.numeric(wts)) { msg <- paste(format_msg(fn, "class_weights"), "to a numeric vector") @@ -217,5 +217,5 @@ check_class_weights <- function(wts, lvls, xtab, fn) { } - torch::torch_tensor(wts) + torch::torch_tensor(wts, device = device) } diff --git a/R/convert_data.R b/R/convert_data.R index 90cc0b8..c5f5e3d 100644 --- a/R/convert_data.R +++ b/R/convert_data.R @@ -14,13 +14,13 @@ #' matrix_to_dataset(as.matrix(mtcars[, -1]), mtcars$mpg) #' } #' @export -matrix_to_dataset <- function(x, y) { - x <- torch::torch_tensor(x) +matrix_to_dataset <- function(x, y, device) { + x <- torch::torch_tensor(x, device = device) if (is.factor(y)) { y <- as.numeric(y) - y <- torch::torch_tensor(y, dtype = torch_long()) + y <- torch::torch_tensor(y, dtype = torch_long(), device = device) } else { - y <- torch::torch_tensor(y) + y <- torch::torch_tensor(y, device = device) } torch::tensor_dataset(x = x, y = y) } diff --git a/R/device.R b/R/device.R new file mode 100644 index 0000000..3bbd3c5 --- /dev/null +++ b/R/device.R @@ -0,0 +1,16 @@ +#' Determine an appropriate computational device for torch +#' Uses \pkg{torch} functions to determine if there is a GPU available for use. +#' @return A character string, one of: `"cpu"`, `"cuda"`, or `"mps"`. +#' @examplesI +#' guess_brulee_device() +#' @export +guess_brulee_device <- function() { + if (torch::backends_mps_is_available()) { + dev <- "mps" + } else if (torch::cuda_is_available()) { + dev <- "cuda" + } else { + dev <- "cpu" + } + dev +} diff --git a/R/mlp-fit.R b/R/mlp-fit.R index 3ccbd45..bf3fc57 100644 --- a/R/mlp-fit.R +++ b/R/mlp-fit.R @@ -248,6 +248,7 @@ brulee_mlp.data.frame <- batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(x, y) @@ -268,6 +269,7 @@ brulee_mlp.data.frame <- batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -293,6 +295,7 @@ brulee_mlp.matrix <- function(x, batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(x, y) @@ -313,6 +316,7 @@ brulee_mlp.matrix <- function(x, batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -339,6 +343,7 @@ brulee_mlp.formula <- batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = device, verbose = FALSE, ...) { processed <- hardhat::mold(formula, data) @@ -359,6 +364,7 @@ brulee_mlp.formula <- batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -385,6 +391,7 @@ brulee_mlp.recipe <- batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(x, data) @@ -405,6 +412,7 @@ brulee_mlp.recipe <- batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -416,7 +424,7 @@ brulee_mlp.recipe <- brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation, learn_rate, rate_schedule, momentum, penalty, mixture, dropout, class_weights, validation, optimizer, - batch_size, stop_iter, verbose, ...) { + batch_size, stop_iter, device, verbose, ...) { if(!torch::torch_is_installed()) { rlang::abort("The torch backend has not been installed; use `torch::install_torch()`.") } @@ -457,7 +465,11 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation, check_logical(verbose, single = TRUE, fn = f_nm) check_character(activation, single = FALSE, fn = f_nm) + # ------------------------------------------------------------------------------ + if (is.null(device)) { + device <- guess_brulee_device() + } ## ----------------------------------------------------------------------------- @@ -483,7 +495,7 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation, lvls <- levels(outcome) xtab <- table(outcome) - class_weights <- check_class_weights(class_weights, lvls, xtab, f_nm) + class_weights <- check_class_weights(class_weights, lvls, xtab, f_nm, device = device) ## ----------------------------------------------------------------------------- @@ -505,6 +517,7 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation, batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -577,6 +590,7 @@ mlp_fit_imp <- activation = "relu", class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { @@ -643,7 +657,7 @@ mlp_fit_imp <- ## --------------------------------------------------------------------------- # Convert to index sampler and data loader - ds <- brulee::matrix_to_dataset(x, y) + ds <- brulee::matrix_to_dataset(x, y, device = device) dl <- torch::dataloader(ds, batch_size = batch_size) if (validation > 0) { @@ -654,6 +668,7 @@ mlp_fit_imp <- ## --------------------------------------------------------------------------- # Initialize model and optimizer model <- mlp_module(ncol(x), hidden_units, activation, dropout, y_dim) + model$to(device = device) loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture) # Set the optimizer From fe3c827dbc1cba31fedfa91d19a24f7449c36e62 Mon Sep 17 00:00:00 2001 From: topepo Date: Thu, 2 Nov 2023 07:29:34 -0400 Subject: [PATCH 2/6] initial support for GPU for #55 --- NAMESPACE | 1 + R/convert_data.R | 2 +- R/device.R | 3 ++- R/mlp-fit.R | 9 +++++++-- man/brulee_mlp.Rd | 7 +++++++ man/guess_brulee_device.Rd | 17 +++++++++++++++++ man/matrix_to_dataset.Rd | 2 +- 7 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 man/guess_brulee_device.Rd diff --git a/NAMESPACE b/NAMESPACE index e28f00d..52107df 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -47,6 +47,7 @@ export(brulee_logistic_reg) export(brulee_mlp) export(brulee_multinomial_reg) export(coef) +export(guess_brulee_device) export(matrix_to_dataset) export(schedule_cyclic) export(schedule_decay_expo) diff --git a/R/convert_data.R b/R/convert_data.R index c5f5e3d..f4434e2 100644 --- a/R/convert_data.R +++ b/R/convert_data.R @@ -14,7 +14,7 @@ #' matrix_to_dataset(as.matrix(mtcars[, -1]), mtcars$mpg) #' } #' @export -matrix_to_dataset <- function(x, y, device) { +matrix_to_dataset <- function(x, y, device = "cpu") { x <- torch::torch_tensor(x, device = device) if (is.factor(y)) { y <- as.numeric(y) diff --git a/R/device.R b/R/device.R index 3bbd3c5..47a9c8a 100644 --- a/R/device.R +++ b/R/device.R @@ -1,7 +1,8 @@ #' Determine an appropriate computational device for torch +#' #' Uses \pkg{torch} functions to determine if there is a GPU available for use. #' @return A character string, one of: `"cpu"`, `"cuda"`, or `"mps"`. -#' @examplesI +#' @examples #' guess_brulee_device() #' @export guess_brulee_device <- function() { diff --git a/R/mlp-fit.R b/R/mlp-fit.R index bf3fc57..f6b8d7d 100644 --- a/R/mlp-fit.R +++ b/R/mlp-fit.R @@ -65,6 +65,9 @@ #' @param stop_iter A non-negative integer for how many iterations with no #' improvement before stopping. #' @param verbose A logical that prints out the iteration history. +#' @param device A character string or `NULL` (if you want it to guess). Possible +#' values are `"cpu"`, `"cuda"`, `"mps"`, `"auto"`. The last value uses +#' [guess_brulee_device()]. #' @param ... Options to pass to the learning rate schedulers via #' [set_learn_rate()]. For example, the `reduction` or `steps` arguments to #' [schedule_step()] could be passed here. @@ -465,9 +468,11 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation, check_logical(verbose, single = TRUE, fn = f_nm) check_character(activation, single = FALSE, fn = f_nm) + # ------------------------------------------------------------------------------ - if (is.null(device)) { + device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps")) + if (device == "auto") { device <- guess_brulee_device() } @@ -661,7 +666,7 @@ mlp_fit_imp <- dl <- torch::dataloader(ds, batch_size = batch_size) if (validation > 0) { - ds_val <- brulee::matrix_to_dataset(x_val, y_val) + ds_val <- brulee::matrix_to_dataset(x_val, y_val, device = device) dl_val <- torch::dataloader(ds_val) } diff --git a/man/brulee_mlp.Rd b/man/brulee_mlp.Rd index d73a870..26d37c3 100644 --- a/man/brulee_mlp.Rd +++ b/man/brulee_mlp.Rd @@ -30,6 +30,7 @@ brulee_mlp(x, ...) batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -51,6 +52,7 @@ brulee_mlp(x, ...) batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -72,6 +74,7 @@ brulee_mlp(x, ...) batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = device, verbose = FALSE, ... ) @@ -93,6 +96,7 @@ brulee_mlp(x, ...) batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -174,6 +178,9 @@ and all other classes receive a weight of one. \item{stop_iter}{A non-negative integer for how many iterations with no improvement before stopping.} +\item{device}{A character string or \code{NULL} (if you want it to guess). Possible +values are \code{"cpu"}, \code{"cuda"}, or \code{"mps"}. See \code{\link[=guess_brulee_device]{guess_brulee_device()}}.} + \item{verbose}{A logical that prints out the iteration history.} \item{formula}{A formula specifying the outcome term(s) on the left-hand side, diff --git a/man/guess_brulee_device.Rd b/man/guess_brulee_device.Rd new file mode 100644 index 0000000..2e7d9ef --- /dev/null +++ b/man/guess_brulee_device.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/device.R +\name{guess_brulee_device} +\alias{guess_brulee_device} +\title{Determine an appropriate computational device for torch} +\usage{ +guess_brulee_device() +} +\value{ +A character string, one of: \code{"cpu"}, \code{"cuda"}, or \code{"mps"}. +} +\description{ +Uses \pkg{torch} functions to determine if there is a GPU available for use. +} +\examples{ +guess_brulee_device() +} diff --git a/man/matrix_to_dataset.Rd b/man/matrix_to_dataset.Rd index 7fb2fdf..6350b30 100644 --- a/man/matrix_to_dataset.Rd +++ b/man/matrix_to_dataset.Rd @@ -4,7 +4,7 @@ \alias{matrix_to_dataset} \title{Convert data to torch format} \usage{ -matrix_to_dataset(x, y) +matrix_to_dataset(x, y, device) } \arguments{ \item{x}{A numeric matrix of predictors.} From b3b836f4d00e825bc4d21eb389999a0f9bb10077 Mon Sep 17 00:00:00 2001 From: topepo Date: Thu, 2 Nov 2023 08:14:53 -0400 Subject: [PATCH 3/6] linear and logistic regression --- R/linear_reg-fit.R | 25 ++++++++++++++++++++++--- R/logistic_reg-fit.R | 24 +++++++++++++++++++++--- R/mlp-fit.R | 7 +++---- man/brulee_linear_reg.Rd | 8 ++++++++ man/brulee_logistic_reg.Rd | 8 ++++++++ man/brulee_mlp.Rd | 5 +++-- man/matrix_to_dataset.Rd | 2 +- 7 files changed, 66 insertions(+), 13 deletions(-) diff --git a/R/linear_reg-fit.R b/R/linear_reg-fit.R index 39af2f1..628724b 100644 --- a/R/linear_reg-fit.R +++ b/R/linear_reg-fit.R @@ -161,6 +161,7 @@ brulee_linear_reg.data.frame <- momentum = 0.0, batch_size = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(x, y) @@ -176,6 +177,7 @@ brulee_linear_reg.data.frame <- momentum = momentum, batch_size = batch_size, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -196,6 +198,7 @@ brulee_linear_reg.matrix <- function(x, momentum = 0.0, batch_size = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(x, y) @@ -211,6 +214,7 @@ brulee_linear_reg.matrix <- function(x, validation = validation, batch_size = batch_size, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -232,6 +236,7 @@ brulee_linear_reg.formula <- momentum = 0.0, batch_size = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(formula, data) @@ -247,6 +252,7 @@ brulee_linear_reg.formula <- validation = validation, batch_size = batch_size, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -268,6 +274,7 @@ brulee_linear_reg.recipe <- momentum = 0.0, batch_size = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(x, data) @@ -283,6 +290,7 @@ brulee_linear_reg.recipe <- validation = validation, batch_size = batch_size, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -293,7 +301,8 @@ brulee_linear_reg.recipe <- brulee_linear_reg_bridge <- function(processed, epochs, optimizer, learn_rate, momentum, penalty, mixture, dropout, - validation, batch_size, stop_iter, verbose, ...) { + validation, batch_size, stop_iter, device, + verbose, ...) { if(!torch::torch_is_installed()) { rlang::abort("The torch backend has not been installed; use `torch::install_torch()`.") } @@ -318,6 +327,13 @@ brulee_linear_reg_bridge <- function(processed, epochs, optimizer, check_double(learn_rate, single = TRUE, 0, incl = c(FALSE, TRUE), fn = f_nm) check_logical(verbose, single = TRUE, fn = f_nm) + # ------------------------------------------------------------------------------ + + device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps")) + if (device == "auto") { + device <- guess_brulee_device() + } + ## ----------------------------------------------------------------------------- predictors <- processed$predictors @@ -353,6 +369,7 @@ brulee_linear_reg_bridge <- function(processed, epochs, optimizer, validation = validation, batch_size = batch_size, stop_iter = stop_iter, + device = device, verbose = verbose ) @@ -413,6 +430,7 @@ linear_reg_fit_imp <- learn_rate = 1, momentum = 0.0, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { @@ -460,17 +478,18 @@ linear_reg_fit_imp <- ## --------------------------------------------------------------------------- # Convert to index sampler and data loader - ds <- brulee::matrix_to_dataset(x, y) + ds <- brulee::matrix_to_dataset(x, y, device = device) dl <- torch::dataloader(ds, batch_size = batch_size) if (validation > 0) { - ds_val <- brulee::matrix_to_dataset(x_val, y_val) + ds_val <- brulee::matrix_to_dataset(x_val, y_val, device = device) dl_val <- torch::dataloader(ds_val) } ## --------------------------------------------------------------------------- # Initialize model and optimizer model <- linear_reg_module(ncol(x)) + model$to(device = device) loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture) # Write a optim wrapper diff --git a/R/logistic_reg-fit.R b/R/logistic_reg-fit.R index 859bd36..e88dcc4 100644 --- a/R/logistic_reg-fit.R +++ b/R/logistic_reg-fit.R @@ -149,6 +149,7 @@ brulee_logistic_reg.data.frame <- batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(x, y) @@ -165,6 +166,7 @@ brulee_logistic_reg.data.frame <- batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -186,6 +188,7 @@ brulee_logistic_reg.matrix <- function(x, batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(x, y) @@ -202,6 +205,7 @@ brulee_logistic_reg.matrix <- function(x, batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -224,6 +228,7 @@ brulee_logistic_reg.formula <- batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(formula, data) @@ -240,6 +245,7 @@ brulee_logistic_reg.formula <- batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -262,6 +268,7 @@ brulee_logistic_reg.recipe <- batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { processed <- hardhat::mold(x, data) @@ -278,6 +285,7 @@ brulee_logistic_reg.recipe <- batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose, ... ) @@ -288,7 +296,7 @@ brulee_logistic_reg.recipe <- brulee_logistic_reg_bridge <- function(processed, epochs, optimizer, learn_rate, momentum, penalty, mixture, class_weights, - validation, batch_size, stop_iter, verbose, ...) { + validation, batch_size, stop_iter, device, verbose, ...) { if(!torch::torch_is_installed()) { rlang::abort("The torch backend has not been installed; use `torch::install_torch()`.") } @@ -312,6 +320,13 @@ brulee_logistic_reg_bridge <- function(processed, epochs, optimizer, check_double(learn_rate, single = TRUE, 0, incl = c(FALSE, TRUE), fn = f_nm) check_logical(verbose, single = TRUE, fn = f_nm) + # ------------------------------------------------------------------------------ + + device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps")) + if (device == "auto") { + device <- guess_brulee_device() + } + ## ----------------------------------------------------------------------------- predictors <- processed$predictors @@ -362,6 +377,7 @@ brulee_logistic_reg_bridge <- function(processed, epochs, optimizer, batch_size = batch_size, class_weights = class_weights, stop_iter = stop_iter, + device = device, verbose = verbose ) @@ -423,6 +439,7 @@ logistic_reg_fit_imp <- momentum = 0.0, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ...) { @@ -471,17 +488,18 @@ logistic_reg_fit_imp <- ## --------------------------------------------------------------------------- # Convert to index sampler and data loader - ds <- brulee::matrix_to_dataset(x, y) + ds <- brulee::matrix_to_dataset(x, y, device = device) dl <- torch::dataloader(ds, batch_size = batch_size) if (validation > 0) { - ds_val <- brulee::matrix_to_dataset(x_val, y_val) + ds_val <- brulee::matrix_to_dataset(x_val, y_val, device = device) dl_val <- torch::dataloader(ds_val) } ## --------------------------------------------------------------------------- # Initialize model and optimizer model <- logistic_module(ncol(x), y_dim) + model$to(device = device) loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture) # Write a optim wrapper diff --git a/R/mlp-fit.R b/R/mlp-fit.R index f6b8d7d..454143f 100644 --- a/R/mlp-fit.R +++ b/R/mlp-fit.R @@ -65,9 +65,9 @@ #' @param stop_iter A non-negative integer for how many iterations with no #' improvement before stopping. #' @param verbose A logical that prints out the iteration history. -#' @param device A character string or `NULL` (if you want it to guess). Possible -#' values are `"cpu"`, `"cuda"`, `"mps"`, `"auto"`. The last value uses -#' [guess_brulee_device()]. +#' @param device A character string to denote which processor to use with +#' possibles values: `"cpu"`, `"cuda"`, `"mps"`, and `"auto"`. The last value +#' uses [guess_brulee_device()] to make the determination. #' @param ... Options to pass to the learning rate schedulers via #' [set_learn_rate()]. For example, the `reduction` or `steps` arguments to #' [schedule_step()] could be passed here. @@ -468,7 +468,6 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation, check_logical(verbose, single = TRUE, fn = f_nm) check_character(activation, single = FALSE, fn = f_nm) - # ------------------------------------------------------------------------------ device <- rlang::arg_match(device, c("cpu", "auto", "cuda", "mps")) diff --git a/man/brulee_linear_reg.Rd b/man/brulee_linear_reg.Rd index bef2b6f..5324b20 100644 --- a/man/brulee_linear_reg.Rd +++ b/man/brulee_linear_reg.Rd @@ -25,6 +25,7 @@ brulee_linear_reg(x, ...) momentum = 0, batch_size = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -41,6 +42,7 @@ brulee_linear_reg(x, ...) momentum = 0, batch_size = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -57,6 +59,7 @@ brulee_linear_reg(x, ...) momentum = 0, batch_size = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -73,6 +76,7 @@ brulee_linear_reg(x, ...) momentum = 0, batch_size = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -127,6 +131,10 @@ batch. (\code{optimizer = "SGD"} only)} \item{stop_iter}{A non-negative integer for how many iterations with no improvement before stopping.} +\item{device}{A character string to denote which processor to use with +possibles values: \code{"cpu"}, \code{"cuda"}, \code{"mps"}, and \code{"auto"}. The last value +uses \code{\link[=guess_brulee_device]{guess_brulee_device()}} to make the determination.} + \item{verbose}{A logical that prints out the iteration history.} \item{formula}{A formula specifying the outcome term(s) on the left-hand side, diff --git a/man/brulee_logistic_reg.Rd b/man/brulee_logistic_reg.Rd index 315dbc7..728d6c7 100644 --- a/man/brulee_logistic_reg.Rd +++ b/man/brulee_logistic_reg.Rd @@ -26,6 +26,7 @@ brulee_logistic_reg(x, ...) batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -43,6 +44,7 @@ brulee_logistic_reg(x, ...) batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -60,6 +62,7 @@ brulee_logistic_reg(x, ...) batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -77,6 +80,7 @@ brulee_logistic_reg(x, ...) batch_size = NULL, class_weights = NULL, stop_iter = 5, + device = "cpu", verbose = FALSE, ... ) @@ -142,6 +146,10 @@ and all other classes receive a weight of one. \item{stop_iter}{A non-negative integer for how many iterations with no improvement before stopping.} +\item{device}{A character string to denote which processor to use with +possibles values: \code{"cpu"}, \code{"cuda"}, \code{"mps"}, and \code{"auto"}. The last value +uses \code{\link[=guess_brulee_device]{guess_brulee_device()}} to make the determination.} + \item{verbose}{A logical that prints out the iteration history.} \item{formula}{A formula specifying the outcome term(s) on the left-hand side, diff --git a/man/brulee_mlp.Rd b/man/brulee_mlp.Rd index 26d37c3..f14ed0c 100644 --- a/man/brulee_mlp.Rd +++ b/man/brulee_mlp.Rd @@ -178,8 +178,9 @@ and all other classes receive a weight of one. \item{stop_iter}{A non-negative integer for how many iterations with no improvement before stopping.} -\item{device}{A character string or \code{NULL} (if you want it to guess). Possible -values are \code{"cpu"}, \code{"cuda"}, or \code{"mps"}. See \code{\link[=guess_brulee_device]{guess_brulee_device()}}.} +\item{device}{A character string to denote which processor to use with +possibles values: \code{"cpu"}, \code{"cuda"}, \code{"mps"}, and \code{"auto"}. The last value +uses \code{\link[=guess_brulee_device]{guess_brulee_device()}} to make the determination.} \item{verbose}{A logical that prints out the iteration history.} diff --git a/man/matrix_to_dataset.Rd b/man/matrix_to_dataset.Rd index 6350b30..b8b2dc1 100644 --- a/man/matrix_to_dataset.Rd +++ b/man/matrix_to_dataset.Rd @@ -4,7 +4,7 @@ \alias{matrix_to_dataset} \title{Convert data to torch format} \usage{ -matrix_to_dataset(x, y, device) +matrix_to_dataset(x, y, device = "cpu") } \arguments{ \item{x}{A numeric matrix of predictors.} From b00f68a21cdbfb34411fa1d004ace7eb1440aee5 Mon Sep 17 00:00:00 2001 From: topepo Date: Thu, 2 Nov 2023 10:26:51 -0400 Subject: [PATCH 4/6] added docs --- R/linear_reg-fit.R | 2 ++ R/logistic_reg-fit.R | 2 ++ R/mlp-fit.R | 3 +++ man/brulee_linear_reg.Rd | 3 +++ man/brulee_logistic_reg.Rd | 3 +++ man/brulee_mlp.Rd | 3 +++ 6 files changed, 16 insertions(+) diff --git a/R/linear_reg-fit.R b/R/linear_reg-fit.R index 628724b..4e746ad 100644 --- a/R/linear_reg-fit.R +++ b/R/linear_reg-fit.R @@ -53,6 +53,8 @@ #' The zeroing out of parameters is a specific feature the optimization method #' used in those packages. #' +#' If GPU computing is requested via the `device` argument, note that torch +#' can't set the random number seeds in the GPU. #' @seealso [predict.brulee_linear_reg()], [coef.brulee_linear_reg()], #' [autoplot.brulee_linear_reg()] #' diff --git a/R/logistic_reg-fit.R b/R/logistic_reg-fit.R index e88dcc4..4a76a67 100644 --- a/R/logistic_reg-fit.R +++ b/R/logistic_reg-fit.R @@ -57,6 +57,8 @@ #' The zeroing out of parameters is a specific feature the optimization method #' used in those packages. #' +#' If GPU computing is requested via the `device` argument, note that torch +#' can't set the random number seeds in the GPU. #' @seealso [predict.brulee_logistic_reg()], [coef.brulee_logistic_reg()], #' [autoplot.brulee_logistic_reg()] #' diff --git a/R/mlp-fit.R b/R/mlp-fit.R index 454143f..04942b2 100644 --- a/R/mlp-fit.R +++ b/R/mlp-fit.R @@ -102,6 +102,9 @@ #' The zeroing out of parameters is a specific feature the optimization method #' used in those packages. #' +#' If GPU computing is requested via the `device` argument, note that torch +#' can't set the random number seeds in the GPU. +#' #' ## Learning Rates #' #' The learning rate can be set to constant (the default) or dynamically set diff --git a/man/brulee_linear_reg.Rd b/man/brulee_linear_reg.Rd index 5324b20..c951dcf 100644 --- a/man/brulee_linear_reg.Rd +++ b/man/brulee_linear_reg.Rd @@ -189,6 +189,9 @@ The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force parameters to be strictly zero (as it does in packages such as \pkg{glmnet}). The zeroing out of parameters is a specific feature the optimization method used in those packages. + +If GPU computing is requested via the \code{device} argument, note that torch +can't set the random number seeds in the GPU. } \examples{ \donttest{ diff --git a/man/brulee_logistic_reg.Rd b/man/brulee_logistic_reg.Rd index 728d6c7..57ccb6d 100644 --- a/man/brulee_logistic_reg.Rd +++ b/man/brulee_logistic_reg.Rd @@ -200,6 +200,9 @@ The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force parameters to be strictly zero (as it does in packages such as \pkg{glmnet}). The zeroing out of parameters is a specific feature the optimization method used in those packages. + +If GPU computing is requested via the \code{device} argument, note that torch +can't set the random number seeds in the GPU. } \examples{ \donttest{ diff --git a/man/brulee_mlp.Rd b/man/brulee_mlp.Rd index f14ed0c..cecfd07 100644 --- a/man/brulee_mlp.Rd +++ b/man/brulee_mlp.Rd @@ -239,6 +239,9 @@ The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force parameters to be strictly zero (as it does in packages such as \pkg{glmnet}). The zeroing out of parameters is a specific feature the optimization method used in those packages. + +If GPU computing is requested via the \code{device} argument, note that torch +can't set the random number seeds in the GPU. \subsection{Learning Rates}{ The learning rate can be set to constant (the default) or dynamically set From 69f7628b3129daef51634b84193459befccaef95 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Sun, 5 Nov 2023 11:41:10 -0500 Subject: [PATCH 5/6] gpu tests --- tests/testthat/test-gpu.R | 67 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 tests/testthat/test-gpu.R diff --git a/tests/testthat/test-gpu.R b/tests/testthat/test-gpu.R new file mode 100644 index 0000000..975f8ff --- /dev/null +++ b/tests/testthat/test-gpu.R @@ -0,0 +1,67 @@ +test_that('device type - mac ARM', { + + skip_if_not(torch::torch_is_installed()) + skip_on_os(c("windows", "linux", "solaris")) + skip_on_os("mac", arch = "x86_64") + expect_snapshot(guess_brulee_device()) +}) + +test_that('device type - cpu', { + + skip_if_not(torch::torch_is_installed()) + skip_on_os("mac", arch = "aarch64") + expect_snapshot(guess_brulee_device()) +}) + + +test_that('linear regression on gpu', { + + skip_if_not(torch::torch_is_installed()) + skip_on_os(c("windows", "linux", "solaris")) + skip_on_os("mac", arch = "x86_64") + + skip_if_not_installed("modeldata") + + set.seed(591) + tr <- sim_regression(1000) + + expect_error( + fit <- brulee_linear_reg(outcome ~ ., data = tr, device = "mps"), + regex = NA + ) +}) + +test_that('logistic regression on gpu', { + + skip_if_not(torch::torch_is_installed()) + skip_on_os(c("windows", "linux", "solaris")) + skip_on_os("mac", arch = "x86_64") + + skip_if_not_installed("modeldata") + + set.seed(591) + tr <- sim_classification(1000) + + expect_error( + fit <- brulee_logistic_reg(class ~ ., data = tr, device = "mps"), + regex = NA + ) +}) + +test_that('mlp on gpu', { + + skip_if_not(torch::torch_is_installed()) + skip_on_os(c("windows", "linux", "solaris")) + skip_on_os("mac", arch = "x86_64") + + skip_if_not_installed("modeldata") + + set.seed(591) + tr <- sim_regression(1000) + + expect_error( + fit <- brulee_mlp(outcome ~ ., data = tr, device = "mps"), + regex = NA + ) +}) + From b46122beeb1fd80bb5aa76a930b2c5fc0e80f45d Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Sun, 5 Nov 2023 12:35:11 -0500 Subject: [PATCH 6/6] documentation updates --- NEWS.md | 6 ++++++ R/activation.R | 7 +++++++ R/convert_data.R | 3 +++ R/linear_reg-fit.R | 5 ++--- README.Rmd | 16 ++++++++++++---- README.md | 20 ++++++++++++++------ inst/WORDLIST | 3 ++- man/brulee_activations.Rd | 9 +++++++++ man/brulee_linear_reg.Rd | 5 ++--- man/brulee_mlp.Rd | 2 +- man/matrix_to_dataset.Rd | 4 ++++ 11 files changed, 62 insertions(+), 18 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6d4cf27..2d259e0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,12 @@ * Fixed a bug where SGD always being used as the optimizer (#61). +* Added many more activation functions for `brulee_mlp()` (#74). + +* Enabled GPUs for computations (#). + +* Rewrote the unit test suite due to irreproducibility issues across operating system (#75). + # brulee 0.2.0 * Several learning rate schedulers were added to the modeling functions (#12). diff --git a/R/activation.R b/R/activation.R index c650af8..8bdf561 100644 --- a/R/activation.R +++ b/R/activation.R @@ -7,6 +7,13 @@ allowed_activation <- #' Activation functions for neural networks in brulee #' #' @return A character vector of values. +#' @seealso [torch::nn_celu()], [torch::nn_elu()], [torch::nn_gelu()], +#' [torch::nn_hardshrink()], [torch::nn_hardsigmoid()], [torch::nn_hardtanh()], +#' [torch::nn_leaky_relu()], [torch::nn_identity()], [torch::nn_log_sigmoid()], +#' [torch::nn_relu()], [torch::nn_relu6()], [torch::nn_rrelu()], [torch::nn_selu()], +#' [torch::nn_sigmoid()], [torch::nn_silu()], [torch::nn_softplus()], +#' [torch::nn_softshrink()], [torch::nn_softsign()], [torch::nn_tanh()], +#' [torch::nn_tanhshrink()] #' @export brulee_activations <- function() { allowed_activation diff --git a/R/convert_data.R b/R/convert_data.R index 1b38709..35536a5 100644 --- a/R/convert_data.R +++ b/R/convert_data.R @@ -6,6 +6,9 @@ #' @param x A numeric matrix of predictors. #' @param y A vector. If regression than `y` is numeric. For classification, it #' is a factor. +#' @param device A character string to denote which processor to use with +#' possibles values: `"cpu"`, `"cuda"`, `"mps"`, and `"auto"`. The last value +#' uses [guess_brulee_device()] to make the determination. #' @return An R6 index sampler object with classes "training_set", #' "dataset", and "R6". #' @details Missing values should be removed before passing data to this function. diff --git a/R/linear_reg-fit.R b/R/linear_reg-fit.R index 6bcaa04..38b85e9 100644 --- a/R/linear_reg-fit.R +++ b/R/linear_reg-fit.R @@ -91,7 +91,7 @@ #' set.seed(1) #' brulee_linear_reg(x = as.matrix(ames_train[, c("Longitude", "Latitude")]), #' y = ames_train$Sale_Price, -#' penalty = 0.10, epochs = 1, batch_size = 64) +#' penalty = 0.10, epochs = 10) #' #' # Using recipe #' library(recipes) @@ -112,8 +112,7 @@ #' step_normalize(all_numeric_predictors()) #' #' set.seed(2) -#' fit <- brulee_linear_reg(ames_rec, data = ames_train, -#' epochs = 5, batch_size = 32) +#' fit <- brulee_linear_reg(ames_rec, data = ames_train, epochs = 5) #' fit #' #' autoplot(fit) diff --git a/README.Rmd b/README.Rmd index 9acf4e6..530834d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -28,6 +28,14 @@ The R `brulee` package contains several basic modeling functions that use the `t * [logistic regression](https://brulee.tidymodels.org/reference/brulee_logistic_reg.html) * [multinomial regression](https://brulee.tidymodels.org/reference/brulee_multinomial_reg.html) +Some interesting features: + + * Early stopping based on an internal validation set. + * L1 and L2 penalization, also dropout. + * GPU utilization for models. + * Class-specific weights for cost-sensitive learning. + * Data frame, matrix, formula, and recipe interfaces for models. + ## Installation @@ -59,8 +67,9 @@ library(yardstick) data(bivariate, package = "modeldata") set.seed(20) -nn_log_biv <- brulee_mlp(Class ~ log(A) + log(B), data = bivariate_train, - epochs = 150, hidden_units = 3) +nn_log_biv <- brulee_mlp(Class ~ log(A) + log(B), + data = bivariate_train, + hidden_units = 3) # We use the tidymodels semantics to always return a tibble when predicting predict(nn_log_biv, bivariate_test, type = "prob") %>% @@ -80,9 +89,8 @@ rec <- set.seed(20) nn_rec_biv <- brulee_mlp(rec, data = bivariate_train, - epochs = 150, hidden_units = 3) + hidden_units = 3) -# A little better predict(nn_rec_biv, bivariate_test, type = "prob") %>% bind_cols(bivariate_test) %>% roc_auc(Class, .pred_One) diff --git a/README.md b/README.md index f4d8736..d89cebd 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,14 @@ use the `torch` package infrastructure, such as: - [multinomial regression](https://brulee.tidymodels.org/reference/brulee_multinomial_reg.html) +Some interesting features: + +- Early stopping based on an internal validation set. +- L1 and L2 penalization, also dropout. +- GPU utilization for models. +- Class-specific weights for cost-sensitive learning. +- Data frame, matrix, formula, and recipe interfaces for models. + ## Installation You can install the released version of brulee from @@ -53,8 +61,9 @@ library(yardstick) data(bivariate, package = "modeldata") set.seed(20) -nn_log_biv <- brulee_mlp(Class ~ log(A) + log(B), data = bivariate_train, - epochs = 150, hidden_units = 3) +nn_log_biv <- brulee_mlp(Class ~ log(A) + log(B), + data = bivariate_train, + hidden_units = 3) # We use the tidymodels semantics to always return a tibble when predicting predict(nn_log_biv, bivariate_test, type = "prob") %>% @@ -63,7 +72,7 @@ predict(nn_log_biv, bivariate_test, type = "prob") %>% #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> -#> 1 roc_auc binary 0.410 +#> 1 roc_auc binary 0.838 ``` A recipe can also be used if the data require some sort of preprocessing @@ -79,16 +88,15 @@ rec <- set.seed(20) nn_rec_biv <- brulee_mlp(rec, data = bivariate_train, - epochs = 150, hidden_units = 3) + hidden_units = 3) -# A little better predict(nn_rec_biv, bivariate_test, type = "prob") %>% bind_cols(bivariate_test) %>% roc_auc(Class, .pred_One) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> -#> 1 roc_auc binary 0.708 +#> 1 roc_auc binary 0.866 ``` ## Code of Conduct diff --git a/inst/WORDLIST b/inst/WORDLIST index 56eb8d0..4da6301 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,13 +1,14 @@ CMD Codecov +GPUs LBFGS Lifecycle ORCID PBC SGD -elu extensibility funder +irreproducibility mlp multilayer perceptrons diff --git a/man/brulee_activations.Rd b/man/brulee_activations.Rd index ac2cf93..078ce06 100644 --- a/man/brulee_activations.Rd +++ b/man/brulee_activations.Rd @@ -12,3 +12,12 @@ A character vector of values. \description{ Activation functions for neural networks in brulee } +\seealso{ +\code{\link[torch:nn_celu]{torch::nn_celu()}}, \code{\link[torch:nn_elu]{torch::nn_elu()}}, \code{\link[torch:nn_gelu]{torch::nn_gelu()}}, +\code{\link[torch:nn_hardshrink]{torch::nn_hardshrink()}}, \code{\link[torch:nn_hardsigmoid]{torch::nn_hardsigmoid()}}, \code{\link[torch:nn_hardtanh]{torch::nn_hardtanh()}}, +\code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_identity]{torch::nn_identity()}}, \code{\link[torch:nn_log_sigmoid]{torch::nn_log_sigmoid()}}, +\code{\link[torch:nn_relu]{torch::nn_relu()}}, \code{\link[torch:nn_relu6]{torch::nn_relu6()}}, \code{\link[torch:nn_rrelu]{torch::nn_rrelu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, +\code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}, \code{\link[torch:nn_silu]{torch::nn_silu()}}, \code{\link[torch:nn_softplus]{torch::nn_softplus()}}, +\code{\link[torch:nn_softshrink]{torch::nn_softshrink()}}, \code{\link[torch:nn_softsign]{torch::nn_softsign()}}, \code{\link[torch:nn_tanh]{torch::nn_tanh()}}, +\code{\link[torch:nn_tanhshrink]{torch::nn_tanhshrink()}} +} diff --git a/man/brulee_linear_reg.Rd b/man/brulee_linear_reg.Rd index c951dcf..237d94a 100644 --- a/man/brulee_linear_reg.Rd +++ b/man/brulee_linear_reg.Rd @@ -213,7 +213,7 @@ if (torch::torch_is_installed()) { set.seed(1) brulee_linear_reg(x = as.matrix(ames_train[, c("Longitude", "Latitude")]), y = ames_train$Sale_Price, - penalty = 0.10, epochs = 1, batch_size = 64) + penalty = 0.10, epochs = 10) # Using recipe library(recipes) @@ -234,8 +234,7 @@ if (torch::torch_is_installed()) { step_normalize(all_numeric_predictors()) set.seed(2) - fit <- brulee_linear_reg(ames_rec, data = ames_train, - epochs = 5, batch_size = 32) + fit <- brulee_linear_reg(ames_rec, data = ames_train, epochs = 5) fit autoplot(fit) diff --git a/man/brulee_mlp.Rd b/man/brulee_mlp.Rd index 14a6917..a4a3318 100644 --- a/man/brulee_mlp.Rd +++ b/man/brulee_mlp.Rd @@ -74,7 +74,7 @@ brulee_mlp(x, ...) batch_size = NULL, class_weights = NULL, stop_iter = 5, - device = device, + device = "cpu", verbose = FALSE, ... ) diff --git a/man/matrix_to_dataset.Rd b/man/matrix_to_dataset.Rd index b8b2dc1..6f0ab61 100644 --- a/man/matrix_to_dataset.Rd +++ b/man/matrix_to_dataset.Rd @@ -11,6 +11,10 @@ matrix_to_dataset(x, y, device = "cpu") \item{y}{A vector. If regression than \code{y} is numeric. For classification, it is a factor.} + +\item{device}{A character string to denote which processor to use with +possibles values: \code{"cpu"}, \code{"cuda"}, \code{"mps"}, and \code{"auto"}. The last value +uses \code{\link[=guess_brulee_device]{guess_brulee_device()}} to make the determination.} } \value{ An R6 index sampler object with classes "training_set",