diff --git a/DESCRIPTION b/DESCRIPTION
index 269d9ceb..16118ca1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -24,7 +24,7 @@ Imports:
     generics (>= 0.1.2),
     ggplot2,
     glue (>= 1.6.2),
-    GPfit,
+    GauPro (>= 0.2.15),
     hardhat (>= 1.4.2),
     parallel,
     parsnip (>= 1.2.1.9003),
@@ -66,4 +66,4 @@ Encoding: UTF-8
 Language: en-US
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
diff --git a/NAMESPACE b/NAMESPACE
index 051f192e..12c0c7a3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -255,7 +255,6 @@ export(val_class_or_null)
 import(rlang)
 import(vctrs)
 import(workflows)
-importFrom(GPfit,GP_fit)
 importFrom(cli,cli_abort)
 importFrom(cli,cli_alert)
 importFrom(cli,cli_alert_danger)
diff --git a/NEWS.md b/NEWS.md
index 09833b69..450022cd 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,13 @@
 # tune (development version)
 
+## Breaking Changes
+
+* The Gaussian process model package was changed from \pkg{GPfit} to \pkg{GauPro} because the former is no longer actively maintained. There are some differences: 
+
+   - Fit diagnostics are computed and reported. If the fit quality is poor, an "uncertainty sample" that is furthest away from the existing data is used as the new candidate. 
+   - The GP no longer uses binary indicators for qualitative predictors. Instead, a "categorical kernel" is used for those parameter columns. Fewer starting values are required with this change. 
+   - For numeric predictors, the Matern 3/2 kernel is always used. 
+
 # tune 2.0.1
 
 * Fixed a bug where `int_pctl()` wouldn't work on `last_fit()` outcomes when future parallelism was enabled. (#1099) 
diff --git a/R/0_imports.R b/R/0_imports.R
index 54409ce4..6b5334b9 100644
--- a/R/0_imports.R
+++ b/R/0_imports.R
@@ -12,7 +12,6 @@
 #' @importFrom stats model.matrix model.response model.frame update median
 #' @importFrom yardstick rsq rmse accuracy roc_auc brier_survival brier_class
 #' @importFrom tidyr unnest nest
-#' @importFrom GPfit GP_fit
 #' @importFrom parsnip get_from_env required_pkgs
 #' @importFrom recipes all_predictors all_outcomes
 #' @importFrom ggplot2 ggplot aes xlab geom_point geom_errorbar facet_wrap ylab
@@ -124,8 +123,9 @@ utils::globalVariables(
     ".num_models",
     "model_stage",
     "predict_stage",
-    "user",
-    "num"
+    "se",
+    "num",
+    "user"
   )
 )
 
diff --git a/R/checks.R b/R/checks.R
index fb76d712..b9fe5efb 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -244,10 +244,7 @@ check_bayes_initial_size <- function(num_param, num_grid, race = FALSE) {
     c(
       `!` = "{msg}",
       `*` = cli::pluralize(
-        "There are {cli::qty(diff)}{?as many/more} tuning parameters
-          {cli::qty(diff)}{?as/than} there are initial points.
-          This is likely to cause numerical issues in the first few
-          search iterations."
+        "This is likely to cause numerical issues in the first few search iterations."
       )
     )
 
diff --git a/R/gp_helpers.R b/R/gp_helpers.R
new file mode 100644
index 00000000..e4b54cd8
--- /dev/null
+++ b/R/gp_helpers.R
@@ -0,0 +1,346 @@
+# Determine any qualitative parameters and their ranges
+
+find_qual_param <- function(pset) {
+  is_qual <- purrr::map_lgl(pset$object, ~ inherits(.x, "qual_param"))
+  if (!any(is_qual)) {
+    return(list())
+  }
+
+  pset <- pset[is_qual, ]
+
+  possible_lvl <- purrr::map(pset$object, ~ .x$values)
+  names(possible_lvl) <- pset$id
+  possible_lvl
+}
+
+# Leave as-is but scale the others; order by parameter id in pset
+
+# Create the kernel for the continuous parameters
+# Add a new kernel for each qualitative parameter
+
+make_kernel <- function(pset, lvls) {
+  qual_ind <- which(pset$id %in% names(lvls))
+  quant_ind <- setdiff(seq_along(pset$id), qual_ind)
+  quant_avail <- length(quant_ind) > 0
+
+  if (length(qual_ind) == 0) {
+    return(GauPro::k_Matern32(D = length(quant_ind)))
+  }
+
+  num_kernels <- length(qual_ind) + quant_avail
+
+  kernels <- vector(mode = "list", length = num_kernels)
+  kern_count <- 0
+
+  if (quant_avail) {
+    kernels[[1]] <- GauPro::k_IgnoreIndsKernel(
+      k = GauPro::k_Matern32(D = length(quant_ind)),
+      ignoreinds = qual_ind
+    )
+    kern_count <- 1
+  }
+
+  for (i in seq_along(qual_ind)) {
+    kern_count <- kern_count + 1
+    kernels[[kern_count]] <- GauPro::k_FactorKernel(
+      D = 1,
+      xindex = qual_ind[i],
+      nlevels = length(lvls[[1]])
+    )
+  }
+
+  for (i in 1:num_kernels) {
+    if (i == 1) {
+      res <- kernels[[1]]
+    } else {
+      res <- res * kernels[[i]]
+    }
+  }
+  res
+}
+
+check_gp <- function(x) {
+  model_fail <- inherits(x, "try-error")
+  gp_threshold <- 0.1
+  if (!model_fail) {
+    loo_res <- summary(x)
+    loo_bad <- loo_res$coverage95LOO < gp_threshold
+    loo_rsq <- loo_res$r.squaredLOO
+  } else {
+    loo_bad <- TRUE
+    loo_rsq <- 0.0
+  }
+  # convergence?
+  if (model_fail) {
+    message_wrap(
+      cli::format_inline("GP failed: {as.character(x)}"),
+      prefix = cli::symbol$checkbox_circle_on,
+      color_text = get_tune_colors()$message$danger
+    )
+  } else if (loo_rsq < gp_threshold) {
+    msg <- cli::format_inline(
+      "GP has a LOO R\u00B2 of {round(loo_rsq * 100, 1)}% and is unreliable."
+    )
+    message_wrap(
+      msg,
+      prefix = cli::symbol$checkbox_circle_on,
+      color_text = get_tune_colors()$message$danger
+    )
+  } else if (loo_bad) {
+    msg <- cli::format_inline(
+      "GP has a coverage rate < {round(gp_threshold * 100, 1)}% and is unreliable."
+    )
+    message_wrap(
+      msg,
+      prefix = cli::symbol$checkbox_circle_on,
+      color_text = get_tune_colors()$message$danger
+    )
+  }
+  list(use = !loo_bad && !model_fail && loo_rsq > gp_threshold, rsq = loo_rsq)
+}
+
+# encode_set() was created to work on all types of tuning parameters; usage of
+# GauPro means that we should not encode qualitative tuning parameters so we
+# need a wrapper
+partial_encode <- function(dat, pset) {
+  qual_info <- find_qual_param(pset)
+
+  if (any(names(dat) == "mean")) {
+    outcomes <- dat$mean
+  }
+
+  normalized <- encode_set(
+    dat |> dplyr::select(dplyr::all_of(pset$id)),
+    pset = pset,
+    as_matrix = FALSE
+  )
+
+  # Replace with the original data when qualitative parameters
+  # GauPro::gpkm can take factor encodings to work
+  for (i in seq_along(qual_info)) {
+    nm <- names(qual_info)[i]
+    normalized[[nm]] <- factor(dat[[nm]], levels = qual_info[[i]])
+  }
+  if (any(names(dat) == "mean")) {
+    normalized$.outcome <- dat$mean
+  }
+  normalized
+}
+
+# ------------------------------------------------------------------------------
+
+fit_gp <- function(
+  dat,
+  pset,
+  metric,
+  eval_time = NULL,
+  control,
+  ...
+) {
+  tune::empty_ellipses(...)
+
+  dat <- dat |> dplyr::filter(.metric == metric)
+
+  if (!is.null(eval_time)) {
+    dat <- dat |> dplyr::filter(.eval_time == eval_time)
+  }
+
+  dat <- dat |>
+    check_gp_data() |>
+    dplyr::select(dplyr::all_of(pset$id), mean)
+
+  qual_info <- find_qual_param(pset)
+  num_pred <- nrow(pset)
+  num_cand <- nrow(dat)
+
+  normalized <- partial_encode(dat, pset)
+  # gaupro will look for * or : in names and fail :-(
+  # change names and change back later
+  og_names <- colnames(normalized)
+  gp_names <- make.names(og_names)
+  colnames(normalized) <- gp_names
+
+  gp_kernel <- make_kernel(pset, qual_info)
+
+  if (num_cand <= num_pred && num_cand > 0 & control$verbose_iter) {
+    msg <- cli::format_inline(
+      "The Gaussian process model is being fit using {num_pred} feature{?s} but
+			only has {num_cand} data point{?s} to do so. This may cause errors or a
+			poor model fit."
+    )
+    message_wrap(
+      msg,
+      prefix = "!",
+      color_text = get_tune_colors()$message$warning
+    )
+  }
+
+  withr::with_seed(
+    114,
+    gp_fit <- try(
+      GauPro::gpkm(
+        .outcome ~ .,
+        data = normalized,
+        kernel = gp_kernel,
+        verbose = 0,
+        restarts = 5,
+        nug.est = FALSE,
+        parallel = FALSE
+      ),
+      silent = TRUE
+    )
+  )
+
+  new_check <- check_gp(gp_fit)
+
+  if (control$verbose_iter) {
+    if (new_check$use) {
+      msg <- cli::format_inline(
+        "Gaussian process model (LOO R\u00B2: {round(new_check$rsq * 100, 1)}%)"
+      )
+      message_wrap(
+        msg,
+        prefix = cli::symbol$tick,
+        color_text = get_tune_colors()$message$success
+      )
+    } else {
+      message_wrap(
+        "Gaussian process model failed",
+        prefix = cli::symbol$tick,
+        color_text = get_tune_colors()$message$danger
+      )
+    }
+  }
+
+  list(
+    fit = gp_fit,
+    use = new_check$use,
+    rsq = new_check$rsq,
+    tr = normalized
+  )
+}
+
+# ------------------------------------------------------------------------------
+
+quiet_pred_gp <- function(object, new_data, ...) {
+  sssh_pred <- purrr::quietly(object$fit$pred)
+  res <- sssh_pred(new_data, ...)
+  wrn <- res$warnings
+
+  if (length(wrn) > 0) {
+    wrn <- wrn[!grepl("Too small", wrn)]
+    for (i in seq_along(wrn)) {
+      cli::cli_warn("{wrn[i]}")
+    }
+  }
+  res$result
+}
+
+pred_gp <- function(object, pset, size = 5000, current = NULL, control) {
+  candidates <- dials::grid_space_filling(
+    pset,
+    size = size,
+    type = "latin_hypercube"
+  ) |>
+    dplyr::distinct()
+
+  if (!object$use) {
+    x <- partial_encode(candidates, pset)
+    colnames(x) <- make.names(colnames(x))
+    x_old <- object$tr
+    x_old <- x_old[, names(x)]
+
+    # Remove existing points
+    x <- dplyr::anti_join(x, x_old, by = make.names(pset$id))
+
+    keep_ind <- dissim_sample(x_old, x, pset, max_n = Inf)
+    candidates <- candidates[keep_ind, ] |>
+      dplyr::mutate(.mean = NA_real_, .sd = NA_real_)
+
+    message_wrap(
+      "Generating a candidate as far away from existing points as possible.",
+      prefix = cli::symbol$info,
+      color_text = get_tune_colors()$message$info
+    )
+
+    return(candidates)
+  } else {
+    message_wrap(
+      paste("Generating", nrow(candidates), "candidates."),
+      prefix = cli::symbol$info,
+      color_text = get_tune_colors()$message$info
+    )
+  }
+
+  if (!is.null(current)) {
+    candidates <- candidates |>
+      dplyr::anti_join(current, by = pset$id)
+  }
+
+  if (nrow(candidates) == 0) {
+    message_wrap(
+      "No remaining candidate models",
+      prefix = cli::symbol$tick,
+      color_text = get_tune_colors()$message$warning
+    )
+    return(candidates |> dplyr::mutate(.mean = NA_real_, .sd = NA_real_))
+  }
+
+  x <- partial_encode(candidates, pset)
+  colnames(x) <- make.names(colnames(x))
+
+  gp_pred <- quiet_pred_gp(object, x, se.fit = TRUE)
+
+  gp_pred <- tibble::as_tibble(gp_pred) |>
+    dplyr::select(.mean = mean, .sd = se)
+  dplyr::bind_cols(candidates, gp_pred)
+}
+
+
+pick_candidate <- function(results, info, control) {
+  bad_gp <- all(is.na(results$.mean))
+  if (!bad_gp & info$uncertainty < control$uncertain) {
+    results <- results |>
+      dplyr::slice_max(objective, n = 1, with_ties = FALSE)
+  } else {
+    if (control$verbose_iter) {
+      message_wrap(
+        "Uncertainty sample",
+        prefix = cli::symbol$info,
+        color_text = get_tune_colors()$message$info
+      )
+    }
+    results <- results |>
+      dplyr::arrange(dplyr::desc(.sd)) |>
+      dplyr::slice(seq_len(floor(.1 * nrow(results)))) |>
+      dplyr::sample_n(1)
+  }
+  results
+}
+
+dissim_sample <- function(ref_data, candidates, pset, max_n = Inf) {
+  max_n <- min(max_n, nrow(candidates))
+  candidates <- candidates[1:max_n, , drop = FALSE]
+  all_data <- dplyr::bind_rows(ref_data, candidates)
+
+  # Deal with any qualitative predictors by casting them to c(0,1)
+  qual_info <- find_qual_param(pset)
+  if (length(qual_info) > 0) {
+    for (i in seq_along(qual_info)) {
+      nm <- names(qual_info)[i]
+      uniq <- sort(unique(all_data[[nm]]))
+      all_data[[nm]] <- as.character(all_data[[nm]])
+      all_data[[nm]] <- factor(all_data[[nm]], levels = uniq)
+    }
+  }
+  all_data <- stats::model.matrix(~ . + 0, data = all_data)
+
+  n_ref <- nrow(ref_data)
+  n_all <- nrow(all_data)
+  distances <- stats::dist(all_data)
+  distances <- as.matrix(distances)
+  distances <- distances[1:n_ref, (n_ref + 1):n_all]
+  min_distances <- apply(distances, 2, function(x) min(x[x > 0]))
+  max_ind <- which.max(min_distances)[1]
+  max_ind
+}
diff --git a/R/tune_bayes.R b/R/tune_bayes.R
index 3dd00359..5bf1c00f 100644
--- a/R/tune_bayes.R
+++ b/R/tune_bayes.R
@@ -13,7 +13,7 @@
 #' from [tune_grid()]) or a positive integer. It is suggested that the number of
 #' initial results be greater than the number of parameters being optimized.
 #' @param control A control object created by [control_bayes()].
-#' @param ... Options to pass to [GPfit::GP_fit()] (mostly for the `corr` argument).
+#' @param ... Not currently used.
 #' @return A tibble of results that mirror those generated by [tune_grid()].
 #' However, these results contain an `.iter` column and replicate the `rset`
 #' object multiple times over iterations (at limited additional memory costs).
@@ -277,6 +277,26 @@ tune_bayes.workflow <-
     res
   }
 
+## TODOs from testing
+
+# These lines are missing:
+# -   i Gaussian process model
+# -   ! The Gaussian process model is being fit using 1 features but only has 2
+# -     data points to do so. This may cause errors or a poor model fit.
+# -   v Gaussian process model
+# -   i Generating 3 candidates
+# -   i Predicted candidates
+
+# Some failures are not being caught (or ignored); see "missing performance
+# values" test
+# -   ! Gaussian process model: no non-missing arguments to min; returning Inf, no non-missing arguments...
+# -   x Gaussian process model: Error in seq_len(n - 1L): argument must be coercible to non-negative int...
+
+# -   Error in `check_gp_failure()`:
+# +   Error in `apply()`:
+# -   ! Gaussian process model was not fit.
+# +   ! dim(X) must have a positive length
+
 tune_bayes_workflow <- function(
   object,
   resamples,
@@ -413,25 +433,17 @@ tune_bayes_workflow <- function(
 
       check_time(start_time, control$time_limit)
 
+      # Maybe remove .catch_and_log() here and do catching inside function
       set.seed(control$seed[1] + i)
-      gp_mod <-
-        .catch_and_log(
-          fit_gp(
-            mean_stats |> dplyr::select(-.iter),
-            pset = param_info,
-            metric = opt_metric_name,
-            eval_time = opt_metric_time,
-            control = control,
-            ...
-          ),
-          control,
-          NULL,
-          location = "Gaussian process model",
-          notes = .notes,
-          catalog = FALSE
-        )
 
-      gp_mod <- check_gp_failure(gp_mod, prev_gp_mod)
+      gp_mod <- fit_gp(
+        mean_stats |> dplyr::select(-.iter),
+        pset = param_info,
+        metric = opt_metric_name,
+        eval_time = opt_metric_time,
+        control = control,
+        ...
+      )
 
       check_time(start_time, control$time_limit)
 
@@ -462,7 +474,8 @@ tune_bayes_workflow <- function(
 
       check_time(start_time, control$time_limit)
 
-      check_and_log_flow(control, candidates)
+      # TODO move some messages here
+      # check_and_log_flow(control, candidates)
 
       save_gp_results(
         gp_mod,
@@ -475,6 +488,12 @@ tune_bayes_workflow <- function(
       )
 
       candidates <- pick_candidate(candidates, score_card, control)
+
+      # These were temp used to replace pick_candidate()
+      # candidates <- candidates |>
+      #   dplyr::arrange(dplyr::desc(objective)) |>
+      #   dplyr::slice(1)
+
       if (score_card$uncertainty >= control$uncertain) {
         score_card$uncertainty <- -1 # is updated in update_score_card() below
       }
@@ -648,104 +667,6 @@ encode_set <- function(x, pset, ..., as_matrix = FALSE) {
   x
 }
 
-fit_gp <- function(dat, pset, metric, eval_time = NULL, control, ...) {
-  dat <- dat |> dplyr::filter(.metric == metric)
-
-  if (!is.null(eval_time)) {
-    dat <- dat |> dplyr::filter(.eval_time == eval_time)
-  }
-
-  dat <- dat |>
-    check_gp_data() |>
-    dplyr::select(dplyr::all_of(pset$id), mean)
-
-  x <- encode_set(dat |> dplyr::select(-mean), pset, as_matrix = TRUE)
-
-  if (nrow(x) <= ncol(x) + 1 && nrow(x) > 0) {
-    msg <-
-      paste(
-        "The Gaussian process model is being fit using ",
-        ncol(x),
-        "features but only has",
-        nrow(x),
-        "data points to do so. This may cause",
-        "errors or a poor model fit."
-      )
-    message_wrap(
-      msg,
-      prefix = "!",
-      color_text = get_tune_colors()$message$warning
-    )
-  }
-
-  opts <- list(...)
-
-  withCallingHandlers(
-    {
-      if (any(names(opts) == "trace") && opts$trace) {
-        gp_fit <- GPfit::GP_fit(X = x, Y = dat$mean, ...)
-      } else {
-        tmp_output <- utils::capture.output(
-          gp_fit <- GPfit::GP_fit(X = x, Y = dat$mean, ...)
-        )
-      }
-    },
-    warning = function(w) {
-      if (w$message == "X should be in range (0, 1)") {
-        rlang::cnd_muffle(w)
-      }
-    }
-  )
-
-  gp_fit
-}
-
-pred_gp <- function(object, pset, size = 5000, current = NULL, control) {
-  pred_grid <-
-    dials::grid_space_filling(pset, size = size, type = "latin_hypercube") |>
-    dplyr::distinct()
-
-  if (!is.null(current)) {
-    pred_grid <-
-      pred_grid |>
-      dplyr::anti_join(current, by = pset$id)
-  }
-
-  if (inherits(object, "try-error") | nrow(pred_grid) == 0) {
-    if (nrow(pred_grid) == 0) {
-      msg <- "No remaining candidate models"
-    } else {
-      msg <- "An error occurred when creating candidates parameters: "
-      msg <- paste(msg, as.character(object))
-    }
-    update_printer(control, split_labels = NULL, task = msg, type = "warning")
-    return(pred_grid |> dplyr::mutate(.mean = NA_real_, .sd = NA_real_))
-  }
-
-  update_printer(
-    control,
-    split_labels = NULL,
-    task = paste("Generating", nrow(pred_grid), "candidates"),
-    type = "info",
-    catalog = FALSE
-  )
-
-  x <- encode_set(pred_grid, pset, as_matrix = TRUE)
-  gp_pred <- predict(object, x)
-
-  update_printer(
-    control,
-    split_labels = NULL,
-    task = "Predicted candidates",
-    type = "info",
-    catalog = FALSE
-  )
-
-  pred_grid |>
-    dplyr::mutate(.mean = gp_pred$Y_hat, .sd = sqrt(gp_pred$MSE))
-}
-
-
 pick_candidate <- function(results, info, control) {
   if (info$uncertainty < control$uncertain) {
     results <- results |>
@@ -807,6 +728,7 @@ initial_info <- function(stats, metrics, maximize, eval_time) {
     dplyr::filter(.metric == metrics) |>
     dplyr::filter(!is.na(mean))
 
+  # TODO a lot of slice_min/slice_max can be used now
   if (maximize) {
     best_res <-
       best_res |>
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 1e048a29..59f87a2f 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -9,11 +9,14 @@ Davison
 Disambiguates
 EI
 GPUs
+GPfit
+GauPro
 HD
 Hinkley
 Isomap
 L'Ecuyer
 Lifecycle
+Matern
 Monterey
 Olshen
 PSOCK
diff --git a/inst/test_objects.R b/inst/test_objects.R
index e73a5542..1946d679 100644
--- a/inst/test_objects.R
+++ b/inst/test_objects.R
@@ -1,4 +1,17 @@
-library(tidymodels)
+library(broom)
+library(dials)
+library(dplyr)
+library(ggplot2)
+library(modeldata)
+library(parsnip)
+library(purrr)
+library(recipes)
+library(rsample)
+library(tailor)
+library(tidyr)
+library(tune)
+library(workflows)
+library(yardstick)
 library(scales)
 library(censored)
 library(sessioninfo)
@@ -72,6 +85,7 @@ mt_spln_lm_bo <-
   tune_bayes(
     mt_spln_lm,
     resamples = folds,
+    initial = 2,
     iter = 3,
     control = b_ctrl
   )
diff --git a/man/autoplot.tune_results.Rd b/man/autoplot.tune_results.Rd
index 27d5dd9d..88366499 100644
--- a/man/autoplot.tune_results.Rd
+++ b/man/autoplot.tune_results.Rd
@@ -83,7 +83,7 @@ Parameters are labeled using the labels found in the parameter object
 }
 }
 \examples{
-\dontshow{if (tune:::should_run_examples()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples()) withAutoprint(\{ # examplesIf}
 # For grid search:
 data("example_ames_knn")
 
diff --git a/man/collect_predictions.Rd b/man/collect_predictions.Rd
index faea494b..965fb42a 100644
--- a/man/collect_predictions.Rd
+++ b/man/collect_predictions.Rd
@@ -124,7 +124,7 @@ more about submodels.
 }
 
 \examples{
-\dontshow{if (tune:::should_run_examples(suggests = c("kknn", "splines2"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples(suggests = c("kknn", "splines2"))) withAutoprint(\{ # examplesIf}
 data("example_ames_knn")
 # The parameters for the model:
 extract_parameter_set_dials(ames_wflow)
diff --git a/man/compute_metrics.Rd b/man/compute_metrics.Rd
index cf286e67..792274ae 100644
--- a/man/compute_metrics.Rd
+++ b/man/compute_metrics.Rd
@@ -54,7 +54,7 @@ class probabilities (\code{"prob"}.) By default, the tuning functions used to
 generate \code{x} compute metrics of all needed types.
 }
 \examples{
-\dontshow{if (tune:::should_run_examples()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples()) withAutoprint(\{ # examplesIf}
 # load needed packages:
 library(parsnip)
 library(rsample)
diff --git a/man/conf_mat_resampled.Rd b/man/conf_mat_resampled.Rd
index 08307698..16171f86 100644
--- a/man/conf_mat_resampled.Rd
+++ b/man/conf_mat_resampled.Rd
@@ -26,7 +26,7 @@ For classification problems, \code{conf_mat_resampled()} computes a separate
 confusion matrix for each resample then averages the cell counts.
 }
 \examples{
-\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (rlang::is_installed("modeldata")) withAutoprint(\{ # examplesIf}
 # example code
 
 library(parsnip)
diff --git a/man/coord_obs_pred.Rd b/man/coord_obs_pred.Rd
index 0358b0f5..da29cb43 100644
--- a/man/coord_obs_pred.Rd
+++ b/man/coord_obs_pred.Rd
@@ -30,7 +30,7 @@ For regression models, \code{coord_obs_pred()} can be used in a ggplot to make t
 x- and y-axes have the same exact scale along with an aspect ratio of one.
 }
 \examples{
-\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (rlang::is_installed("modeldata")) withAutoprint(\{ # examplesIf}
 # example code
 data(solubility_test, package = "modeldata")
 
diff --git a/man/expo_decay.Rd b/man/expo_decay.Rd
index e3a69eec..c4470f9c 100644
--- a/man/expo_decay.Rd
+++ b/man/expo_decay.Rd
@@ -30,7 +30,7 @@ would be required since only the first argument would be evaluated during
 tuning.
 }
 \examples{
-\dontshow{if (tune:::should_run_examples()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples()) withAutoprint(\{ # examplesIf}
 library(tibble)
 library(purrr)
 library(ggplot2)
diff --git a/man/extract-tune.Rd b/man/extract-tune.Rd
index 16c929cc..fad41881 100644
--- a/man/extract-tune.Rd
+++ b/man/extract-tune.Rd
@@ -73,7 +73,7 @@ estimated for objects produced by \code{\link[=last_fit]{last_fit()}}.
 These functions supersede \code{extract_model()}.
 }
 \examples{
-\dontshow{if (rlang::is_installed("splines2")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (rlang::is_installed("splines2")) withAutoprint(\{ # examplesIf}
 # example code
 
 library(recipes)
diff --git a/man/finalize_model.Rd b/man/finalize_model.Rd
index dcfc3441..dfa6c1f7 100644
--- a/man/finalize_model.Rd
+++ b/man/finalize_model.Rd
@@ -32,7 +32,7 @@ The \verb{finalize_*} functions take a list or tibble of tuning parameter values
 update objects with those values.
 }
 \examples{
-\dontshow{if (tune:::should_run_examples(suggests = "kknn")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples(suggests = "kknn")) withAutoprint(\{ # examplesIf}
 data("example_ames_knn")
 
 library(parsnip)
diff --git a/man/fit_best.Rd b/man/fit_best.Rd
index 57118ff0..903354bf 100644
--- a/man/fit_best.Rd
+++ b/man/fit_best.Rd
@@ -109,7 +109,7 @@ via \link[=extract_workflow.tune_results]{extract_workflow()}.
 }
 
 \examples{
-\dontshow{if (tune:::should_run_examples() && rlang::is_installed("modeldata") && !tune:::is_cran_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples() && rlang::is_installed("modeldata") && !tune:::is_cran_check()) withAutoprint(\{ # examplesIf}
 library(recipes)
 library(rsample)
 library(parsnip)
diff --git a/man/fit_resamples.Rd b/man/fit_resamples.Rd
index 94a2d6ac..c6974497 100644
--- a/man/fit_resamples.Rd
+++ b/man/fit_resamples.Rd
@@ -210,7 +210,7 @@ processing.
 }
 
 \examples{
-\dontshow{if (tune:::should_run_examples() & rlang::is_installed("splines2")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples() & rlang::is_installed("splines2")) withAutoprint(\{ # examplesIf}
 library(recipes)
 library(rsample)
 library(parsnip)
diff --git a/man/int_pctl.tune_results.Rd b/man/int_pctl.tune_results.Rd
index 1ee68804..93fb67dc 100644
--- a/man/int_pctl.tune_results.Rd
+++ b/man/int_pctl.tune_results.Rd
@@ -81,7 +81,7 @@ computations can take a long time unless the times are filtered with the
 \code{eval_time} argument.
 }
 \examples{
-\dontshow{if (!tune:::is_cran_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (!tune:::is_cran_check()) withAutoprint(\{ # examplesIf}
 if (rlang::is_installed("modeldata")) {
   data(Sacramento, package = "modeldata")
   library(rsample)
diff --git a/man/last_fit.Rd b/man/last_fit.Rd
index cf0dbcbb..c999508a 100644
--- a/man/last_fit.Rd
+++ b/man/last_fit.Rd
@@ -152,7 +152,7 @@ via \link[=extract_workflow.tune_results]{extract_workflow()}.
 }
 
 \examples{
-\dontshow{if (tune:::should_run_examples() & rlang::is_installed("splines2")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples() & rlang::is_installed("splines2")) withAutoprint(\{ # examplesIf}
 library(recipes)
 library(rsample)
 library(parsnip)
diff --git a/man/merge.recipe.Rd b/man/merge.recipe.Rd
index 99221a24..f17a8577 100644
--- a/man/merge.recipe.Rd
+++ b/man/merge.recipe.Rd
@@ -26,7 +26,7 @@ A tibble with a column \code{x} that has as many rows as were in \code{y}.
 \pkg{parsnip} model or recipe.
 }
 \examples{
-\dontshow{if (tune:::should_run_examples(suggests = c("xgboost", "modeldata", "splines2"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples(suggests = c("xgboost", "modeldata", "splines2"))) withAutoprint(\{ # examplesIf}
 library(tibble)
 library(recipes)
 library(parsnip)
diff --git a/man/show_best.Rd b/man/show_best.Rd
index 649e8bcd..f998ecb5 100644
--- a/man/show_best.Rd
+++ b/man/show_best.Rd
@@ -98,7 +98,7 @@ model has an RMSE of 1. The percent loss would be \code{(1.00 - 0.75)/1.00 * 100
 or 25 percent. Note that loss will always be non-negative.
 }
 \examples{
-\dontshow{if (tune:::should_run_examples()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples()) withAutoprint(\{ # examplesIf}
 data("example_ames_knn")
 
 show_best(ames_iter_search, metric = "rmse")
diff --git a/man/tune_bayes.Rd b/man/tune_bayes.Rd
index 4c0fd315..074da3eb 100644
--- a/man/tune_bayes.Rd
+++ b/man/tune_bayes.Rd
@@ -41,7 +41,7 @@ tune_bayes(object, ...)
 have been marked with \link[hardhat:tune]{tune()}, their values must be
 \link[=finalize_model]{finalized}.}
 
-\item{...}{Options to pass to \code{\link[GPfit:GP_fit]{GPfit::GP_fit()}} (mostly for the \code{corr} argument).}
+\item{...}{Not currently used.}
 
 \item{preprocessor}{A traditional model formula or a recipe created using
 \code{\link[recipes:recipe]{recipes::recipe()}}.}
@@ -306,7 +306,7 @@ processing.
 }
 
 \examples{
-\dontshow{if (tune:::should_run_examples(suggests = "kernlab") && !tune:::is_cran_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples(suggests = "kernlab") && !tune:::is_cran_check()) withAutoprint(\{ # examplesIf}
 library(recipes)
 library(rsample)
 library(parsnip)
diff --git a/man/tune_grid.Rd b/man/tune_grid.Rd
index cf153c71..1b3453c2 100644
--- a/man/tune_grid.Rd
+++ b/man/tune_grid.Rd
@@ -287,7 +287,7 @@ the largest time corresponding to an event are constant (or \code{NA}).
 }
 
 \examples{
-\dontshow{if (tune:::should_run_examples(suggests = "kernlab") & rlang::is_installed("splines2") && !tune:::is_cran_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (tune:::should_run_examples(suggests = "kernlab") & rlang::is_installed("splines2") && !tune:::is_cran_check()) withAutoprint(\{ # examplesIf}
 library(recipes)
 library(rsample)
 library(parsnip)
diff --git a/tests/testthat/_snaps/compute_metrics.md b/tests/testthat/_snaps/compute_metrics.md
index 3560f458..87593c3a 100644
--- a/tests/testthat/_snaps/compute_metrics.md
+++ b/tests/testthat/_snaps/compute_metrics.md
@@ -16,6 +16,30 @@
       ! The supplied `metrics` argument has metrics of type "prob", while the metrics used to generate predictions only used "class" metrics.
       i To save predictions for prob metrics, generate `x` with metrics of that type.
 
+# `metrics` argument works (iterative tuning)
+
+    Code
+      set.seed(1)
+      res_rmse <- tune_bayes(nearest_neighbor("regression", neighbors = tune()), mpg ~
+        ., vfold_cv(mtcars, v = 3), metrics = m_set_rmse, control = tune::control_bayes(
+        save_pred = TRUE), iter = 2, initial = 3)
+    Message
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
+      i Generating 15 candidates
+
+---
+
+    Code
+      set.seed(1)
+      res_both <- tune_bayes(nearest_neighbor("regression", neighbors = tune()), mpg ~
+        ., vfold_cv(mtcars, v = 3), metrics = m_set_both, control = tune::control_bayes(
+        save_pred = TRUE), iter = 2, initial = 3)
+    Message
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
+      i Generating 15 candidates
+
 # errors informatively with bad input
 
     Code
diff --git a/tests/testthat/_snaps/eval-time-args.md b/tests/testthat/_snaps/eval-time-args.md
index 8d4b3646..a933df32 100644
--- a/tests/testthat/_snaps/eval-time-args.md
+++ b/tests/testthat/_snaps/eval-time-args.md
@@ -40,6 +40,8 @@
     Condition
       Warning in `tune_bayes()`:
       `eval_time` is only used for models with mode "censored regression".
+    Message
+      i Generating 15 candidates
 
 ---
 
@@ -91,6 +93,8 @@
     Condition
       Warning in `tune_bayes()`:
       `eval_time` is only used for models with mode "censored regression".
+    Message
+      i Generating 15 candidates
 
 ---
 
diff --git a/tests/testthat/_snaps/gp_helpers.md b/tests/testthat/_snaps/gp_helpers.md
new file mode 100644
index 00000000..9c412c44
--- /dev/null
+++ b/tests/testthat/_snaps/gp_helpers.md
@@ -0,0 +1,41 @@
+# GP fit - svm - failure
+
+    Code
+      svm_gp <- tune:::fit_gp(collect_metrics(svm_results), pset = svm_set, metric = "accuracy",
+      control = control_bayes(verbose = TRUE))
+    Message
+      (x) GP has a LOO R² of -6.1% and is unreliable.
+
+---
+
+    Code
+      svm_scores <- tune:::pred_gp(svm_gp, pset = svm_set, size = 20, current = curr,
+        control = ctrl)
+    Message
+      i Generating a candidate as far away from existing points as possible.
+
+# GP scoring with failed model
+
+    Code
+      svm_gp <- tune:::fit_gp(collect_metrics(svm_results), pset = svm_set, metric = "accuracy",
+      control = ctrl)
+    Message
+      (x) GP has a LOO R² of -6.1% and is unreliable.
+
+---
+
+    Code
+      svm_scores <- tune:::pred_gp(svm_gp, pset = svm_set, size = 20, current = curr,
+        control = ctrl)
+    Message
+      i Generating a candidate as far away from existing points as possible.
+
+# GP fit - knn
+
+    Code
+      set.seed(1)
+      knn_scores <- tune:::pred_gp(knn_gp, pset = knn_set, size = 20, current = mutate(
+        knn_mtr, .iter = 0), control = ctrl)
+    Message
+      i Generating 20 candidates
+
diff --git a/tests/testthat/_snaps/int_pctl.md b/tests/testthat/_snaps/int_pctl.md
index c9bb4df6..89bf1618 100644
--- a/tests/testthat/_snaps/int_pctl.md
+++ b/tests/testthat/_snaps/int_pctl.md
@@ -43,6 +43,17 @@
     Code
       int_res_1 <- int_pctl(c5_res)
 
+---
+
+    Code
+      set.seed(92)
+      c5_bo_res <- tune_bayes(set_mode(set_engine(decision_tree(min_n = tune()),
+      "C5.0"), "classification"), Class ~ ., resamples = cls_rs, initial = c5_res,
+      iter = 1, metrics = metric_set(sens), control = control_bayes(save_pred = TRUE))
+    Message
+      (x) GP has a LOO R² of 3.1% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
+
 # percentile intervals - grid tuning with validation set
 
     Code
diff --git a/tests/testthat/_snaps/bayes.md b/tests/testthat/_snaps/tune_bayes.md
similarity index 78%
rename from tests/testthat/_snaps/bayes.md
rename to tests/testthat/_snaps/tune_bayes.md
index f6a22f57..c8729238 100644
--- a/tests/testthat/_snaps/bayes.md
+++ b/tests/testthat/_snaps/tune_bayes.md
@@ -1,5 +1,16 @@
 # tune recipe only
 
+    Code
+      set.seed(2)
+      res <- tune_bayes(wflow, resamples = folds, param_info = pset, initial = iter1,
+        iter = iter2, control = control)
+    Message
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
+      i Generating 15 candidates
+
+---
+
     Code
       tune_bayes(wflow, resamples = folds, param_info = pset, initial = iter1, iter = iter2,
         control = control_bayes(verbose = TRUE))
@@ -8,11 +19,8 @@
       >  Generating a set of 2 initial parameter results
       v Initialization complete
       
-      i Gaussian process model
-      ! The Gaussian process model is being fit using 1 features but only has 2
-        data points to do so. This may cause errors or a poor model fit.
-      i Generating 3 candidates
-      i Predicted candidates
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       i Estimating performance
       i Fold01: preprocessor 1/1
       i Fold01: preprocessor 1/1, model 1/1
@@ -45,9 +53,7 @@
       i Fold10: preprocessor 1/1, model 1/1
       i Fold10: preprocessor 1/1, model 1/1 (predictions)
       v Estimating performance
-      i Gaussian process model
-      i Generating 2 candidates
-      i Predicted candidates
+      i Generating 15 candidates
       i Estimating performance
       i Fold01: preprocessor 1/1
       i Fold01: preprocessor 1/1, model 1/1
@@ -108,27 +114,24 @@
       
       -- Iteration 1 -----------------------------------------------------------------
       
-      i Current best:		rmse=2.461 (@iter 0)
-      i Gaussian process model
-      ! The Gaussian process model is being fit using 1 features but only has 2
-        data points to do so. This may cause errors or a poor model fit.
-      i Generating 3 candidates
-      i Predicted candidates
-      i num_comp=5
+      i Current best:		rmse=2.505 (@iter 0)
+      (x) GP has a LOO R² of 0% and is unreliable.
+      v Gaussian process model failed
+      i Generating a candidate as far away from existing points as possible.
+      i num_comp=11
       i Estimating performance
       v Estimating performance
-      <3 Newest results:	rmse=2.453 (+/-0.381)
+      (x) Newest results:	rmse=3.589 (+/-0.499)
       
       -- Iteration 2 -----------------------------------------------------------------
       
-      i Current best:		rmse=2.453 (@iter 1)
-      i Gaussian process model
-      i Generating 2 candidates
-      i Predicted candidates
-      i num_comp=1
+      i Current best:		rmse=2.505 (@iter 0)
+      v Gaussian process model (LOO R²: 30.5%)
+      i Generating 15 candidates
+      i num_comp=4
       i Estimating performance
       v Estimating performance
-      (x) Newest results:	rmse=2.646 (+/-0.286)
+      <3 Newest results:	rmse=2.461 (+/-0.37)
     Output
       # Tuning results
       # 10-fold cross-validation 
@@ -161,13 +164,11 @@
       
       -- Iteration 1 -----------------------------------------------------------------
       
-      i Current best:		rmse=2.461 (@iter 0)
-      i Gaussian process model
-      ! The Gaussian process model is being fit using 1 features but only has 2
-        data points to do so. This may cause errors or a poor model fit.
-      i Generating 3 candidates
-      i Predicted candidates
-      i num_comp=5
+      i Current best:		rmse=2.505 (@iter 0)
+      (x) GP has a LOO R² of 0% and is unreliable.
+      v Gaussian process model failed
+      i Generating a candidate as far away from existing points as possible.
+      i num_comp=11
       i Estimating performance
       i Fold01: preprocessor 1/1
       i Fold01: preprocessor 1/1, model 1/1
@@ -200,15 +201,14 @@
       i Fold10: preprocessor 1/1, model 1/1
       i Fold10: preprocessor 1/1, model 1/1 (predictions)
       v Estimating performance
-      <3 Newest results:	rmse=2.453 (+/-0.381)
+      (x) Newest results:	rmse=3.589 (+/-0.499)
       
       -- Iteration 2 -----------------------------------------------------------------
       
-      i Current best:		rmse=2.453 (@iter 1)
-      i Gaussian process model
-      i Generating 2 candidates
-      i Predicted candidates
-      i num_comp=1
+      i Current best:		rmse=2.505 (@iter 0)
+      v Gaussian process model (LOO R²: 30.5%)
+      i Generating 15 candidates
+      i num_comp=4
       i Estimating performance
       i Fold01: preprocessor 1/1
       i Fold01: preprocessor 1/1, model 1/1
@@ -241,7 +241,7 @@
       i Fold10: preprocessor 1/1, model 1/1
       i Fold10: preprocessor 1/1, model 1/1 (predictions)
       v Estimating performance
-      (x) Newest results:	rmse=2.646 (+/-0.286)
+      <3 Newest results:	rmse=2.461 (+/-0.37)
     Output
       # Tuning results
       # 10-fold cross-validation 
@@ -260,6 +260,39 @@
       10 <split [29/3]> Fold10 <tibble [4 x 5]> <tibble [0 x 4]>     0
       # i 20 more rows
 
+# tune recipe only - failure in recipe is caught elegantly
+
+    Code
+      cars_init_res <- tune_grid(model, preprocessor = rec, resamples = data_folds,
+        grid = cars_grid)
+    Message
+      > A | error:   Error in `step_spline_b()`:
+                     Caused by error in `prep()`:
+                     ! `deg_free` must be a whole number, not a numeric `NA`.
+      > B | warning: Some 'x' values beyond boundary knots may cause ill-conditioned basis
+                     functions.
+
+---
+
+    Code
+      set.seed(283)
+      cars_bayes_res <- tune_bayes(model, preprocessor = rec, resamples = data_folds,
+        initial = cars_init_res, iter = 2)
+    Message
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
+      > A | error:   Error in `step_spline_b()`:
+                     Caused by error in `prep()`:
+                     ! `degree` (3) must be less than or equal to `deg_free` (2) when `complete_set = TRUE`.
+    Condition
+      Warning:
+      All models failed. Run `show_notes(.Last.tune.result)` for more information.
+    Message
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
+      > A | warning: Some 'x' values beyond boundary knots may cause ill-conditioned basis
+                     functions.
+
 # tune model only - failure in recipe is caught elegantly
 
     Code
@@ -320,8 +353,9 @@
       res2 <- tune_bayes(wflow, resamples = folds, param_info = pset, initial = iter1,
         iter = iter2, control = control_bayes(save_workflow = TRUE))
     Message
-      ! The Gaussian process model is being fit using 1 features but only has 2
-        data points to do so. This may cause errors or a poor model fit.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
+      i Generating 5 candidates
 
 # too few starting values
 
@@ -329,8 +363,7 @@
       tune:::check_bayes_initial_size(5, 3, FALSE)
     Message
       ! There are 5 tuning parameters and 3 grid points were requested.
-      * There are as many tuning parameters as there are initial points. This is
-        likely to cause numerical issues in the first few search iterations.
+      * This is likely to cause numerical issues in the first few search iterations.
 
 ---
 
@@ -338,8 +371,7 @@
       tune:::check_bayes_initial_size(5, 3, TRUE)
     Message
       ! There are 5 tuning parameters and 3 grid points were requested.
-      * There are as many tuning parameters as there are initial points. This is
-        likely to cause numerical issues in the first few search iterations.
+      * This is likely to cause numerical issues in the first few search iterations.
       * With racing, only completely resampled parameters are used.
 
 ---
@@ -348,8 +380,7 @@
       tune:::check_bayes_initial_size(2, 2, FALSE)
     Message
       ! There are 2 tuning parameters and 2 grid points were requested.
-      * There are as many tuning parameters as there are initial points. This is
-        likely to cause numerical issues in the first few search iterations.
+      * This is likely to cause numerical issues in the first few search iterations.
 
 ---
 
@@ -390,34 +421,53 @@
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! For the rsq estimates, 1 missing value was found and removed before fitting
         the Gaussian process model.
-      ! The Gaussian process model is being fit using 1 features but only has 2
-        data points to do so. This may cause errors or a poor model fit.
-      ! For the rsq estimates, 1 missing value was found and removed before fitting
-        the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! For the rsq estimates, 2 missing values were found and removed before
         fitting the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! For the rsq estimates, 3 missing values were found and removed before
         fitting the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! For the rsq estimates, 4 missing values were found and removed before
         fitting the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! For the rsq estimates, 5 missing values were found and removed before
         fitting the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! For the rsq estimates, 6 missing values were found and removed before
         fitting the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! For the rsq estimates, 7 missing values were found and removed before
         fitting the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! For the rsq estimates, 8 missing values were found and removed before
         fitting the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! For the rsq estimates, 9 missing values were found and removed before
         fitting the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
+      > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
+      ! For the rsq estimates, 10 missing values were found and removed before
+        fitting the Gaussian process model.
+      (x) GP has a LOO R² of 0% and is unreliable.
+      i Generating a candidate as far away from existing points as possible.
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! No improvement for 10 iterations; returning current results.
 
@@ -433,12 +483,10 @@
       > A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
       ! All of the rsq estimates were missing. The Gaussian process model cannot be
         fit to the data.
-      > A | warning: no non-missing arguments to min; returning Inf
-      > B | warning: no non-missing arguments to max; returning -Inf
-      > C | error:   argument must be coercible to non-negative integer
+      (x) GP failed: Error in initialize(...) : is.numeric(X) is not TRUE
     Condition
-      Error in `check_gp_failure()`:
-      ! Gaussian process model was not fit.
+      Error in `apply()`:
+      ! dim(X) must have a positive length
     Message
       x Optimization stopped prematurely; returning current results.
 
diff --git a/tests/testthat/data/knn_gp.rds b/tests/testthat/data/knn_gp.rds
index 3697faa0..59c3347b 100644
Binary files a/tests/testthat/data/knn_gp.rds and b/tests/testthat/data/knn_gp.rds differ
diff --git a/tests/testthat/data/lm_bayes.rds b/tests/testthat/data/lm_bayes.rds
index 7b4c7f91..b807dad1 100644
Binary files a/tests/testthat/data/lm_bayes.rds and b/tests/testthat/data/lm_bayes.rds differ
diff --git a/tests/testthat/data/rcv_results.rds b/tests/testthat/data/rcv_results.rds
index 5f85997b..e342b8ae 100644
Binary files a/tests/testthat/data/rcv_results.rds and b/tests/testthat/data/rcv_results.rds differ
diff --git a/tests/testthat/data/test_objects.RData b/tests/testthat/data/test_objects.RData
index c7aa1e25..db481197 100644
Binary files a/tests/testthat/data/test_objects.RData and b/tests/testthat/data/test_objects.RData differ
diff --git a/tests/testthat/test-GP.R b/tests/testthat/test-GP.R
deleted file mode 100644
index 93434082..00000000
--- a/tests/testthat/test-GP.R
+++ /dev/null
@@ -1,90 +0,0 @@
-test_that("encoding before model", {
-  skip_if_not_installed("dials", minimum_version = "1.4.0")
-  knn_set <- readRDS(test_path("data", "knn_set.rds"))
-  knn_grid <- readRDS(test_path("data", "knn_grid.rds"))
-
-  knn_encoded <- tune:::encode_set(knn_grid, knn_set)
-
-  expect_true(all(knn_encoded$K >= 0 & knn_encoded$K <= 1))
-  expect_true(all(knn_encoded$exponent >= 0 & knn_encoded$exponent <= 1))
-  expect_true(is.factor(knn_encoded$weight_func))
-  expect_equal(levels(knn_encoded$weight_func), dials::weight_func()$values)
-})
-
-# ------------------------------------------------------------------------------
-
-test_that("GP fit - svm", {
-  svm_results <- readRDS(test_path("data", "svm_results.rds"))
-  svm_set <- attributes(svm_results)$parameters
-
-  svm_gp <-
-    tune:::fit_gp(
-      collect_metrics(svm_results),
-      pset = svm_set,
-      metric = "accuracy",
-      control = control_bayes(verbose = TRUE)
-    )
-  expect_equal(class(svm_gp), "GP")
-  expect_equal(
-    colnames(svm_gp$X),
-    c("cost", "`%^*#`", "scale_factor")
-  )
-})
-
-# ------------------------------------------------------------------------------
-
-test_that("GP fit - knn", {
-  knn_gp <- readRDS(test_path("data", "knn_gp.rds"))
-
-  knn_cols <- c(
-    "K",
-    "weight_funcrectangular",
-    "weight_functriangular",
-    "weight_funcepanechnikov",
-    "weight_funcbiweight",
-    "weight_functriweight",
-    "weight_funccos",
-    "weight_funcinv",
-    "weight_funcgaussian",
-    "weight_funcrank",
-    "exponent"
-  )
-  expect_equal(class(knn_gp), "GP")
-  expect_equal(colnames(knn_gp$X), knn_cols)
-})
-
-# ------------------------------------------------------------------------------
-
-test_that("GP scoring", {
-  svm_results <- readRDS(test_path("data", "svm_results.rds"))
-  svm_set <- attributes(svm_results)$parameters
-
-  ctrl <- control_bayes()
-  curr <-
-    collect_metrics(svm_results) |>
-    dplyr::filter(.metric == "accuracy") |>
-    mutate(.iter = 0)
-
-  svm_gp <-
-    tune:::fit_gp(
-      collect_metrics(svm_results),
-      pset = svm_set,
-      metric = "accuracy",
-      control = ctrl
-    )
-
-  svm_scores <-
-    tune:::pred_gp(
-      svm_gp,
-      pset = svm_set,
-      size = 20,
-      current = curr,
-      control = ctrl
-    )
-  expect_true(tibble::is_tibble(svm_scores))
-  expect_equal(
-    colnames(svm_scores),
-    c("cost", "%^*#", "scale_factor", ".mean", ".sd")
-  )
-  expect_equal(nrow(svm_scores), 20)
-})
diff --git a/tests/testthat/test-compute_metrics.R b/tests/testthat/test-compute_metrics.R
index 489608ba..8e22611d 100644
--- a/tests/testthat/test-compute_metrics.R
+++ b/tests/testthat/test-compute_metrics.R
@@ -256,31 +256,33 @@ test_that("`metrics` argument works (iterative tuning)", {
   m_set_rmse <- metric_set(rmse)
   m_set_both <- metric_set(rmse, rsq)
 
-  set.seed(1)
-
-  res_rmse <-
-    tune_bayes(
-      nearest_neighbor("regression", neighbors = tune()),
-      mpg ~ .,
-      vfold_cv(mtcars, v = 3),
-      metrics = m_set_rmse,
-      control = tune::control_bayes(save_pred = TRUE),
-      iter = 2,
-      initial = 3
-    )
-
-  set.seed(1)
-
-  res_both <-
-    tune_bayes(
-      nearest_neighbor("regression", neighbors = tune()),
-      mpg ~ .,
-      vfold_cv(mtcars, v = 3),
-      metrics = m_set_both,
-      control = tune::control_bayes(save_pred = TRUE),
-      iter = 2,
-      initial = 3
-    )
+  expect_snapshot({
+    set.seed(1)
+    res_rmse <-
+      tune_bayes(
+        nearest_neighbor("regression", neighbors = tune()),
+        mpg ~ .,
+        vfold_cv(mtcars, v = 3),
+        metrics = m_set_rmse,
+        control = tune::control_bayes(save_pred = TRUE),
+        iter = 2,
+        initial = 3
+      )
+  })
+
+  expect_snapshot({
+    set.seed(1)
+    res_both <-
+      tune_bayes(
+        nearest_neighbor("regression", neighbors = tune()),
+        mpg ~ .,
+        vfold_cv(mtcars, v = 3),
+        metrics = m_set_both,
+        control = tune::control_bayes(save_pred = TRUE),
+        iter = 2,
+        initial = 3
+      )
+  })
 
   collected_sum_rmse <- collect_metrics(res_rmse)
   computed_sum_rmse <- compute_metrics(res_both, m_set_rmse)
diff --git a/tests/testthat/test-gp_helpers.R b/tests/testthat/test-gp_helpers.R
new file mode 100644
index 00000000..d7affba6
--- /dev/null
+++ b/tests/testthat/test-gp_helpers.R
@@ -0,0 +1,146 @@
+test_that("encoding before model", {
+  skip_if_not_installed("dials", minimum_version = "1.4.0")
+  knn_set <- readRDS(test_path("data", "knn_set.rds"))
+  knn_grid <- readRDS(test_path("data", "knn_grid.rds"))
+
+  knn_encoded <- tune:::encode_set(knn_grid, knn_set)
+
+  expect_true(all(knn_encoded$K >= 0 & knn_encoded$K <= 1))
+  expect_true(all(knn_encoded$exponent >= 0 & knn_encoded$exponent <= 1))
+  expect_true(is.factor(knn_encoded$weight_func))
+  expect_equal(levels(knn_encoded$weight_func), dials::weight_func()$values)
+})
+
+# ------------------------------------------------------------------------------
+
+test_that("GP fit - svm - failure", {
+  svm_results <- readRDS(test_path("data", "svm_results.rds"))
+  svm_set <- attributes(svm_results)$parameters
+
+  expect_snapshot({
+    svm_gp <-
+      tune:::fit_gp(
+        collect_metrics(svm_results),
+        pset = svm_set,
+        metric = "accuracy",
+        control = control_bayes(verbose = TRUE)
+      )
+  })
+
+  expect_equal(class(svm_gp), "list")
+  expect_named(
+    svm_gp,
+    c("fit", "use", "rsq", "tr")
+  )
+  expect_false(svm_gp$use)
+  expect_named(
+    svm_gp$tr,
+    c("cost", "X....", "scale_factor", ".outcome")
+  )
+
+  curr <-
+    collect_metrics(svm_results) |>
+    dplyr::filter(.metric == "accuracy") |>
+    mutate(.iter = 0)
+
+  expect_snapshot({
+    svm_scores <-
+      tune:::pred_gp(
+        svm_gp,
+        pset = svm_set,
+        size = 20,
+        current = curr,
+        control = ctrl
+      )
+  })
+})
+
+# ------------------------------------------------------------------------------
+
+test_that("GP scoring with failed model", {
+  svm_results <- readRDS(test_path("data", "svm_results.rds"))
+  svm_set <- attributes(svm_results)$parameters
+
+  ctrl <- control_bayes()
+  curr <-
+    collect_metrics(svm_results) |>
+    dplyr::filter(.metric == "accuracy") |>
+    mutate(.iter = 0)
+
+  expect_snapshot({
+    svm_gp <-
+      tune:::fit_gp(
+        collect_metrics(svm_results),
+        pset = svm_set,
+        metric = "accuracy",
+        control = ctrl
+      )
+  })
+
+  expect_snapshot({
+    svm_scores <-
+      tune:::pred_gp(
+        svm_gp,
+        pset = svm_set,
+        size = 20,
+        current = curr,
+        control = ctrl
+      )
+  })
+  expect_true(tibble::is_tibble(svm_scores))
+  expect_named(
+    svm_scores,
+    c("cost", "%^*#", "scale_factor", ".mean", ".sd")
+  )
+  expect_equal(nrow(svm_scores), 1)
+})
+
+
+# ------------------------------------------------------------------------------
+
+test_that("GP fit - knn", {
+  knn_results <- readRDS(test_path("data", "knn_results.rds"))
+  knn_set <- attributes(knn_results)$parameters
+
+  knn_mtr <-
+    collect_metrics(knn_results) |>
+    dplyr::filter(.metric == "roc_auc")
+
+  set.seed(1)
+  knn_gp <-
+    tune:::fit_gp(
+      knn_mtr,
+      pset = knn_set,
+      metric = "roc_auc",
+      control = control_bayes()
+    )
+
+  expect_equal(class(knn_gp), "list")
+  expect_named(
+    knn_gp,
+    c("fit", "use", "rsq", "tr")
+  )
+  expect_true(knn_gp$use)
+  expect_named(
+    knn_gp$tr,
+    c("K", "weight_func", "exponent", ".outcome")
+  )
+
+  expect_snapshot({
+    set.seed(1)
+    knn_scores <-
+      tune:::pred_gp(
+        knn_gp,
+        pset = knn_set,
+        size = 20,
+        current = knn_mtr |> mutate(.iter = 0),
+        control = ctrl
+      )
+  })
+
+  expect_named(
+    knn_scores,
+    c("K", "weight_func", "exponent", ".mean", ".sd")
+  )
+  expect_equal(nrow(knn_scores), 20L)
+})
diff --git a/tests/testthat/test-int_pctl.R b/tests/testthat/test-int_pctl.R
index 41e0fb0a..4710235f 100644
--- a/tests/testthat/test-int_pctl.R
+++ b/tests/testthat/test-int_pctl.R
@@ -115,19 +115,21 @@ test_that("percentile intervals - grid + bayes tuning", {
 
   # ------------------------------------------------------------------------------
 
-  set.seed(92)
-  c5_bo_res <-
-    decision_tree(min_n = tune()) |>
-    set_engine("C5.0") |>
-    set_mode("classification") |>
-    tune_bayes(
-      Class ~ .,
-      resamples = cls_rs,
-      initial = c5_res,
-      iter = 1,
-      metrics = metric_set(sens),
-      control = control_bayes(save_pred = TRUE)
-    )
+  expect_snapshot({
+    set.seed(92)
+    c5_bo_res <-
+      decision_tree(min_n = tune()) |>
+      set_engine("C5.0") |>
+      set_mode("classification") |>
+      tune_bayes(
+        Class ~ .,
+        resamples = cls_rs,
+        initial = c5_res,
+        iter = 1,
+        metrics = metric_set(sens),
+        control = control_bayes(save_pred = TRUE)
+      )
+  })
   template <- dplyr::tibble(
     min_n = integer(0),
     .metric = character(0),
diff --git a/tests/testthat/test-bayes.R b/tests/testthat/test-tune_bayes.R
similarity index 96%
rename from tests/testthat/test-bayes.R
rename to tests/testthat/test-tune_bayes.R
index 7ae970c6..c44681fe 100644
--- a/tests/testthat/test-bayes.R
+++ b/tests/testthat/test-tune_bayes.R
@@ -24,11 +24,12 @@ test_that("tune recipe only", {
     add_recipe(rec_tune_1) |>
     add_model(lm_mod)
   pset <- extract_parameter_set_dials(wflow) |>
-    update(num_comp = dials::num_comp(c(1, 5)))
+    update(num_comp = dials::num_comp(c(1, 15)))
   folds <- rsample::vfold_cv(mtcars)
   control <- control_bayes(extract = identity)
 
-  suppressMessages({
+  expect_snapshot({
+    set.seed(2)
     res <- tune_bayes(
       wflow,
       resamples = folds,
@@ -38,6 +39,7 @@ test_that("tune recipe only", {
       control = control
     )
   })
+
   res_est <- collect_metrics(res)
   res_workflow <- res$.extracts[[1]]$.extracts[[1]]
 
@@ -57,20 +59,6 @@ test_that("tune recipe only", {
   expect_null(.get_tune_eval_times(res))
   expect_null(.get_tune_eval_time_target(res))
 
-  set.seed(1)
-  expect_no_error(
-    suppressMessages(
-      tune_bayes(
-        wflow,
-        resamples = folds,
-        param_info = pset,
-        initial = iter1,
-        iter = iter2,
-        corr = list(type = "matern", nu = 3 / 2)
-      )
-    )
-  )
-
   # test verbose options
   set.seed(1)
   expect_snapshot(
@@ -300,7 +288,7 @@ test_that("tune recipe only - failure in recipe is caught elegantly", {
   # NA values not allowed in recipe
   cars_grid <- tibble(deg_free = c(3, NA_real_, 4))
 
-  suppressMessages({
+  expect_snapshot({
     cars_init_res <- tune_grid(
       model,
       preprocessor = rec,
@@ -309,8 +297,8 @@ test_that("tune recipe only - failure in recipe is caught elegantly", {
     )
   })
 
-  suppressMessages({
-    set.seed(283) #<- chosen to not generate faiures
+  expect_snapshot({
+    set.seed(283)
     cars_bayes_res <- tune_bayes(
       model,
       preprocessor = rec,
@@ -335,11 +323,11 @@ test_that("tune recipe only - failure in recipe is caught elegantly", {
   expect_equal(obs_failures, exp_failures)
 
   all_notes <- collect_notes(cars_bayes_res)
-  expect_equal(nrow(all_notes), 11L)
+  expect_equal(nrow(all_notes), 5L)
 
   expect_equal(
     collect_metrics(cars_bayes_res) |> distinct(deg_free) |> nrow(),
-    exp_init_grid_res + 2
+    3
   )
 })
 
@@ -422,7 +410,7 @@ test_that("tune model and recipe - failure in recipe is caught elegantly", {
   })
 
   suppressMessages({
-    set.seed(283) #<- chosen to not generate faiures
+    set.seed(283)
     cars_bayes_res <- tune_bayes(
       svm_mod,
       preprocessor = rec,
@@ -447,7 +435,7 @@ test_that("tune model and recipe - failure in recipe is caught elegantly", {
   expect_equal(obs_failures, exp_failures)
 
   all_notes <- collect_notes(cars_bayes_res)
-  expect_equal(nrow(all_notes), 6L)
+  expect_equal(nrow(all_notes), 7L)
 
   expect_equal(
     collect_metrics(cars_bayes_res) |> distinct(deg_free, cost) |> nrow(),
@@ -558,6 +546,7 @@ test_that("missing performance values", {
   skip_if(new_rng_snapshots)
   skip_if(packageVersion("dplyr") < "1.1.1")
   skip_if_not_installed("modeldata")
+  skip_if_not_installed("pec")
 
   data(ames, package = "modeldata")