@@ -333,6 +333,70 @@ mergePreparedCovs <- function(years, fuelCovsCoarse, ignitionFirePoints, nonFore
333333
334334
335335
336+ # ' Prepare and (optionally) cache scaled covariates for modeling
337+ # '
338+ # ' Wraps [rescaleCovariates()] to rescale features according to the modeling
339+ # ' algorithm, and optionally caches results based on a robust digest of the
340+ # ' unscaled input data. Returns a list containing the (possibly) rescaled
341+ # ' covariates, the modeling formula (if any), per-variable rescalers (if any),
342+ # ' the x-axis variable name(s), and the digest used for caching.
343+ # '
344+ # ' @param unscaledData A `data.frame` or `data.table` of covariates. Must
345+ # ' include predictor columns and may include response columns such as
346+ # ' `ignitions` and/or `escapes` (see Details).
347+ # ' @param algorithm `character`. Name of the model algorithm. If it contains
348+ # ' `"xgb"`, covariates are standardized via [base::scale()] (centering and
349+ # ' scaling); otherwise, covariates are rescaled by order of magnitude to
350+ # ' roughly the \[0, 10\] range (see Details).
351+ # ' @param rescaleVars `logical`. If `TRUE`, perform rescaling/standardization
352+ # ' according to `algorithm`; if `FALSE`, covariates are returned unchanged.
353+ # ' @param useCache `logical` (default `TRUE`). If `TRUE`, results are cached
354+ # ' using [reproducible::Cache()] with an extra key derived from a digest of
355+ # ' `unscaledData`.
356+ # '
357+ # ' @details
358+ # ' This function:
359+ # ' * Computes a digest of `unscaledData` (if `useCache = TRUE`) using an internal
360+ # ' `.robustDigest()` to form a `.cacheExtra` key for [reproducible::Cache()].
361+ # ' * Calls [rescaleCovariates()] with `formula = NULL` and `family = NULL`
362+ # ' (non-xgboost code-paths were removed).
363+ # ' * Caches the result while omitting the large arguments `"covariates"` and
364+ # ' `"formula"` from cache key construction (via `omitArgs`), relying on the
365+ # ' explicit digest instead.
366+ # '
367+ # ' **Expected columns**:
368+ # ' - `ignitions`: if present, is preserved through standardization (xgboost path).
369+ # ' - `escapes`: if present, is preserved through standardization (xgboost path).
370+ # ' - `year` or `yr`: used to infer `xvar` (see `rescaleCovariates()`).
371+ # '
372+ # ' @return A `list` with elements:
373+ # ' \describe{
374+ # ' \item{covariates}{A `data.table` with rescaled/standardized covariates.}
375+ # ' \item{formula}{`NULL` (the non-xgboost formula path is disabled).}
376+ # ' \item{ignitionRescalers}{Named numeric vector of per-variable 10-based
377+ # ' rescalers (non-xgboost path) or `NULL` for xgboost / no-rescale.}
378+ # ' \item{xvar}{Character vector indicating which year-like column(s) were found.}
379+ # ' \item{digestOfData}{The digest used for caching, or `NULL` if `useCache = FALSE`.}
380+ # ' }
381+ # '
382+ # ' @seealso [rescaleCovariates()], [reproducible::Cache()]
383+ # '
384+ # ' @examples
385+ # ' \dontrun{
386+ # ' library(data.table)
387+ # ' dt <- data.table(pixelID = 1:3,
388+ # ' year = c(2001L, 2002L, 2003L),
389+ # ' ignitions = c(0, 1, 0),
390+ # ' x1 = c(0.01, 2.3, 15))
391+ # '
392+ # ' out <- prepareCovariatesOuter(unscaledData = dt,
393+ # ' algorithm = "xgb_classifier",
394+ # ' rescaleVars = TRUE,
395+ # ' useCache = FALSE)
396+ # ' str(out)
397+ # ' }
398+ # '
399+ # ' @export
336400prepareCovariatesOuter <- function (unscaledData , algorithm , rescaleVars ,
337401 useCache = TRUE ) {
338402 # covariatesHere <- igOrEscNames(igOrEsc, post = "Covariates") # paste0("fireSense_", igOrEsc, "Covariates")
@@ -373,6 +437,77 @@ prepareCovariatesOuter <- function(unscaledData, algorithm, rescaleVars,
373437
374438
375439
440+ # ' Rescale/standardize covariates for modeling
441+ # '
442+ # ' Applies either order-of-magnitude rescaling to approximately confine features
443+ # ' to the \[0, 10\] range (non-xgboost) or standardization via [base::scale()]
444+ # ' (xgboost path). Preserves key response columns when present.
445+ # '
446+ # ' @param formula A model formula or `NULL`. Non-xgboost paths would parse and
447+ # ' validate it, but the surrounding code path currently sets `formula = NULL`
448+ # ' and disables non-xgboost modeling in the caller.
449+ # ' @param covariates A `data.frame` or `data.table` of covariates and possibly
450+ # ' responses (e.g., `ignitions`, `escapes`).
451+ # ' @param rescaleVars `logical`. If `TRUE`, features are rescaled/standardized
452+ # ' according to `modelAlgorithm`. If `FALSE`, returned unchanged (with
453+ # ' `ignitionRescalers = NULL`).
454+ # ' @param modelAlgorithm `character`. If the string contains `"xgb"`, apply
455+ # ' [base::scale()] (centering and scaling) to all columns except
456+ # ' `yearChar`, `ignitions`, and `escapes`. Otherwise, log-10 order-of-magnitude
457+ # ' rescaling is applied to all non-excluded covariates whose maxima fall
458+ # ' outside \[0, 10\].
459+ # '
460+ # ' @details
461+ # ' **Year variable inference**: If any of `c("year", "yr")` match column names (case-insensitive),
462+ # ' `xvar` is set to the matching name(s). Otherwise, the code falls back to `rows`, which is
463+ # ' not defined in this function (marked as a TODO); that fallback will error unless `rows`
464+ # ' exists in an enclosing scope. Consider providing a default (e.g., `NULL`) or removing it.
465+ # '
466+ # ' **Non-xgboost path**:
467+ # ' - Excludes `pixelID`, `ignitions`, `escapes`, `year`, and `yearChar` from rescaling.
468+ # ' - Computes per-variable `rescalers <- 10^(floor(log10(abs(max))))` for variables
469+ # ' whose maxima fall outside \[0, 10\].
470+ # ' - Calls `rescaleVarsByMagnitude(covariates, ignitionRescalers)` (must exist in scope).
471+ # '
472+ # ' **xgboost path**:
473+ # ' - Standardizes numeric columns except `yearChar`, `ignitions`, and `escapes`.
474+ # ' - Preserves original `ignitions` and (if present) `escapes` columns.
475+ # ' - Attaches centering/scaling attributes under `attr(covariates, "scaleData")`.
476+ # '
477+ # ' **External references**:
478+ # ' - Uses `fireSenseUtils:::ignitionsTxt` and `fireSenseUtils:::escapesTxt` to
479+ # ' refer to the expected response column names. If these are not available,
480+ # ' ensure compatible strings are provided (see the exported constants `ignitionsTxt` and `escapesTxt`).
481+ # '
482+ # ' @return A `list` with elements:
483+ # ' \describe{
484+ # ' \item{covariates}{A `data.table` after rescaling/standardization.}
485+ # ' \item{formula}{The input `formula` (often `NULL` in current usage).}
486+ # ' \item{ignitionRescalers}{Named numeric vector of per-variable rescalers (non-xgb) or `NULL`.}
487+ # ' \item{xvar}{Character vector with the detected year-like column(s); if none is found, the (currently undefined) `rows` fallback is used (see Details).}
488+ # ' }
489+ # '
490+ # ' @examples
491+ # ' \dontrun{
492+ # ' library(data.table)
493+ # ' dt <- data.table(ignitions = c(0,1,0),
494+ # ' escapes = c(0,0,1),
495+ # ' year = 2001:2003,
496+ # ' x1 = c(0.05, 12, 100),
497+ # ' x2 = c(5, 6, 7))
498+ # '
499+ # ' # XGBoost-like standardization
500+ # ' out_xgb <- rescaleCovariates(NULL, dt, rescaleVars = TRUE, modelAlgorithm = "xgb_tree")
501+ # ' str(out_xgb)
502+ # '
503+ # ' # Non-xgboost magnitude rescaling
504+ # ' out_other <- rescaleCovariates(NULL, dt, rescaleVars = TRUE, modelAlgorithm = "glm")
505+ # ' str(out_other)
506+ # ' }
507+ # '
508+ # ' @seealso [base::scale()]
509+ # '
510+ # ' @export
376511rescaleCovariates <- function (formula , covariates , rescaleVars , modelAlgorithm ) {
377512
378513 covariates <- copy(setDT(covariates ))
@@ -454,6 +589,25 @@ rescaleCovariates <- function(formula, covariates, rescaleVars, modelAlgorithm)
454589 xvar = xvar ))
455590}
456591
592+ # ' Construct standardized FireSense object names
593+ # '
594+ # ' Creates a standardized name by combining a `pre` prefix, a normalized
595+ # ' `igOrEsc` token (case-transformed), and a `post` suffix.
596+ # '
597+ # ' @param igOrEsc `character`. A token such as `"ignition"` or `"escape"`.
598+ # ' @param pre `character` scalar. Prefix string (default `"fireSense_"`).
599+ # ' @param post `character` scalar. Suffix string to append.
600+ # ' @param case One of `c("lower", "camel", "sentence", "title")`. Controls how
601+ # ' `igOrEsc` is transformed before concatenation. Only the first element is used;
602+ # ' it is lower-cased and matched by its leading letters, e.g., `"cam"`, `"sen"`, `"tit"`.
603+ # '
604+ # ' @return A `character` vector (length 1 for scalar inputs) constructed as `paste0(pre, transformed(igOrEsc), post)`.
605+ # '
606+ # ' @examples
607+ # ' igOrEscNames("ignition", post = "Covariates")
608+ # ' igOrEscNames("escape", pre = "fs_", post = "_Formula", case = "camel")
609+ # '
610+ # ' @export
457611igOrEscNames <- function (igOrEsc , pre = " fireSense_" , post , case = c(" lower" , " camel" , " sentence" , " title" )) {
458612 if (startsWith(tolower(case [1 ]), prefix = " cam" ))
459613 igOrEsc <- camelCase(igOrEsc )
@@ -462,8 +616,18 @@ igOrEscNames <- function(igOrEsc, pre = "fireSense_", post, case = c("lower", "c
462616 paste0(pre , igOrEsc , post )
463617}
464618
465-
619+ # ' Column name for ignitions
620+ # '
621+ # ' Utility constant naming the expected ignitions column.
622+ # '
623+ # ' @format `character(1)` with value `"ignitions"`.
624+ # ' @export
466625ignitionsTxt <- " ignitions"
467626
627+ # ' Column name for escapes
628+ # '
629+ # ' Utility constant naming the expected escapes column.
630+ # '
631+ # ' @format `character(1)` with value `"escapes"`.
632+ # ' @export
468633escapesTxt <- " escapes"
469-
0 commit comments