froll manual improvements for vectorized input (#7297)

jangorecki · web-flow · commit 1a93d1f7998e · 2025-09-09T10:37:18.000+02:00
diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
@@ -1473,7 +1473,7 @@ test(6010.653, y = ans, x = lapply(
 ))
 rm(X, ans, n)
 
-## simplify simplifyList
+## simplify simplifylist
 test(6010.701, frollapply(1:5, 2, sum), c(NA,3L,5L,7L,9L))
 test(6010.702, frollapply(1:5, 2, sum, simplify=unlist), c(NA,3L,5L,7L,9L))
 test(6010.703, frollapply(1:5, 2, sum, simplify=FALSE), list(NA,3L,5L,7L,9L))
diff --git a/man/froll.Rd b/man/froll.Rd
@@ -13,7 +13,7 @@
 \alias{rollmax}
 \title{Rolling functions}
 \description{
-  Fast rolling functions to calculate aggregates on sliding windows. For a user-defined rolling function see \code{\link{frollapply}}.
+  Fast rolling functions to calculate aggregates on a sliding window. For a user-defined rolling function see \code{\link{frollapply}}. For a "time-aware" (irregularly spaced time series) rolling function see \code{\link{frolladapt}}.
 }
 \usage{
   frollmean(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
@@ -24,12 +24,10 @@
     na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA)
 }
 \arguments{
-  \item{x}{ Vector, \code{data.frame} or \code{data.table} of integer, numeric or logical columns over which to calculate the windowed aggregations. May also be a list, in which case the rolling function is applied to each of its elements. }
-  \item{n}{ Integer, non-negative, vector giving rolling window size(s). This is the \emph{total} number of included values in aggregate function. Adaptive rolling functions also accept a list of integer vectors when applying multiple window sizes. }
+  \item{x}{ Integer, numeric or logical vector, coerced to numeric, on which sliding window calculates an aggregate function. It supports vectorized input, then it needs to be a \code{data.table}, \code{data.frame} or a \code{list}, in which case a rolling function is applied to each column/vector. }
+  \item{n}{ Integer, non-negative, rolling window size. This is the \emph{total} number of included values in aggregate function. In case of an adaptive rolling function window size has to be provided as a vector for each individual value of \code{x}. It supports vectorized input, then it needs to be a vector, or in case of an adaptive rolling a \code{list} of vectors. }
   \item{fill}{ Numeric; value to pad by. Defaults to \code{NA}. }
-  \item{algo}{ Character, default \code{"fast"}. When set to \code{"exact"}, a slower (but more accurate) algorithm is used. It
-    suffers less from floating point rounding errors by performing an extra pass, and carefully handles all non-finite values.
-    It will use multiple cores where available. See Details for more information. }
+  \item{algo}{ Character, default \code{"fast"}. When set to \code{"exact"}, a slower (but more accurate) algorithm is used. It suffers less from floating point rounding errors by performing an extra pass, and carefully handles all non-finite values. It will use multiple cores where available. See Details for more information. }
   \item{align}{ Character, specifying the "alignment" of the rolling window, defaulting to \code{"right"}. \code{"right"} covers preceding rows (the window \emph{ends} on the current value); \code{"left"} covers following rows (the window \emph{starts} on the current value); \code{"center"} is halfway in between (the window is \emph{centered} on the current value, biased towards \code{"left"} when \code{n} is even). }
   \item{na.rm}{ Logical, default \code{FALSE}. Should missing values be removed when calculating window? }
   \item{has.nf}{ Logical. If it is known whether \code{x} contains non-finite values (\code{NA}, \code{NaN}, \code{Inf}, \code{-Inf}), then setting this to \code{TRUE} or \code{FALSE} may speed up computation. Defaults to \code{NA}. See \emph{has.nf argument} section below for details. }
@@ -58,17 +56,6 @@
   are run in parallel. The exception is for \code{algo="exact"}, which runs in
   parallel already.
 
-  \code{frollapply} computes rolling aggregate on arbitrary R functions.
-  The input \code{x} (first argument) to the function \code{FUN}
-  is coerced to \emph{numeric} beforehand and \code{FUN}
-  has to return a scalar \emph{numeric} value. Checks for that are made only
-  during the first iteration when \code{FUN} is evaluated. Edge cases can be
-  found in examples below. Any R function is supported, but it is not optimized
-  using our own C implementation -- hence, for example, using \code{frollapply}
-  to compute a rolling average is inefficient. It is also always single-threaded
-  because there is no thread-safe API to R's C \code{eval}. Nevertheless we've
-  seen the computation speed up vis-a-vis versions implemented in base R.
-
   Setting \code{options(datatable.verbose=TRUE)} will display various
   information about how rolling function processed. It will not print
   information in real-time but only at the end of the processing.
diff --git a/man/frolladapt.Rd b/man/frolladapt.Rd
@@ -2,14 +2,14 @@
 \alias{frolladapt}
 \title{Adapt rolling window to irregularly spaced time series}
 \description{
-  Helper function to generate adaptive window size based on the irregularly spaced time series index. Experimental. Generated adaptive window can be then used in rolling functions. See \code{\link{froll}} and \code{\link{frollapply}} for details.
+  Helper function to generate adaptive window size based on the irregularly spaced time series index, to be passed as \code{n} argument to adaptive \code{\link{froll}} function (or \code{N} argument to adaptive \code{\link{frollapply}}). Experimental.
 }
 \usage{
   frolladapt(x, n, align="right", partial=FALSE, give.names=FALSE)
 }
 \arguments{
-  \item{x}{ Integer. Must be sorted with no duplicates or missing values. Other objects with numeric storage (including most commonly \code{Date} and \code{POSIXct}) will be coerced to integer, which, for example, in case of \code{POSIXct} means truncating to whole seconds. }
-  \item{n}{ Integer vector giving rolling positive window size(s). Up to \code{n} values nearest to each value of \code{x}, with distance in the units of \code{x} and according to the window implied by \code{align}, are included in each rolling aggregation window. Thus when \code{x} is a \code{POSIXct}, \code{n} are seconds, and when \code{x} is a \code{Date}, \code{n} are days. }
+  \item{x}{ Integer. Must be sorted with no duplicates or missing values. Other objects with numeric storage (including most commonly \code{Date} and \code{POSIXct}) will be coerced to integer, which, for example, in case of \code{POSIXct} means truncating to whole seconds. It does not support vectorized input. }
+  \item{n}{ Integer, positive, rolling window size. Up to \code{n} values nearest to each value of \code{x}, with distance in the units of \code{x} and according to the window implied by \code{align}, are included in each rolling aggregation window. Thus when \code{x} is a \code{POSIXct}, \code{n} are seconds, and when \code{x} is a \code{Date}, \code{n} are days. It supports vectorized input, then it needs to be a vector. }
   \item{align}{ Character, default \code{"right"}. Other alignments have not yet been implemented. }
   \item{partial}{ Logical, default \code{FALSE}. Should the rolling window size(s) provided in \code{n} be trimmed to available observations. For details see \code{\link{froll}}. }
   \item{give.names}{ Logical, default \code{FALSE}. When \code{TRUE}, names are automatically generated corresponding to names of \code{n}. If answer is an integer vector, then the argument is ignored, see examples. }
diff --git a/man/frollapply.Rd b/man/frollapply.Rd
@@ -3,17 +3,17 @@
 \alias{rollapply}
 \title{Rolling user-defined function}
 \description{
-  Fast rolling user-defined function (\emph{UDF}) to calculate on sliding window. Experimental. Please read, at least, \emph{caveats} section below.
+  Fast rolling user-defined function (\emph{UDF}) to calculate on a sliding window. Experimental. Please read, at least, \emph{caveats} section below. For a "time-aware" (irregularly spaced time series) rolling function see \code{\link{frolladapt}}.
 }
 \usage{
   frollapply(X, N, FUN, \dots, by.column=TRUE, fill=NA,
     align=c("right","left","center"), adaptive=FALSE, partial=FALSE,
     give.names=FALSE, simplify=TRUE, x, n)
 }
 \arguments{
-  \item{X}{ Atomic vector, \code{data.frame}, \code{data.table} or \code{list}. When \code{by.column=TRUE} (default) then a non-atomic \code{X} is processed as \emph{vectorized} input, so rolling function is calculated for each column/vector (non-atomic columns/vectors are not supported). When \code{by.column=FALSE} then \code{X} expects to be a data.frame, data.table or a list of equal length vectors (non-atomic columns/vectors are not supported), so rolling function is calculated for \code{X} as data.frame/data.table/list rather than atomic vector. It supports \emph{vectorized} input as well, passing list of data.frames/data.tables, but not list of lists. }
-  \item{N}{ Integer, non-negative, vector giving rolling window size(s). This is the \emph{total} number of included values in aggregate function. Adaptive rolling functions also accept a list of integer vectors when applying multiple window sizes, see \code{adaptive} argument description for details. In both \code{adaptive} cases \code{N} may also be a list, supporting \emph{vectorized} input, then rolling function is calculated for each element of the list. }
-  \item{FUN}{ The function to be applied on a subsets of \code{X}. }
+  \item{X}{ Atomic vector, \code{data.frame}, \code{data.table} or a \code{list} on which sliding window calculates \code{FUN} function. How the \code{X} is handled depends on the \code{by.column} argument. It supports vectorized input, for \code{by.column=TRUE} it needs to be a \code{data.table}, \code{data.frame} or a \code{list}, and for \code{by.column=FALSE} list of data.frames/data.tables, but not list of lists. }
+  \item{N}{ Integer, non-negative, rolling window size. This is the \emph{total} number of included values in aggregate function. In case of an adaptive rolling function window size has to be provided as a vector for each individual value of \code{X}. It supports vectorized input, then it needs to be a vector, or in case of an adaptive rolling a \code{list} of vectors. }
+  \item{FUN}{ The function to be applied on subsets of \code{X}. }
   \item{\dots}{ Extra arguments passed to \code{FUN}. Note that arguments passed to \dots cannot have same names as arguments of \code{frollapply}. }
   \item{by.column}{ Logical. When \code{TRUE} (default) then \code{X} of types list/data.frame/data.table is treated as vectorized input rather an object to apply rolling window on. Setting to \code{FALSE} allows rolling window to be applied on multiple variables, using data.frame, data.table or a list, as a whole. For details see \emph{\code{by.column} argument} section below. }
   \item{fill}{ An object; value to pad by. Defaults to \code{NA}. When \code{partial=TRUE} this argument is ignored. }