Rdatatable · jangorecki · Sep 7, 2025 · May 26, 2025 · May 26, 2025 · May 27, 2025
@@ -20,6 +20,18 @@
     frollsum(c(1,2,3,Inf,5,6), 2)
     #[1]  NA   3   5 Inf Inf  11
 
+4. `frollapply` result is not coerced to numeric anymore. Users' code could possibly break if it depends on forced coercion of input/output to numeric type.
+    ```r
+    ## before
+    frollapply(c(F,T,F,F,F,T), 2, any)
+    #[1] NA  1  1  0  0  1
+
+    ## now
+    frollapply(c(F,T,F,F,F,T), 2, any)
+    #[1]    NA  TRUE  TRUE FALSE FALSE  TRUE
+    ```
+    Additionally, the argument names in `frollapply` have been renamed from `x` to `X` and from `n` to `N` to avoid conflicts with common argument names that may be passed to `...`, aligning with the base R API of `lapply`. `x` and `n` continue to work with a warning, for now.
+
 ### NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES 
 
 1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector, for example `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname)` to `FALSE`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`.
@@ -157,6 +169,47 @@
 
     As of now, adaptive rolling max has no _on-line_ implementation (`algo="fast"`), it uses a naive approach (`algo="exact"`). Therefore further speed up is still possible if `algo="fast"` gets implemented.
 
+17. Function `frollapply` has been completely rewritten. Thanks to @jangorecki for the implementation. Be sure to read the `frollapply` manual before using the function. The changes are as follows:
+    - all basic types are now supported on input/output, not only double. Users' code could possibly break if it depends on forced coercion of input/output to double type.
+    - new argument `by.column` allows passing a multi-column subset of a data.table into a rolling function, closes [#4887](https://github.com/Rdatatable/data.table/issues/4887).
+    ```r
+    x = data.table(v1=rnorm(120), v2=rnorm(120))
+    f = function(x) coef(lm(v2 ~ v1, data=x))
+    coef.fill = c("(Intercept)"=NA_real_, "v1"=NA_real_)
+    frollapply(x, 4, f, by.column=FALSE, fill=coef.fill)
+    #     (Intercept)         v1
+    #  1:          NA         NA
+    #  2:          NA         NA
+    #  3:          NA         NA
+    #  4:  0.65456931  0.3138012
+    #  5: -1.07977441 -2.0588094
+    #---
+    #116:  0.15828417  0.3570216
+    #117: -0.09083424  1.5494507
+    #118: -0.18345878  0.6424837
+    #119: -0.28964772  0.6116575
+    #120: -0.40598313  0.6112854
+    ```
+    - uses multiple CPU threads (on a decent OS); evaluation of a user-defined function (UDF) is inherently slow, so this can be a great help.
+    ```r
+    x = rnorm(1e5)
+    n = 500
+    setDTthreads(1)
+    system.time(
+      th1 <- frollapply(x, n, median, simplify=unlist)
+    )
+    #   user  system elapsed
+    #  3.078   0.005   3.084
+    setDTthreads(4)
+    system.time(
+      th4 <- frollapply(x, n, median, simplify=unlist)
+    )
+    #   user  system elapsed
+    #  2.453   0.135   0.897
+    all.equal(th1, th4)
+    #[1] TRUE
+    ```
+
 ### BUG FIXES
 
 1. `fread()` no longer warns on certain systems on R 4.5.0+ where the file owner can't be resolved, [#6918](https://github.com/Rdatatable/data.table/issues/6918). Thanks @ProfFancyPants for the report and PR.

@@ -25,21 +25,31 @@ trimnadaptive = function(n, align) {
 # frollsum(list(1:4, 2:5), 2:3, partial=FALSE, adaptive=FALSE)
 # frollsum(list(1:4, 2:5), 2:3, partial=TRUE, adaptive=FALSE)
 partial2adaptive = function(x, n, align, adaptive) {
+  ## do not quote argument x and n arg names because frollapply has them in uppercase
   if (!length(n))
     stopf("n must be non 0 length")
   if (align=="center")
     stopf("'partial' cannot be used together with align='center'")
-  if (is.list(x) && length(unique(lengths(x))) != 1L)
-    stopf("'partial' does not support variable length of columns in 'x'")
-  len = if (is.list(x)) length(x[[1L]]) else length(x)
+  if (is.list(x)) {
+    if (!is.data.frame(x) && !equal.lengths(x)) ## froll
+      stopf("'partial' does not support variable length of columns in x")
+    else if (all_data.frame(x) && !equal.nrows(x)) ## frollapply by.column=F, single DT already wrapped into list
+      stopf("'partial' does not support variable nrow of data.tables in x")
+  }
+  len = if (is.list(x)) {
+    if (is.data.frame(x[[1L]])) ## frollapply by.column
+      nrow(x[[1L]])
+    else ## froll, this will work for both x list and x dt on input
+      length(x[[1L]])
+  } else length(x)
   verbose = getOption("datatable.verbose")
   if (!adaptive) {
     if (is.list(n))
       stopf("n must be an integer, list is accepted for adaptive TRUE")
     if (!is.numeric(n))
       stopf("n must be an integer vector or a list of integer vectors")
     if (verbose)
-      catf("partial2adaptive: froll partial=TRUE trimming 'n' and redirecting to adaptive=TRUE\n")
+      catf("partial2adaptive: froll partial=TRUE trimming n and redirecting to adaptive=TRUE\n")
     if (length(n) > 1L) {
       ## c(2,3) -> list(c(1,2,2,2),c(1,2,3,3)) ## for x=1:4
       lapply(n, len, align, FUN=trimn)
@@ -50,14 +60,14 @@ partial2adaptive = function(x, n, align, adaptive) {
   } else {
     if (!(is.numeric(n) || (is.list(n) && all(vapply_1b(n, is.numeric)))))
       stopf("n must be an integer vector or a list of integer vectors")
-    if (length(unique(lengths(n))) != 1L)
-      stopf("adaptive window provided in 'n' must not to have different lengths")
+    if (is.list(n) && length(unique(lengths(n))) != 1L)
+      stopf("adaptive windows provided in n must not to have different lengths")
     if (is.numeric(n) && length(n) != len)
-      stopf("length of 'n' argument must be equal to number of observations provided in 'x'")
+      stopf("length of n argument must be equal to number of observations provided in x")
     if (is.list(n) && length(n[[1L]]) != len)
-      stopf("length of vectors in 'x' must match to length of adaptive window in 'n'")
+      stopf("length of vectors in x must match to length of adaptive window in n")
     if (verbose)
-      catf("partial2adaptive: froll adaptive=TRUE and partial=TRUE trimming 'n'\n")
+      catf("partial2adaptive: froll adaptive=TRUE and partial=TRUE trimming n\n")
     if (is.numeric(n)) {
       ## c(3,3,3,2) -> c(1,2,3,2) ## for x=1:4
       trimnadaptive(n, align)
@@ -93,27 +103,35 @@ make.roll.names = function(x.len, n.len, n, x.nm, n.nm, fun, adaptive) {
     if (length(n.nm)) { ## !adaptive || is.list(n)
       n.nm
     } else { ## adaptive && is.numeric(n)
-      NULL # nocov ## call to make.roll.names is excluded by is.list(ans) condition before calling it, it will be relevant for !by.column in next PR
+      stopf("internal error: make.roll.names call should have been escaped in frollapply during 'unpack atomic input'") # nocov ## frollapply(data.frame(x=1:5), rep(2,5), dim, by.column=FALSE, give.names=TRUE, adaptive=TRUE)
     }
   }
   if (!is.null(ans) && length(ans) != x.len*n.len)
     stopf("internal error: make.roll.names generated names of wrong length") ## nocov
   ans
 }
 
-froll = function(fun, x, n, fill=NA, algo, align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, FUN, rho, give.names=FALSE) {
+froll = function(fun, x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
+  stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun))
+  if (!missing(hasNA)) {
+    if (!is.na(has.nf))
+      stopf("hasNA is deprecated, use has.nf instead")
+    warningf("hasNA is deprecated, use has.nf instead")
+    has.nf = hasNA
+  } # remove check on next major release
+  algo = match.arg(algo)
   align = match.arg(align)
   if (isTRUE(give.names)) {
-     orig = list(n=n, adaptive=adaptive)
-     xnam = if (is.list(x)) names(x) else character()
-     nnam = if (isTRUE(adaptive)) {
-       if (is.list(n)) names(n) else character()
-     } else names(n)
-     nx = if (is.list(x)) length(x) else 1L
-     nn = if (isTRUE(adaptive)) {
-       if (is.list(n)) length(n) else 1L
-     } else length(n)
-   }
+    orig = list(n=n, adaptive=adaptive)
+    xnam = if (is.list(x)) names(x) else character()
+    nnam = if (isTRUE(adaptive)) {
+      if (is.list(n)) names(n) else character()
+    } else names(n)
+    nx = if (is.list(x)) length(x) else 1L
+    nn = if (isTRUE(adaptive)) {
+      if (is.list(n)) length(n) else 1L
+    } else length(n)
+  }
   if (isTRUE(partial)) {
     n = partial2adaptive(x, n, align, adaptive)
     adaptive = TRUE
@@ -128,10 +146,7 @@ froll = function(fun, x, n, fill=NA, algo, align=c("right","left","center"), na.
     n = rev2(n)
     align = "right"
   } ## support for left adaptive added in #5441
-  if (missing(FUN))
-    ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, has.nf, adaptive)
-  else
-    ans = .Call(CfrollapplyR, FUN, x, n, fill, align, adaptive, rho)
+  ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, has.nf, adaptive)
   if (leftadaptive) {
     if (verbose)
       catf("froll: adaptive=TRUE && align='left' post-processing from align='right'\n")
@@ -144,30 +159,12 @@ froll = function(fun, x, n, fill=NA, algo, align=c("right","left","center"), na.
   ans
 }
 
-frollfun = function(fun, x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
-  stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun))
-  if (!missing(hasNA)) {
-    if (!is.na(has.nf))
-      stopf("hasNA is deprecated, use has.nf instead")
-    warningf("hasNA is deprecated, use has.nf instead")
-    has.nf = hasNA
-  } # remove check on next major release
-  algo = match.arg(algo)
-  froll(fun=fun, x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, give.names=give.names)
-}
-
-frollmean = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
-  frollfun(fun="mean", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
+frollmean = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
+  froll(fun="mean", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
 }
-frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
-  frollfun(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
+frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
+  froll(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
 }
-frollmax = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
-  frollfun(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
-}
-
-frollapply = function(x, n, FUN, ..., fill=NA, align=c("right","left","center"), adaptive=FALSE, partial=FALSE, give.names=FALSE) {
-  FUN = match.fun(FUN)
-  rho = new.env()
-  froll(FUN=FUN, rho=rho, x=x, n=n, fill=fill, align=align, adaptive=adaptive, partial=partial, give.names=give.names)
+frollmax = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
+  froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
 }