Rdatatable
diff --git a/‎NEWS.md‎
Lines changed: 59 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎R/froll.R‎
Lines changed: 83 additions & 50 deletions b/‎R/froll.R‎
Lines changed: 83 additions & 50 deletions
@@ -8,6 +8,17 @@
 
 1. Rolling functions `frollmean` and `frollsum` used to treat `Inf` and `-Inf` as `NA` when using default `algo="fast"`. It has been changed now and infinite values are not treated as `NA` anymore. If your input into those functions has `Inf` or `-Inf` then you will be affected by this change.
 
+2. The result of `frollapply` is no longer coerced to numeric. User code may break if it depends on forced coercion of input/output to numeric type.
+```r
+## before
+frollapply(c(F,T,F,F,F,T), 2, any)
+#[1] NA  1  1  0  0  1
+
+## 1.18.0
+frollapply(c(F,T,F,F,F,T), 2, any)
+#[1]    NA  TRUE  TRUE FALSE FALSE  TRUE
+```
+
 ### NEW FEATURES
 
 1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also match `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
@@ -56,6 +67,54 @@ microbenchmark::microbenchmark(
 # frapply(x)  713.23108  742.34657  865.2524  848.31641  965.3599 1114.0531    10
 ```
 
+6. Function `frollapply` has been completely rewritten. Be sure to read the `frollapply` manual before using the function. The following changes have been made:
+
+- all basic types are now supported on input/output, not only double. User code may break if it depends on forced coercion of input/output to double type.
+- new argument `by.column` allows passing a multi-column subset of a data.table into a rolling function, closes [#4887](https://github.com/Rdatatable/data.table/issues/4887).
+```r
+x = as.data.table(iris)
+flow = function(x) {
+  v1 = x[[1L]]
+  v2 = x[[2L]]
+  (v1[2L] - v1[1L] * (1+v2[2L])) / v1[1L]
+}
+x[, "flow" := frollapply(.(Sepal.Length, Sepal.Width), 2, flow, by.column=FALSE),
+  by = Species][]
+#     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species      flow
+#            <num>       <num>        <num>       <num>    <fctr>     <num>
+#  1:          5.1         3.5          1.4         0.2    setosa        NA
+#  2:          4.9         3.0          1.4         0.2    setosa -3.039216
+#  3:          4.7         3.2          1.3         0.2    setosa -3.240816
+#  4:          4.6         3.1          1.5         0.2    setosa -3.121277
+#  5:          5.0         3.6          1.4         0.2    setosa -3.513043
+# ---
+#146:          6.7         3.0          5.2         2.3 virginica -3.000000
+#147:          6.3         2.5          5.0         1.9 virginica -2.559701
+#148:          6.5         3.0          5.2         2.0 virginica -2.968254
+#149:          6.2         3.4          5.4         2.3 virginica -3.446154
+#150:          5.9         3.0          5.1         1.8 virginica -3.048387
+```
+
+- uses multiple CPU threads; evaluation of a user-defined function (UDF) is inherently slow, so this can be a great help.
+```r
+x = rnorm(1e5)
+n = 500
+setDTthreads(1)
+system.time(
+  th1 <- frollapply(x, n, median, simplify=unlist)
+)
+#   user  system elapsed
+#  4.106   0.008   4.115
+setDTthreads(4)
+system.time(
+  th4 <- frollapply(x, n, median, simplify=unlist)
+)
+#   user  system elapsed
+#  5.778   0.140   1.498
+all.equal(th1, th4)
+#[1] TRUE
+```
+
 ### BUG FIXES
 
 1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix.
 
@@ -2,9 +2,9 @@
 trimn = function(n, len, align) {
   n = min(n, len) ## so frollsum(1:2, 3, partial=TRUE) works
   if (align=="right")
-    c(seq.int(n), rep.int(n, len-n))
+    c(seq_len(n), rep.int(n, len-n))
   else
-    c(rep.int(n, len-n), rev(seq.int(n)))
+    c(rep.int(n, len-n), rev(seq_len(n)))
 }
 trimnadaptive = function(n, align) {
   if (align=="right")
@@ -25,43 +25,102 @@ trimnadaptive = function(n, align) {
 # frollsum(list(1:4, 2:5), 2:3, partial=FALSE, adaptive=FALSE)
 # frollsum(list(1:4, 2:5), 2:3, partial=TRUE, adaptive=FALSE)
 partial2adaptive = function(x, n, align, adaptive) {
+  ## do not quote the x and n argument names in messages because frollapply has them in uppercase
   if (align=="center")
     stopf("'partial' cannot be used together with align='center'")
-  if (is.list(x) && length(unique(lengths(x)))!=1L)
-    stopf("'partial' does not support variable length of columns in 'x'")
-  len = if (is.list(x)) length(x[[1L]]) else length(x)
+  if (is.list(x)) {
+    if (!is.data.frame(x) && !equal.lengths(x)) ## froll
+      stopf("'partial' does not support variable length of columns in x")
+    else if (all.data.frame(x) && !equal.nrows(x)) ## frollapply by.column=F, single DT already wrapped into list
+      stopf("'partial' does not support variable nrow of data.tables in x")
+  }
+  len = if (is.list(x)) {
+    if (is.data.frame(x[[1L]])) ## frollapply by.column
+      nrow(x[[1L]])
+    else ## froll, this will work for both x list and x dt on input
+      length(x[[1L]])
+  } else length(x)
   verbose = getOption("datatable.verbose")
   if (!adaptive) {
     if (is.list(n))
       stopf("n must be an integer, list is accepted for adaptive TRUE")
     if (!is.numeric(n))
       stopf("n must be an integer vector or a list of integer vectors")
     if (verbose)
-      catf("partial2adaptive: froll partial=TRUE trimming 'n' and redirecting to adaptive=TRUE\n")
-    if (length(n)>1L) {
+      catf("partial2adaptive: froll partial=TRUE trimming n and redirecting to adaptive=TRUE\n")
+    if (length(n) > 1L) {
       lapply(n, len, align, FUN=trimn)
     } else {
       trimn(n, len, align)
     }
   } else {
     if (!(is.numeric(n) || (is.list(n) && all(vapply_1b(n, is.numeric)))))
       stopf("n must be an integer vector or a list of integer vectors")
+    if (is.list(n) && length(unique(lengths(n))) != 1L)
+      stopf("adaptive windows provided in n must not to have different lengths")
+    if ((is.list(n) && length(n[[1L]]) != len) || (is.numeric(n) && length(n) != len))
+      stopf("length of vectors in x must match to length of adaptive window in n")
+    if (verbose)
+      catf("partial2adaptive: froll adaptive=TRUE and partial=TRUE trimming n\n")
     if (!is.list(n))
       n = list(n)
-    if (length(unique(lengths(n))) != 1L)
-      stopf("adaptive window provided in 'n' must not to have different lengths")
-    if (length(n[[1L]]) != len)
-      stopf("length of vectors in 'x' must match to length of adaptive window in 'n'")
-    if (verbose)
-      catf("partial2adaptive: froll adaptive=TRUE and partial=TRUE trimming 'n'\n")
     lapply(n, align, FUN=trimnadaptive)
   }
 }
 
-froll = function(fun, x, n, fill=NA, algo, align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, FUN, rho, give.names=FALSE) {
+make.roll.names = function(x.len, n.len, n, x.nm, n.nm, fun, adaptive) {
+  if (is.null(n.nm)) {
+    if (!adaptive) {
+      if (!is.numeric(n))
+        stopf("internal error: misuse of make.names, n must be numeric for !adaptive") ## nocov
+      n.nm = paste0("roll", fun, as.character(as.integer(n)))
+    } else {
+      n.nm = paste0("aroll", fun, seq_len(n.len))
+    }
+  } else if (!length(n.nm) && !adaptive)
+    stopf("internal error: misuse of make.names, non-null length 0 n is not possible for !adaptive") ## nocov
+  if (is.null(x.nm)) {
+    x.nm = paste0("V", seq_len(x.len))
+  }
+  ans = if (length(x.nm)) { ## is.list(x) && !is.data.frame(x)
+    if (length(n.nm)) { ## !adaptive || is.list(n)
+      paste(rep(x.nm, each=length(n.nm)), n.nm, sep="_")
+    } else { ## adaptive && is.numeric(n)
+      x.nm
+    }
+  } else { ## (by.column && is.atomic(x)) || (!by.column && is.data.frame(x))
+    if (length(n.nm)) { ## !adaptive || is.list(n)
+      n.nm
+    } else { ## adaptive && is.numeric(n)
+      NULL
+    }
+  }
+  if (!is.null(ans) && length(ans) != x.len*n.len)
+    stopf("internal error: make.names generated names of wrong length") ## nocov
+  ans
+}
+
+froll = function(fun, x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
+  stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun))
+  if (!missing(hasNA)) {
+    if (!is.na(has.nf))
+      stopf("hasNA is deprecated, use has.nf instead")
+    warning("hasNA is deprecated, use has.nf instead")
+    has.nf = hasNA
+  } # remove check on next major release
+  algo = match.arg(algo)
   align = match.arg(align)
-  if (isTRUE(give.names))
+  if (isTRUE(give.names)) {
     orig = list(n=n, adaptive=adaptive)
+    xnam = if (is.list(x)) names(x) else character()
+    nnam = if (isTRUE(adaptive)) {
+      if (is.list(n)) names(n) else character()
+    } else names(n)
+    nx = if (is.list(x)) length(x) else 1L
+    nn = if (isTRUE(adaptive)) {
+      if (is.list(n)) length(n) else 1L
+    } else length(n)
+  }
   if (isTRUE(partial)) {
     if (!length(n))
       stopf("n must be non 0 length")
@@ -78,51 +137,25 @@ froll = function(fun, x, n, fill=NA, algo, align=c("right","left","center"), na.
     n = rev2(n)
     align = "right"
   } ## support for left adaptive added in #5441
-  if (missing(FUN))
-    ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, has.nf, adaptive)
-  else
-    ans = .Call(CfrollapplyR, FUN, x, n, fill, align, adaptive, rho)
+  ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, has.nf, adaptive)
   if (leftadaptive) {
     if (verbose)
       catf("froll: adaptive=TRUE && align='left' post-processing from align='right'\n")
     ans = rev2(ans)
   }
   if (isTRUE(give.names) && is.list(ans)) {
-    n = orig$n
-    adaptive = orig$adaptive
-    nx = names(x)
-    nn = names(n)
-    if (is.null(nx)) nx = paste0("V", if (is.atomic(x)) 1L else seq_along(x))
-    if (is.null(nn)) nn = if (adaptive) paste0("N", if (is.atomic(n)) 1L else seq_along(n)) else paste("roll", as.character(n), sep="_")
-    setattr(ans, "names",  paste(rep(nx, each=length(nn)), nn, sep="_"))
+    nms = make.roll.names(x.len=nx, n.len=nn, n=orig$n, x.nm=xnam, n.nm=nnam, fun=fun, adaptive=orig$adaptive)
+    setattr(ans, "names", nms)
   }
   ans
 }
 
-frollfun = function(fun, x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
-  stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun))
-  if (!missing(hasNA)) {
-    if (!is.na(has.nf))
-      stopf("hasNA is deprecated, use has.nf instead")
-    warningf("hasNA is deprecated, use has.nf instead")
-    has.nf = hasNA
-  } # remove check on next major release
-  algo = match.arg(algo)
-  froll(fun=fun, x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, give.names=give.names)
-}
-
-frollmean = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
-  frollfun(fun="mean", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
+frollmean = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
+  froll(fun="mean", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
 }
-frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
-  frollfun(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
+frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
+  froll(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
 }
-frollmax = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
-  frollfun(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
-}
-
-frollapply = function(x, n, FUN, ..., fill=NA, align=c("right","left","center"), adaptive=FALSE, partial=FALSE, give.names=FALSE) {
-  FUN = match.fun(FUN)
-  rho = new.env()
-  froll(FUN=FUN, rho=rho, x=x, n=n, fill=fill, align=align, adaptive=adaptive, partial=partial, give.names=give.names)
+frollmax = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
+  froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
 }