Skip to content

Commit f40aad4

Browse files
committed
Merge branch 'frollmax5' into froll2025max5
2 parents a559201 + 4ce502c commit f40aad4

File tree

7 files changed

+369
-229
lines changed

7 files changed

+369
-229
lines changed

NEWS.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,41 @@ rowwiseDT(
792792

793793
41. `tables()` is faster by default by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). `mb=` now accepts any function which accepts a `data.table` and returns a higher and better estimate of its size in bytes, albeit more slowly; e.g. `mb = utils::object.size`.
794794
795+
42. Multiple improvements have been added to rolling functions. The request came from @gpierard, who needed a left-aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). There was no `frollmax` function yet. Adaptive rolling functions did not have support for `align="left"`. `frollapply` did not support `adaptive=TRUE`. Available alternatives were base R `mapply` or a self-join using `max` and grouping `by=.EACHI`. As a follow-up to this request, the following features have been added:
796+
- new function `frollmax`, applies `max` over a rolling window.
797+
- support for `align="left"` for adaptive rolling functions.
798+
- support for `adaptive=TRUE` in `frollapply`.
799+
- `partial` argument to trim the window width to the available observations rather than returning `NA` whenever the window is not complete.
800+
801+
For a comprehensive description of all available features, see the `?froll` manual.
802+
803+
Adaptive `frollmax` has been observed to be up to 50 times faster than the second fastest solution (data.table self-join using `max` and grouping `by=.EACHI`).
804+
```r
805+
set.seed(108)
806+
setDTthreads(8)
807+
x = data.table(
808+
value = cumsum(rnorm(1e6, 0.1)),
809+
end_window = 1:1e6 + sample(50:500, 1e6, TRUE),
810+
row = 1:1e6
811+
)[, "end_window" := pmin(end_window, .N)
812+
][, "len_window" := end_window-row+1L]
813+
814+
baser = function(x) x[, mapply(function(from, to) max(value[from:to]), row, end_window)]
815+
sj = function(x) x[x, max(value), on=.(row >= row, row <= end_window), by=.EACHI]$V1
816+
frmax = function(x) x[, frollmax(value, len_window, adaptive=TRUE, align="left", hasNA=FALSE)]
817+
frapply = function(x) x[, frollapply(value, len_window, max, adaptive=TRUE, align="left")]
818+
microbenchmark::microbenchmark(
819+
baser(x), sj(x), frmax(x), frapply(x),
820+
times=10, check="identical"
821+
)
822+
#Unit: milliseconds
823+
# expr min lq mean median uq max neval
824+
# baser(x) 5472.2715 5596.11013 5763.93265 5659.06510 5935.11236 6338.0498 10
825+
# sj(x) 4664.3359 4872.40122 4978.01860 4919.15975 5061.69718 5345.3508 10
826+
# frmax(x) 70.0804 75.13598 91.35392 95.80486 99.99415 113.2648 10
827+
# frapply(x) 743.9082 833.65667 904.32891 893.75805 979.63510 1158.6030 10
828+
```
829+
795830
## BUG FIXES
796831
797832
1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.

R/froll.R

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ partial2adaptive = function(x, n, align) {
3434
}
3535
}
3636

37-
froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
37+
froll = function(fun, x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
3838
stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun))
3939
algo = match.arg(algo)
4040
align = match.arg(align)
@@ -68,17 +68,17 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "
6868
}
6969
}
7070

71-
frollmean = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
71+
frollmean = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
7272
froll(fun="mean", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive, partial=partial)
7373
}
74-
frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
74+
frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
7575
froll(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive, partial=partial)
7676
}
77-
frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
77+
frollmax = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
7878
froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive, partial=partial)
7979
}
8080

81-
frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center"), adaptive=FALSE, partial=FALSE) {
81+
frollapply = function(x, n, FUN, ..., fill=NA, align=c("right","left","center"), adaptive=FALSE, partial=FALSE) {
8282
FUN = match.fun(FUN)
8383
align = match.arg(align)
8484
if (isTRUE(partial)) {
@@ -91,12 +91,14 @@ frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center"
9191
n = partial2adaptive(x, n, align)
9292
adaptive = TRUE
9393
}
94+
if (isTRUE(adaptive) && base::getRversion() < "3.4.0") ## support SET_GROWABLE_BIT
95+
stopf("frollapply adaptive=TRUE requires at least R 3.4.0"); # nocov
9496
leftadaptive = isTRUE(adaptive) && align=="left"
9597
if (leftadaptive) {
9698
verbose = getOption("datatable.verbose")
9799
rev2 = function(x) if (is.list(x)) lapply(x, rev) else rev(x)
98100
if (verbose)
99-
cat("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n")
101+
cat("frollapply: adaptive=TRUE && align='left' pre-processing for align='right'\n")
100102
x = rev2(x)
101103
n = rev2(n)
102104
align = "right"

0 commit comments

Comments
 (0)