Skip to content

Commit 5975ffd

Browse files
authored
new rolling functions: min and prod (#7299)
1 parent f1d5c27 commit 5975ffd

File tree

9 files changed

+995
-15
lines changed

9 files changed

+995
-15
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ S3method(rollup, data.table)
5555
export(frollmean)
5656
export(frollsum)
5757
export(frollmax)
58+
export(frollmin)
59+
export(frollprod)
5860
export(frollapply)
5961
export(frolladapt)
6062
export(nafill)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,8 @@
246246
#9: 2025-09-22 9 8 9.0
247247
```
248248

249+
19. New rolling functions, `frollmin` and `frollprod`, have been implemented, working towards [#2778](https://github.com/Rdatatable/data.table/issues/2778). Thanks to @jangorecki for the implementation.
250+
249251
### BUG FIXES
250252

251253
1. `fread()` no longer warns on certain systems on R 4.5.0+ where the file owner can't be resolved, [#6918](https://github.com/Rdatatable/data.table/issues/6918). Thanks @ProfFancyPants for the report and PR.

R/froll.R

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,9 @@ frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left
209209
frollmax = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
210210
froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
211211
}
212+
frollmin = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
213+
froll(fun="min", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
214+
}
215+
frollprod = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
216+
froll(fun="prod", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
217+
}

inst/tests/froll.Rraw

Lines changed: 266 additions & 5 deletions
Large diffs are not rendered by default.

man/froll.Rd

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,14 @@
77
\alias{frollmean}
88
\alias{frollsum}
99
\alias{frollmax}
10+
\alias{frollmin}
11+
\alias{frollprod}
1012
\alias{roll}
1113
\alias{rollmean}
1214
\alias{rollsum}
1315
\alias{rollmax}
16+
\alias{rollmin}
17+
\alias{rollprod}
1418
\title{Rolling functions}
1519
\description{
1620
Fast rolling functions to calculate aggregates on a sliding window. For a user-defined rolling function see \code{\link{frollapply}}. For "time-aware" (irregularly spaced time series) rolling function see \code{\link{frolladapt}}.
@@ -22,6 +26,10 @@
2226
na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA)
2327
frollmax(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
2428
na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA)
29+
frollmin(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
30+
na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA)
31+
frollprod(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
32+
na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA)
2533
}
2634
\arguments{
2735
\item{x}{ Integer, numeric or logical vector, coerced to numeric, on which sliding window calculates an aggregate function. It supports vectorized input, then it needs to be a \code{data.table}, \code{data.frame} or a \code{list}, in which case a rolling function is applied to each column/vector. }
@@ -73,8 +81,8 @@
7381
\item \code{has.nf=TRUE} uses non-finite aware implementation straightaway.
7482
\item \code{has.nf=FALSE} uses faster implementation that does not support non-finite values. Then depending on the rolling function it will either:
7583
\itemize{
76-
\item (\emph{mean, sum}) detect non-finite, re-run non-finite aware.
77-
\item (\emph{max}) does not detect non-finites and may silently give incorrect answer.
84+
\item (\emph{mean, sum, prod}) detect non-finite values and re-run the non-finite aware implementation.
85+
\item (\emph{max, min}) do not detect non-finite values and may silently give an incorrect answer.
7886
}
7987
In general \code{has.nf=FALSE && any(!is.finite(x))} should be considered as undefined behavior. Therefore \code{has.nf=FALSE} should be used with care.
8088
}
@@ -84,8 +92,8 @@
8492
\itemize{
8593
\item \code{algo="fast"} uses \emph{"on-line"}, single pass, algorithm.
8694
\itemize{
87-
\item \emph{max} rolling function will not do only a single pass but, on average \code{length(x)/n}, nested loops will be computed. The bigger the window the bigger advantage over algo \emph{exact} which computes \code{length(x)} nested loops. Note that \emph{exact} uses multiple CPUs so for a small window size and many CPUs it is possible it will be actually faster than \emph{fast} but in those cases elapsed timings will likely be far below a single second.
88-
\item Not all functions have \emph{fast} implementation available. As of now \emph{max} and \code{adaptive=TRUE} does not have, therefore it will automatically fall back to \emph{exact} implementation. \code{datatable.verbose} option can be used to check that.
95+
\item \emph{max} and \emph{min} rolling functions do not run in a single pass; instead, on average, \code{length(x)/n} nested loops are computed. The bigger the window, the bigger the advantage over algo \emph{exact}, which computes \code{length(x)} nested loops. Note that \emph{exact} uses multiple CPUs, so for a small window size and many CPUs it may actually be faster than \emph{fast}, but in those cases elapsed timings will likely be far below a single second.
96+
\item Not all functions have a \emph{fast} implementation available. As of now, \emph{max} and \emph{min} with \code{adaptive=TRUE} do not have a \emph{fast} implementation, therefore they will automatically fall back to the \emph{exact} implementation. The \code{datatable.verbose} option can be used to check that.
8997
}
9098
\item \code{algo="exact"} will make rolling functions use a more computationally-intensive algorithm. For each observation from the input vector it will compute a function on a window from scratch (complexity \eqn{O(n^2)}).
9199
\itemize{
@@ -139,11 +147,11 @@ frollmean(d[, .(V1)], c(3, 4))
139147
frollmean(d, c(3, 4))
140148
## three calls above will use multiple cores when available
141149
142-
# frollsum
150+
# other functions
143151
frollsum(d, 3:4)
144-
145-
# frollmax
146152
frollmax(d, 3:4)
153+
frollmin(d, 3:4)
154+
frollprod(d, 3:4)
147155
148156
# partial=TRUE
149157
x = 1:6/2

src/data.table.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,9 @@ void avoid_openmp_hang_within_fork(void);
224224
typedef enum { // adding rolling functions here and in frollfunR in frollR.c
225225
MEAN = 0,
226226
SUM = 1,
227-
MAX = 2
227+
MAX = 2,
228+
MIN = 3,
229+
PROD = 4
228230
} rollfun_t;
229231
// froll.c
230232
void frollfun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasnf, bool verbose);
@@ -234,6 +236,10 @@ void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
234236
void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
235237
void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
236238
void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
239+
void frollminFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
240+
void frollminExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
241+
void frollprodFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
242+
void frollprodExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
237243

238244
// frolladaptive.c
239245
void frolladaptivefun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
@@ -243,6 +249,10 @@ void frolladaptivesumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fil
243249
void frolladaptivesumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
244250
//void frolladaptivemaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose); // does not exist as of now
245251
void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
252+
//void frolladaptiveminFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose); // does not exist as of now
253+
void frolladaptiveminExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
254+
void frolladaptiveprodFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
255+
void frolladaptiveprodExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
246256

247257
// frollR.c
248258
SEXP frollfunR(SEXP fun, SEXP xobj, SEXP kobj, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasnf, SEXP adaptive);

0 commit comments

Comments
 (0)