Skip to content

Commit b786d4a

Browse files
committed
Merge branch 'master' into frollapply-throttle
2 parents 4ea30ea + 251e88d commit b786d4a

File tree

11 files changed

+1047
-65
lines changed

11 files changed

+1047
-65
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ S3method(rollup, data.table)
5555
export(frollmean)
5656
export(frollsum)
5757
export(frollmax)
58+
export(frollmin)
59+
export(frollprod)
5860
export(frollapply)
5961
export(frolladapt)
6062
export(nafill)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,8 @@
246246
#9: 2025-09-22 9 8 9.0
247247
```
248248

249+
19. New rolling functions, `frollmin` and `frollprod`, have been implemented, towards [#2778](https://github.com/Rdatatable/data.table/issues/2778). Thanks to @jangorecki for the implementation.
250+
249251
### BUG FIXES
250252

251253
1. `fread()` no longer warns on certain systems on R 4.5.0+ where the file owner can't be resolved, [#6918](https://github.com/Rdatatable/data.table/issues/6918). Thanks @ProfFancyPants for the report and PR.

R/froll.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,6 @@ frolladapt = function(x, n, align="right", partial=FALSE, give.names=FALSE) {
130130
}
131131
if (!length(n))
132132
stopf("'n' must be non 0 length")
133-
if (anyNA(n))
134-
stopf("'n' must not have NAs")
135133
if (!identical(align, "right"))
136134
stopf("'align' other than 'right' has not yet been implemented")
137135
if (!isTRUEorFALSE(partial))
@@ -209,3 +207,9 @@ frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left
209207
frollmax = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
210208
froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
211209
}
210+
frollmin = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
211+
froll(fun="min", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
212+
}
213+
frollprod = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
214+
froll(fun="prod", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
215+
}

inst/tests/froll.Rraw

Lines changed: 266 additions & 5 deletions
Large diffs are not rendered by default.

man/froll.Rd

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,14 @@
77
\alias{frollmean}
88
\alias{frollsum}
99
\alias{frollmax}
10+
\alias{frollmin}
11+
\alias{frollprod}
1012
\alias{roll}
1113
\alias{rollmean}
1214
\alias{rollsum}
1315
\alias{rollmax}
16+
\alias{rollmin}
17+
\alias{rollprod}
1418
\title{Rolling functions}
1519
\description{
1620
Fast rolling functions to calculate aggregates on a sliding window. For a user-defined rolling function see \code{\link{frollapply}}. For "time-aware" (irregularly spaced time series) rolling function see \code{\link{frolladapt}}.
@@ -22,6 +26,10 @@
2226
na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA)
2327
frollmax(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
2428
na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA)
29+
frollmin(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
30+
na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA)
31+
frollprod(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
32+
na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA)
2533
}
2634
\arguments{
2735
\item{x}{ Integer, numeric or logical vector, coerced to numeric, on which a sliding window calculates an aggregate function. Vectorized input is also supported: \code{x} can then be a \code{data.table}, \code{data.frame} or a \code{list}, in which case a rolling function is applied to each column/vector. }
@@ -73,8 +81,8 @@
7381
\item \code{has.nf=TRUE} uses non-finite aware implementation straightaway.
7482
\item \code{has.nf=FALSE} uses faster implementation that does not support non-finite values. Then depending on the rolling function it will either:
7583
\itemize{
76-
\item (\emph{mean, sum}) detect non-finite, re-run non-finite aware.
77-
\item (\emph{max}) does not detect non-finites and may silently give incorrect answer.
84+
\item (\emph{mean, sum, prod}) detect non-finite values and re-run the non-finite aware implementation.
85+
\item (\emph{max, min}) does not detect non-finite values and may silently give an incorrect answer.
7886
}
7987
In general \code{has.nf=FALSE && any(!is.finite(x))} should be considered as undefined behavior. Therefore \code{has.nf=FALSE} should be used with care.
8088
}
@@ -84,8 +92,8 @@
8492
\itemize{
8593
\item \code{algo="fast"} uses \emph{"on-line"}, single pass, algorithm.
8694
\itemize{
87-
\item \emph{max} rolling function will not do only a single pass but, on average \code{length(x)/n}, nested loops will be computed. The bigger the window the bigger advantage over algo \emph{exact} which computes \code{length(x)} nested loops. Note that \emph{exact} uses multiple CPUs so for a small window size and many CPUs it is possible it will be actually faster than \emph{fast} but in those cases elapsed timings will likely be far below a single second.
88-
\item Not all functions have \emph{fast} implementation available. As of now \emph{max} and \code{adaptive=TRUE} does not have, therefore it will automatically fall back to \emph{exact} implementation. \code{datatable.verbose} option can be used to check that.
95+
\item \emph{max} and \emph{min} rolling functions do not run in a single pass; on average, \code{length(x)/n} nested loops will be computed. The bigger the window, the bigger the advantage over algo \emph{exact}, which computes \code{length(x)} nested loops. Note that \emph{exact} uses multiple CPUs, so for a small window size and many CPUs it may actually be faster than \emph{fast}, but in those cases elapsed timings will likely be far below a single second.
96+
\item Not all functions have a \emph{fast} implementation available. As of now, \emph{max} and \emph{min} with \code{adaptive=TRUE} do not have a \emph{fast} implementation, so they automatically fall back to the \emph{exact} implementation. The \code{datatable.verbose} option can be used to check that.
8997
}
9098
\item \code{algo="exact"} will make rolling functions use a more computationally intensive algorithm. For each observation from the input vector it will compute a function on a window from scratch (complexity \eqn{O(n^2)}).
9199
\itemize{
@@ -139,11 +147,11 @@ frollmean(d[, .(V1)], c(3, 4))
139147
frollmean(d, c(3, 4))
140148
## three calls above will use multiple cores when available
141149
142-
# frollsum
150+
# other functions
143151
frollsum(d, 3:4)
144-
145-
# frollmax
146152
frollmax(d, 3:4)
153+
frollmin(d, 3:4)
154+
frollprod(d, 3:4)
147155
148156
# partial=TRUE
149157
x = 1:6/2

src/data.table.h

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -224,25 +224,35 @@ void avoid_openmp_hang_within_fork(void);
224224
typedef enum { // adding rolling functions here and in frollfunR in frollR.c
225225
MEAN = 0,
226226
SUM = 1,
227-
MAX = 2
227+
MAX = 2,
228+
MIN = 3,
229+
PROD = 4
228230
} rollfun_t;
229231
// froll.c
230-
void frollfun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasnf, bool verbose);
231-
void frollmeanFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
232-
void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
233-
void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
234-
void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
235-
void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
236-
void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
232+
void frollfun(rollfun_t rfun, unsigned int algo, const double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasnf, bool verbose);
233+
void frollmeanFast(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
234+
void frollmeanExact(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
235+
void frollsumFast(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
236+
void frollsumExact(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
237+
void frollmaxFast(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
238+
void frollmaxExact(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
239+
void frollminFast(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
240+
void frollminExact(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
241+
void frollprodFast(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
242+
void frollprodExact(const double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasnf, bool verbose);
237243

238244
// frolladaptive.c
239-
void frolladaptivefun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
240-
void frolladaptivemeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
241-
void frolladaptivemeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
242-
void frolladaptivesumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
243-
void frolladaptivesumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
244-
//void frolladaptivemaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose); // does not exists as of now
245-
void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasnf, bool verbose);
245+
void frolladaptivefun(rollfun_t rfun, unsigned int algo, const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
246+
void frolladaptivemeanFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
247+
void frolladaptivemeanExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
248+
void frolladaptivesumFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
249+
void frolladaptivesumExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
250+
//void frolladaptivemaxFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose); // does not exist as of now
251+
void frolladaptivemaxExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
252+
//void frolladaptiveminFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose); // does not exist as of now
253+
void frolladaptiveminExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
254+
void frolladaptiveprodFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
255+
void frolladaptiveprodExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
246256

247257
// frollR.c
248258
SEXP frollfunR(SEXP fun, SEXP xobj, SEXP kobj, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasnf, SEXP adaptive);

src/fread.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,7 @@ double wallclock(void)
418418
* 2.206MiB (2313045 bytes)
419419
* 38.69KiB (39615 bytes)
420420
* 214 bytes
421+
* 1 byte
421422
* 0 bytes
422423
* The function returns a pointer to a static string buffer, so the caller
423424
* should not attempt to deallocate the buffer, or call this function from

0 commit comments

Comments
 (0)