Skip to content

Commit 477e6ea

Browse files
authored
adaptive and partial added to frollapply()
1 parent 5a44552 commit 477e6ea

File tree

7 files changed

+227
-52
lines changed

7 files changed

+227
-52
lines changed

NEWS.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,10 @@
1212

1313
4. `as.Date()` method for `IDate` no longer coerces to `double` [#6922](https://github.com/Rdatatable/data.table/issues/6922). Thanks @MichaelChirico for the report and PR. The only effect should be on overly-strict tests that assert `Date` objects have `double` storage, which is not in general true, especially from R 4.5.0.
1414

15-
5. Multiple improvements has been added to rolling functions. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). There was no `frollmax` function yet. Adaptive rolling functions did not have support for `align="left"`. `frollapply` did not support `adaptive=TRUE`. Available alternatives were base R `mapply` or self-join using `max` and grouping `by=.EACHI`. As a follow up of his request, following features has been or will be added:
15+
5. Multiple improvements has been added to rolling functions. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). There was no `frollmax` function yet. Adaptive rolling functions did not have support for `align="left"`. `frollapply` did not support `adaptive=TRUE`. Available alternatives were base R `mapply` or self-join using `max` and grouping `by=.EACHI`. As a follow up of his request, following features has been added:
1616
- new function `frollmax`, applies `max` over a rolling window.
1717
- support for `align="left"` for adaptive rolling function.
1818
- support for `adaptive=TRUE` in `frollapply`.
19-
- better support for non-double data types in `frollapply`.
20-
- better support for `Inf` and `-Inf` support in `algo="fast"` implementation.
2119
- `partial` argument to trim window width to available observations rather than returning `NA` whenever window is not complete.
2220

2321
For a comprehensive description about all available features see `?froll` manual.
@@ -36,15 +34,17 @@ x = data.table(
3634
baser = function(x) x[, mapply(function(from, to) max(value[from:to]), row, end_window)]
3735
sj = function(x) x[x, max(value), on=.(row >= row, row <= end_window), by=.EACHI]$V1
3836
frmax = function(x) x[, frollmax(value, len_window, adaptive=TRUE, align="left", hasNA=FALSE)]
37+
frapply = function(x) x[, frollapply(value, len_window, max, adaptive=TRUE, align="left")]
3938
microbenchmark::microbenchmark(
40-
baser(x), sj(x), frmax(x),
39+
baser(x), sj(x), frmax(x), frapply(x),
4140
times=10, check="identical"
4241
)
4342
#Unit: milliseconds
44-
# expr min lq mean median uq max neval
45-
# baser(x) 4290.98557 4529.82841 4573.94115 4604.85827 4654.39342 4883.991 10
46-
# sj(x) 3600.42771 3752.19359 4118.21755 4235.45856 4329.08728 4884.080 10
47-
# frmax(x) 64.48627 73.07978 88.84932 76.64569 82.56115 198.438 10
43+
# expr min lq mean median uq max neval
44+
# baser(x) 5472.2715 5596.11013 5763.93265 5659.06510 5935.11236 6338.0498 10
45+
# sj(x) 4664.3359 4872.40122 4978.01860 4919.15975 5061.69718 5345.3508 10
46+
# frmax(x) 70.0804 75.13598 91.35392 95.80486 99.99415 113.2648 10
47+
# frapply(x) 743.9082 833.65667 904.32891 893.75805 979.63510 1158.6030 10
4848
```
4949

5050
## BUG FIXES

R/froll.R

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "
4141
if (isTRUE(partial)) {
4242
if (isTRUE(adaptive))
4343
stopf("'partial' argument cannot be used together with 'adaptive'")
44-
if (is.list(n)) ## duplicate two of C check to detect early
44+
if (is.list(n))
4545
stopf("n must be integer, list is accepted for adaptive TRUE")
4646
if (!length(n))
4747
stopf("n must be non 0 length")
@@ -77,14 +77,37 @@ frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "lef
7777
frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
7878
froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive, partial=partial)
7979
}
80+
8081
frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center"), adaptive=FALSE, partial=FALSE) {
8182
FUN = match.fun(FUN)
8283
align = match.arg(align)
83-
if (isTRUE(partial))
84-
stopf("frollapply does not support 'partial' argument yet")
85-
if (!missing(adaptive))
86-
stopf("frollapply does not support 'adaptive' argument yet")
84+
if (isTRUE(partial)) {
85+
if (isTRUE(adaptive))
86+
stopf("'partial' argument cannot be used together with 'adaptive'")
87+
if (is.list(n))
88+
stopf("n must be integer, list is accepted for adaptive TRUE")
89+
if (!length(n))
90+
stopf("n must be non 0 length")
91+
n = partial2adaptive(x, n, align)
92+
adaptive = TRUE
93+
}
94+
leftadaptive = isTRUE(adaptive) && align=="left"
95+
if (leftadaptive) {
96+
verbose = getOption("datatable.verbose")
97+
rev2 = function(x) if (is.list(x)) lapply(x, rev) else rev(x)
98+
if (verbose)
99+
cat("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n")
100+
x = rev2(x)
101+
n = rev2(n)
102+
align = "right"
103+
}
87104
rho = new.env()
88-
ans = .Call(CfrollapplyR, FUN, x, n, fill, align, rho)
89-
ans
105+
ans = .Call(CfrollapplyR, FUN, x, n, fill, align, adaptive, rho)
106+
if (!leftadaptive)
107+
ans
108+
else {
109+
if (verbose)
110+
cat("frollapply: adaptive=TRUE && align='left' post-processing from align='right'\n")
111+
rev2(ans)
112+
}
90113
}

inst/tests/froll.Rraw

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,10 +1154,13 @@ f = function(x) {
11541154
}
11551155
#test(6010.106, head(frollapply(1:5, 3, f), 3L), c(NA_real_,NA_real_,1), output=c("frollapplyR: allocating memory.*","frollapply: took.*","frollapplyR: processing.*took.*")) # only head 3 is valid, rest is undefined as REAL is applied on logical type, can return garbage or fail with REAL error
11561156
options(datatable.verbose=FALSE)
1157+
1158+
# frollapply adaptive
1159+
test(6010.201, frollapply(1:3, c(3,3,3), sum, adaptive=TRUE), c(NA,NA,6))
1160+
#TODO tests
1161+
11571162
#### test coverage
1158-
test(6010.5, frollapply(1:3, c(3,3,3), sum, adaptive=TRUE), error="frollapply does not support 'adaptive' argument")
11591163
test(6010.501, frollapply(1:3, "b", sum), error="n must be integer")
1160-
test(6010.502, frollapply(1:3, 2.5, sum), error="n must be integer")
11611164
test(6010.503, frollapply(1:3, integer(), sum), error="n must be non 0 length")
11621165
test(6010.504, frollapply(1:3, 2L, sum, fill=1:2), error="fill must be a vector of length 1")
11631166
test(6010.505, frollapply(1:3, 2L, sum, fill=NA_integer_), c(NA,3,5))

man/froll.Rd

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@
107107
\item \code{align} does not support \code{"center"}.
108108
\item if list of vectors is passed to \code{x}, then all vectors within it must have equal length.
109109
\item \code{partial=TRUE} is not supported.
110-
\item functionality is not supported in \code{frollapply} (to be changed).
111110
}
112111
}
113112
\section{\code{partial} argument}{

src/data.table.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,11 @@ void frolladaptivesumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fil
241241
void frolladaptivesumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
242242
//void frolladaptivemaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now
243243
void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
244+
void frolladaptiveapply(double *x, int64_t nx, SEXP pw, int *k, ans_t *ans, double fill, SEXP call, SEXP rho, bool verbose);
244245

245246
// frollR.c
246247
SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive);
247-
SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho);
248+
SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP adaptive, SEXP rho);
248249

249250
// nafill.c
250251
void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose);
@@ -370,4 +371,3 @@ SEXP dt_has_zlib(void);
370371
SEXP startsWithAny(SEXP, SEXP, SEXP);
371372
SEXP convertDate(SEXP, SEXP);
372373
SEXP fastmean(SEXP);
373-

src/frollR.c

Lines changed: 108 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,16 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
8888
}
8989
int **ikl = (int**)R_alloc(nk, sizeof(int*)); // to not recalculate `length(x[[i]])` we store it in extra array
9090
if (badaptive) {
91-
for (int j=0; j<nk; j++) ikl[j] = INTEGER(VECTOR_ELT(kl, j));
91+
for (int j=0; j<nk; j++)
92+
ikl[j] = INTEGER(VECTOR_ELT(kl, j));
93+
}
94+
int* iik = NULL;
95+
if (!badaptive) {
96+
if (!isInteger(ik))
97+
error(_("Internal error: badaptive=%d but ik is not integer"), badaptive); // # nocov
98+
iik = INTEGER(ik); // pointer to non-adaptive window width, still can be vector when doing multiple windows
99+
} else {
100+
// ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
92101
}
93102

94103
if (!IS_TRUE_OR_FALSE(narm))
@@ -166,15 +175,6 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
166175
else
167176
internal_error(__func__, "invalid %s argument in %s function should have been caught earlier", "algo", "rolling"); // # nocov
168177

169-
int* iik = NULL;
170-
if (!badaptive) {
171-
if (!isInteger(ik))
172-
internal_error(__func__, "badaptive=%d but ik is not integer", badaptive); // # nocov
173-
iik = INTEGER(ik); // pointer to non-adaptive window width, still can be vector when doing multiple windows
174-
} else {
175-
// ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
176-
}
177-
178178
if (verbose) {
179179
if (ialgo==0)
180180
Rprintf(_("%s: %d column(s) and %d window(s), if product > 1 then entering parallel execution\n"), __func__, nx, nk);
@@ -201,7 +201,15 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
201201
return isVectorAtomic(obj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans;
202202
}
203203

204-
SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
204+
// helper to find biggest window width for adaptive frollapply
205+
int maxk(int *k, uint64_t len) {
206+
int mk = k[0];
207+
for (uint64_t i=1; i<len; i++)
208+
if (k[i] > mk)
209+
mk = k[i];
210+
return mk;
211+
}
212+
SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP adaptive, SEXP rho) {
205213
int protecti = 0;
206214
const bool verbose = GetVerbose();
207215

@@ -218,22 +226,72 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
218226
SEXP x = PROTECT(coerceToRealListR(obj)); protecti++;
219227
R_len_t nx = length(x);
220228

221-
if (!isInteger(k)) {
222-
if (isReal(k)) {
223-
if (fitsInInt32(k)) {
224-
SEXP ik = PROTECT(coerceVector(k, INTSXP)); protecti++;
225-
k = ik;
229+
if (xlength(k) == 0)
230+
error(_("n must be non 0 length"));
231+
232+
if (!IS_TRUE_OR_FALSE(adaptive))
233+
error(_("%s must be TRUE or FALSE"), "adaptive");
234+
bool badaptive = LOGICAL(adaptive)[0];
235+
236+
R_len_t nk = 0;
237+
SEXP ik = R_NilValue;
238+
SEXP kl = R_NilValue;
239+
if (!badaptive) {
240+
if (isNewList(k))
241+
error(_("n must be integer, list is accepted for adaptive TRUE"));
242+
243+
if (isInteger(k)) {
244+
ik = k;
245+
} else if (isReal(k)) {
246+
ik = PROTECT(coerceVector(k, INTSXP)); protecti++;
247+
} else {
248+
error(_("n must be integer"));
249+
}
250+
251+
nk = length(k);
252+
R_len_t i=0;
253+
while (i < nk && INTEGER(ik)[i] > 0) i++;
254+
if (i != nk)
255+
error(_("n must be positive integer values (> 0)"));
256+
} else {
257+
if (isVectorAtomic(k)) {
258+
kl = PROTECT(allocVector(VECSXP, 1)); protecti++;
259+
if (isInteger(k)) {
260+
SET_VECTOR_ELT(kl, 0, k);
261+
} else if (isReal(k)) {
262+
SET_VECTOR_ELT(kl, 0, coerceVector(k, INTSXP));
226263
} else {
227-
error(_("n must be integer"));
264+
error(_("n must be integer vector or list of integer vectors"));
228265
}
266+
nk = 1;
229267
} else {
230-
error(_("n must be integer"));
268+
nk = length(k);
269+
kl = PROTECT(allocVector(VECSXP, nk)); protecti++;
270+
for (R_len_t i=0; i<nk; i++) {
271+
if (isInteger(VECTOR_ELT(k, i))) {
272+
SET_VECTOR_ELT(kl, i, VECTOR_ELT(k, i));
273+
} else if (isReal(VECTOR_ELT(k, i))) {
274+
SET_VECTOR_ELT(kl, i, coerceVector(VECTOR_ELT(k, i), INTSXP));
275+
} else {
276+
error(_("n must be integer vector or list of integer vectors"));
277+
}
278+
}
231279
}
232280
}
233-
R_len_t nk = length(k);
234-
if (nk == 0)
235-
error(_("n must be non 0 length"));
236-
int *ik = INTEGER(k);
281+
int **ikl = (int**)R_alloc(nk, sizeof(int*));
282+
if (badaptive) {
283+
for (int j=0; j<nk; j++)
284+
ikl[j] = INTEGER(VECTOR_ELT(kl, j));
285+
}
286+
287+
int* iik = NULL;
288+
if (!badaptive) {
289+
if (!isInteger(ik))
290+
error(_("Internal error: badaptive=%d but ik is not integer"), badaptive); // # nocov
291+
iik = INTEGER(ik);
292+
} else {
293+
// ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
294+
}
237295

238296
int ialign=-2;
239297
if (!strcmp(CHAR(STRING_ELT(align, 0)), "right")) {
@@ -246,6 +304,9 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
246304
internal_error(__func__, "invalid %s argument in %s function should have been caught earlier", "align", "rolling"); // # nocov
247305
}
248306

307+
if (badaptive && ialign==0)
308+
error(_("using adaptive TRUE and align 'center' is not implemented"));
309+
249310
if (length(fill) != 1)
250311
error(_("fill must be a vector of length 1"));
251312
if (!isInteger(fill) && !isReal(fill) && !isLogical(fill))
@@ -262,6 +323,12 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
262323
for (R_len_t i=0; i<nx; i++) {
263324
inx[i] = xlength(VECTOR_ELT(x, i));
264325
for (R_len_t j=0; j<nk; j++) {
326+
if (badaptive) {
327+
if (i > 0 && (inx[i]!=inx[i-1]))
328+
error(_("adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame; If you want to call rolling function on list having variable length of elements call it for each field separately"));
329+
if (xlength(VECTOR_ELT(kl, j))!=inx[0])
330+
error(_("length of integer vector(s) provided as list to 'n' argument must be equal to number of observations provided in 'x'"));
331+
}
265332
SET_VECTOR_ELT(ans, i*nk+j, allocVector(REALSXP, inx[i]));
266333
dans[i*nk+j] = ((ans_t) { .dbl_v=REAL(VECTOR_ELT(ans, i*nk+j)), .status=0, .message={"\0","\0","\0","\0"} });
267334
}
@@ -274,16 +341,26 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
274341
// in the outer loop we handle vectorized k argument
275342
// for each k we need to allocate a width window object: pw
276343
// we also need to construct distinct R call pointing to that window
277-
for (R_len_t j=0; j<nk; j++) {
278-
pw = PROTECT(allocVector(REALSXP, ik[j]));
279-
dw = REAL(pw);
280-
pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
281-
282-
for (R_len_t i=0; i<nx; i++) {
283-
frollapply(dx[i], inx[i], dw, ik[j], &dans[i*nk+j], ialign, dfill, pc, rho, verbose);
344+
if (!badaptive) {
345+
for (R_len_t j=0; j<nk; j++) {
346+
pw = PROTECT(allocVector(REALSXP, iik[j]));
347+
dw = REAL(pw);
348+
pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
349+
for (R_len_t i=0; i<nx; i++) {
350+
frollapply(dx[i], inx[i], dw, iik[j], &dans[i*nk+j], ialign, dfill, pc, rho, verbose);
351+
}
352+
UNPROTECT(2);
353+
}
354+
} else {
355+
for (R_len_t j=0; j<nk; j++) {
356+
pw = PROTECT(allocVector(REALSXP, maxk(ikl[j], inx[0]))); // max window size, inx[0] because inx is constant for adaptive
357+
SET_GROWABLE_BIT(pw); // so we can set length of window for each observation
358+
pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
359+
for (R_len_t i=0; i<nx; i++) {
360+
frolladaptiveapply(dx[i], inx[i], pw, ikl[j], &dans[i*nk+j], dfill, pc, rho, verbose);
361+
}
362+
UNPROTECT(2);
284363
}
285-
286-
UNPROTECT(2);
287364
}
288365

289366
if (verbose)

0 commit comments

Comments
 (0)