Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@

4. `as.Date()` method for `IDate` no longer coerces to `double` [#6922](https://github.com/Rdatatable/data.table/issues/6922). Thanks @MichaelChirico for the report and PR. The only effect should be on overly-strict tests that assert `Date` objects have `double` storage, which is not in general true, especially from R 4.5.0.

5. Multiple improvements has been added to rolling functions. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). There was no `frollmax` function yet. Adaptive rolling functions did not have support for `align="left"`. `frollapply` did not support `adaptive=TRUE`. Available alternatives were base R `mapply` or self-join using `max` and grouping `by=.EACHI`. As a follow up of his request, following features has been or will be added:
5. Multiple improvements has been added to rolling functions. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). There was no `frollmax` function yet. Adaptive rolling functions did not have support for `align="left"`. `frollapply` did not support `adaptive=TRUE`. Available alternatives were base R `mapply` or self-join using `max` and grouping `by=.EACHI`. As a follow up of his request, following features has been added:
- new function `frollmax`, applies `max` over a rolling window.
- support for `align="left"` for adaptive rolling function.
- support for `adaptive=TRUE` in `frollapply`.
- better support for non-double data types in `frollapply`.
- better support for `Inf` and `-Inf` support in `algo="fast"` implementation.
- `partial` argument to trim window width to available observations rather than returning `NA` whenever window is not complete.

For a comprehensive description about all available features see `?froll` manual.
Expand All @@ -36,15 +34,17 @@ x = data.table(
baser = function(x) x[, mapply(function(from, to) max(value[from:to]), row, end_window)]
sj = function(x) x[x, max(value), on=.(row >= row, row <= end_window), by=.EACHI]$V1
frmax = function(x) x[, frollmax(value, len_window, adaptive=TRUE, align="left", hasNA=FALSE)]
frapply = function(x) x[, frollapply(value, len_window, max, adaptive=TRUE, align="left")]
microbenchmark::microbenchmark(
baser(x), sj(x), frmax(x),
baser(x), sj(x), frmax(x), frapply(x),
times=10, check="identical"
)
#Unit: milliseconds
# expr min lq mean median uq max neval
# baser(x) 4290.98557 4529.82841 4573.94115 4604.85827 4654.39342 4883.991 10
# sj(x) 3600.42771 3752.19359 4118.21755 4235.45856 4329.08728 4884.080 10
# frmax(x) 64.48627 73.07978 88.84932 76.64569 82.56115 198.438 10
# expr min lq mean median uq max neval
# baser(x) 5472.2715 5596.11013 5763.93265 5659.06510 5935.11236 6338.0498 10
# sj(x) 4664.3359 4872.40122 4978.01860 4919.15975 5061.69718 5345.3508 10
# frmax(x) 70.0804 75.13598 91.35392 95.80486 99.99415 113.2648 10
# frapply(x) 743.9082 833.65667 904.32891 893.75805 979.63510 1158.6030 10
```

## BUG FIXES
Expand Down
37 changes: 30 additions & 7 deletions R/froll.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "
if (isTRUE(partial)) {
if (isTRUE(adaptive))
stopf("'partial' argument cannot be used together with 'adaptive'")
if (is.list(n)) ## duplicate two of C check to detect early
if (is.list(n))
stopf("n must be integer, list is accepted for adaptive TRUE")
if (!length(n))
stopf("n must be non 0 length")
Expand Down Expand Up @@ -77,14 +77,37 @@ frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "lef
frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive, partial=partial)
}

frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center"), adaptive=FALSE, partial=FALSE) {
FUN = match.fun(FUN)
align = match.arg(align)
if (isTRUE(partial))
stopf("frollapply does not support 'partial' argument yet")
if (!missing(adaptive))
stopf("frollapply does not support 'adaptive' argument yet")
if (isTRUE(partial)) {
if (isTRUE(adaptive))
stopf("'partial' argument cannot be used together with 'adaptive'")
if (is.list(n))
stopf("n must be integer, list is accepted for adaptive TRUE")
if (!length(n))
stopf("n must be non 0 length")
n = partial2adaptive(x, n, align)
adaptive = TRUE
}
leftadaptive = isTRUE(adaptive) && align=="left"
if (leftadaptive) {
verbose = getOption("datatable.verbose")
rev2 = function(x) if (is.list(x)) lapply(x, rev) else rev(x)
if (verbose)
cat("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n")
x = rev2(x)
n = rev2(n)
align = "right"
}
rho = new.env()
ans = .Call(CfrollapplyR, FUN, x, n, fill, align, rho)
ans
ans = .Call(CfrollapplyR, FUN, x, n, fill, align, adaptive, rho)
if (!leftadaptive)
ans
else {
if (verbose)
cat("frollapply: adaptive=TRUE && align='left' post-processing from align='right'\n")
rev2(ans)
}
}
7 changes: 5 additions & 2 deletions inst/tests/froll.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -1154,10 +1154,13 @@ f = function(x) {
}
#test(6010.106, head(frollapply(1:5, 3, f), 3L), c(NA_real_,NA_real_,1), output=c("frollapplyR: allocating memory.*","frollapply: took.*","frollapplyR: processing.*took.*")) # only head 3 is valid, rest is undefined as REAL is applied on logical type, can return garbage or fail with REAL error
options(datatable.verbose=FALSE)

# frollapply adaptive
test(6010.201, frollapply(1:3, c(3,3,3), sum, adaptive=TRUE), c(NA,NA,6))
#TODO tests

#### test coverage
test(6010.5, frollapply(1:3, c(3,3,3), sum, adaptive=TRUE), error="frollapply does not support 'adaptive' argument")
test(6010.501, frollapply(1:3, "b", sum), error="n must be integer")
test(6010.502, frollapply(1:3, 2.5, sum), error="n must be integer")
test(6010.503, frollapply(1:3, integer(), sum), error="n must be non 0 length")
test(6010.504, frollapply(1:3, 2L, sum, fill=1:2), error="fill must be a vector of length 1")
test(6010.505, frollapply(1:3, 2L, sum, fill=NA_integer_), c(NA,3,5))
Expand Down
1 change: 0 additions & 1 deletion man/froll.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@
\item \code{align} does not support \code{"center"}.
\item if list of vectors is passed to \code{x}, then all vectors within it must have equal length.
\item \code{partial=TRUE} is not supported.
\item functionality is not supported in \code{frollapply} (to be changed).
}
}
\section{\code{partial} argument}{
Expand Down
4 changes: 2 additions & 2 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,11 @@ void frolladaptivesumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fil
void frolladaptivesumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
//void frolladaptivemaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now
void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
void frolladaptiveapply(double *x, int64_t nx, SEXP pw, int *k, ans_t *ans, double fill, SEXP call, SEXP rho, bool verbose);

// frollR.c
SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive);
SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho);
SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP adaptive, SEXP rho);

// nafill.c
void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose);
Expand Down Expand Up @@ -370,4 +371,3 @@ SEXP dt_has_zlib(void);
SEXP startsWithAny(SEXP, SEXP, SEXP);
SEXP convertDate(SEXP, SEXP);
SEXP fastmean(SEXP);

139 changes: 108 additions & 31 deletions src/frollR.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,16 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
}
int **ikl = (int**)R_alloc(nk, sizeof(int*)); // to not recalculate `length(x[[i]])` we store it in extra array
if (badaptive) {
for (int j=0; j<nk; j++) ikl[j] = INTEGER(VECTOR_ELT(kl, j));
for (int j=0; j<nk; j++)
ikl[j] = INTEGER(VECTOR_ELT(kl, j));
}
int* iik = NULL;
if (!badaptive) {
if (!isInteger(ik))
error(_("Internal error: badaptive=%d but ik is not integer"), badaptive); // # nocov
iik = INTEGER(ik); // pointer to non-adaptive window width, still can be vector when doing multiple windows
} else {
// ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
}

if (!IS_TRUE_OR_FALSE(narm))
Expand Down Expand Up @@ -166,15 +175,6 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
else
internal_error(__func__, "invalid %s argument in %s function should have been caught earlier", "algo", "rolling"); // # nocov

int* iik = NULL;
if (!badaptive) {
if (!isInteger(ik))
internal_error(__func__, "badaptive=%d but ik is not integer", badaptive); // # nocov
iik = INTEGER(ik); // pointer to non-adaptive window width, still can be vector when doing multiple windows
} else {
// ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
}

if (verbose) {
if (ialgo==0)
Rprintf(_("%s: %d column(s) and %d window(s), if product > 1 then entering parallel execution\n"), __func__, nx, nk);
Expand All @@ -201,7 +201,15 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
return isVectorAtomic(obj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans;
}

SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
// helper to find biggest window width for adaptive frollapply
int maxk(int *k, uint64_t len) {
int mk = k[0];
for (uint64_t i=1; i<len; i++)
if (k[i] > mk)
mk = k[i];
return mk;
}
SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP adaptive, SEXP rho) {
int protecti = 0;
const bool verbose = GetVerbose();

Expand All @@ -218,22 +226,72 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
SEXP x = PROTECT(coerceToRealListR(obj)); protecti++;
R_len_t nx = length(x);

if (!isInteger(k)) {
if (isReal(k)) {
if (fitsInInt32(k)) {
SEXP ik = PROTECT(coerceVector(k, INTSXP)); protecti++;
k = ik;
if (xlength(k) == 0)
error(_("n must be non 0 length"));

if (!IS_TRUE_OR_FALSE(adaptive))
error(_("%s must be TRUE or FALSE"), "adaptive");
bool badaptive = LOGICAL(adaptive)[0];

R_len_t nk = 0;
SEXP ik = R_NilValue;
SEXP kl = R_NilValue;
if (!badaptive) {
if (isNewList(k))
error(_("n must be integer, list is accepted for adaptive TRUE"));

if (isInteger(k)) {
ik = k;
} else if (isReal(k)) {
ik = PROTECT(coerceVector(k, INTSXP)); protecti++;
} else {
error(_("n must be integer"));
}

nk = length(k);
R_len_t i=0;
while (i < nk && INTEGER(ik)[i] > 0) i++;
if (i != nk)
error(_("n must be positive integer values (> 0)"));
} else {
if (isVectorAtomic(k)) {
kl = PROTECT(allocVector(VECSXP, 1)); protecti++;
if (isInteger(k)) {
SET_VECTOR_ELT(kl, 0, k);
} else if (isReal(k)) {
SET_VECTOR_ELT(kl, 0, coerceVector(k, INTSXP));
} else {
error(_("n must be integer"));
error(_("n must be integer vector or list of integer vectors"));
}
nk = 1;
} else {
error(_("n must be integer"));
nk = length(k);
kl = PROTECT(allocVector(VECSXP, nk)); protecti++;
for (R_len_t i=0; i<nk; i++) {
if (isInteger(VECTOR_ELT(k, i))) {
SET_VECTOR_ELT(kl, i, VECTOR_ELT(k, i));
} else if (isReal(VECTOR_ELT(k, i))) {
SET_VECTOR_ELT(kl, i, coerceVector(VECTOR_ELT(k, i), INTSXP));
} else {
error(_("n must be integer vector or list of integer vectors"));
}
}
}
}
R_len_t nk = length(k);
if (nk == 0)
error(_("n must be non 0 length"));
int *ik = INTEGER(k);
int **ikl = (int**)R_alloc(nk, sizeof(int*));
if (badaptive) {
for (int j=0; j<nk; j++)
ikl[j] = INTEGER(VECTOR_ELT(kl, j));
}

int* iik = NULL;
if (!badaptive) {
if (!isInteger(ik))
error(_("Internal error: badaptive=%d but ik is not integer"), badaptive); // # nocov
iik = INTEGER(ik);
} else {
// ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
}

int ialign=-2;
if (!strcmp(CHAR(STRING_ELT(align, 0)), "right")) {
Expand All @@ -246,6 +304,9 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
internal_error(__func__, "invalid %s argument in %s function should have been caught earlier", "align", "rolling"); // # nocov
}

if (badaptive && ialign==0)
error(_("using adaptive TRUE and align 'center' is not implemented"));

if (length(fill) != 1)
error(_("fill must be a vector of length 1"));
if (!isInteger(fill) && !isReal(fill) && !isLogical(fill))
Expand All @@ -262,6 +323,12 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
for (R_len_t i=0; i<nx; i++) {
inx[i] = xlength(VECTOR_ELT(x, i));
for (R_len_t j=0; j<nk; j++) {
if (badaptive) {
if (i > 0 && (inx[i]!=inx[i-1]))
error(_("adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame; If you want to call rolling function on list having variable length of elements call it for each field separately"));
if (xlength(VECTOR_ELT(kl, j))!=inx[0])
error(_("length of integer vector(s) provided as list to 'n' argument must be equal to number of observations provided in 'x'"));
}
SET_VECTOR_ELT(ans, i*nk+j, allocVector(REALSXP, inx[i]));
dans[i*nk+j] = ((ans_t) { .dbl_v=REAL(VECTOR_ELT(ans, i*nk+j)), .status=0, .message={"\0","\0","\0","\0"} });
}
Expand All @@ -274,16 +341,26 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
// in the outer loop we handle vectorized k argument
// for each k we need to allocate a width window object: pw
// we also need to construct distinct R call pointing to that window
for (R_len_t j=0; j<nk; j++) {
pw = PROTECT(allocVector(REALSXP, ik[j]));
dw = REAL(pw);
pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));

for (R_len_t i=0; i<nx; i++) {
frollapply(dx[i], inx[i], dw, ik[j], &dans[i*nk+j], ialign, dfill, pc, rho, verbose);
if (!badaptive) {
for (R_len_t j=0; j<nk; j++) {
pw = PROTECT(allocVector(REALSXP, iik[j]));
dw = REAL(pw);
pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
for (R_len_t i=0; i<nx; i++) {
frollapply(dx[i], inx[i], dw, iik[j], &dans[i*nk+j], ialign, dfill, pc, rho, verbose);
}
UNPROTECT(2);
}
} else {
for (R_len_t j=0; j<nk; j++) {
pw = PROTECT(allocVector(REALSXP, maxk(ikl[j], inx[0]))); // max window size, inx[0] because inx is constant for adaptive
SET_GROWABLE_BIT(pw); // so we can set length of window for each observation
pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
for (R_len_t i=0; i<nx; i++) {
frolladaptiveapply(dx[i], inx[i], pw, ikl[j], &dans[i*nk+j], dfill, pc, rho, verbose);
}
UNPROTECT(2);
}

UNPROTECT(2);
}

if (verbose)
Expand Down
Loading
Loading