adaptive and partial added to frollapply()

jangorecki · web-flow · commit 477e6eae3c0d · 2025-05-13T02:00:50.000+06:00
diff --git a/NEWS.md b/NEWS.md
@@ -12,12 +12,10 @@
 
 4. `as.Date()` method for `IDate` no longer coerces to `double` [#6922](https://github.com/Rdatatable/data.table/issues/6922). Thanks @MichaelChirico for the report and PR. The only effect should be on overly-strict tests that assert `Date` objects have `double` storage, which is not in general true, especially from R 4.5.0.
 
-5. Multiple improvements has been added to rolling functions. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). There was no `frollmax` function yet. Adaptive rolling functions did not have support for `align="left"`. `frollapply` did not support `adaptive=TRUE`. Available alternatives were base R `mapply` or self-join using `max` and grouping `by=.EACHI`. As a follow up of his request, following features has been or will be added:
+5. Multiple improvements has been added to rolling functions. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). There was no `frollmax` function yet. Adaptive rolling functions did not have support for `align="left"`. `frollapply` did not support `adaptive=TRUE`. Available alternatives were base R `mapply` or self-join using `max` and grouping `by=.EACHI`. As a follow up of his request, following features has been added:
 - new function `frollmax`, applies `max` over a rolling window.
 - support for `align="left"` for adaptive rolling function.
 - support for `adaptive=TRUE` in `frollapply`.
-- better support for non-double data types in `frollapply`.
-- better support for `Inf` and `-Inf` support in `algo="fast"` implementation.
 - `partial` argument to trim window width to available observations rather than returning `NA` whenever window is not complete.
 
 For a comprehensive description about all available features see `?froll` manual.
@@ -36,15 +34,17 @@ x = data.table(
 baser = function(x) x[, mapply(function(from, to) max(value[from:to]), row, end_window)]
 sj = function(x) x[x, max(value), on=.(row >= row, row <= end_window), by=.EACHI]$V1
 frmax = function(x) x[, frollmax(value, len_window, adaptive=TRUE, align="left", hasNA=FALSE)]
+frapply = function(x) x[, frollapply(value, len_window, max, adaptive=TRUE, align="left")]
 microbenchmark::microbenchmark(
-  baser(x), sj(x), frmax(x),
+  baser(x), sj(x), frmax(x), frapply(x),
   times=10, check="identical"
 )
 #Unit: milliseconds
-#     expr        min         lq       mean     median         uq      max neval
-# baser(x) 4290.98557 4529.82841 4573.94115 4604.85827 4654.39342 4883.991    10
-#    sj(x) 3600.42771 3752.19359 4118.21755 4235.45856 4329.08728 4884.080    10
-# frmax(x)   64.48627   73.07978   88.84932   76.64569   82.56115  198.438    10
+#       expr       min         lq       mean     median         uq       max neval
+#   baser(x) 5472.2715 5596.11013 5763.93265 5659.06510 5935.11236 6338.0498    10
+#      sj(x) 4664.3359 4872.40122 4978.01860 4919.15975 5061.69718 5345.3508    10
+#   frmax(x)   70.0804   75.13598   91.35392   95.80486   99.99415  113.2648    10
+# frapply(x)  743.9082  833.65667  904.32891  893.75805  979.63510 1158.6030    10
 ```
 
 ## BUG FIXES
diff --git a/R/froll.R b/R/froll.R
@@ -41,7 +41,7 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "
   if (isTRUE(partial)) {
     if (isTRUE(adaptive))
       stopf("'partial' argument cannot be used together with 'adaptive'")
-    if (is.list(n)) ## duplicate two of C check to detect early
+    if (is.list(n))
       stopf("n must be integer, list is accepted for adaptive TRUE")
     if (!length(n))
       stopf("n must be non 0 length")
@@ -77,14 +77,37 @@ frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "lef
 frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE, partial=FALSE) {
   froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive, partial=partial)
 }
+
 frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center"), adaptive=FALSE, partial=FALSE) {
   FUN = match.fun(FUN)
   align = match.arg(align)
-  if (isTRUE(partial))
-    stopf("frollapply does not support 'partial' argument yet")
-  if (!missing(adaptive))
-    stopf("frollapply does not support 'adaptive' argument yet")
+  if (isTRUE(partial)) {
+    if (isTRUE(adaptive))
+      stopf("'partial' argument cannot be used together with 'adaptive'")
+    if (is.list(n))
+      stopf("n must be integer, list is accepted for adaptive TRUE")
+    if (!length(n))
+      stopf("n must be non 0 length")
+    n = partial2adaptive(x, n, align)
+    adaptive = TRUE
+  }
+  leftadaptive = isTRUE(adaptive) && align=="left"
+  if (leftadaptive) {
+    verbose = getOption("datatable.verbose")
+    rev2 = function(x) if (is.list(x)) lapply(x, rev) else rev(x)
+    if (verbose)
+      cat("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n")
+    x = rev2(x)
+    n = rev2(n)
+    align = "right"
+  }
   rho = new.env()
-  ans = .Call(CfrollapplyR, FUN, x, n, fill, align, rho)
-  ans
+  ans = .Call(CfrollapplyR, FUN, x, n, fill, align, adaptive, rho)
+  if (!leftadaptive)
+    ans
+  else {
+    if (verbose)
+      cat("frollapply: adaptive=TRUE && align='left' post-processing from align='right'\n")
+    rev2(ans)
+  }
 }
diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
@@ -1154,10 +1154,13 @@ f = function(x) {
 }
 #test(6010.106, head(frollapply(1:5, 3, f), 3L), c(NA_real_,NA_real_,1), output=c("frollapplyR: allocating memory.*","frollapply: took.*","frollapplyR: processing.*took.*")) # only head 3 is valid, rest is undefined as REAL is applied on logical type, can return garbage or fail with REAL error
 options(datatable.verbose=FALSE)
+
+# frollapply adaptive
+test(6010.201, frollapply(1:3, c(3,3,3), sum, adaptive=TRUE), c(NA,NA,6))
+#TODO tests
+
 #### test coverage
-test(6010.5, frollapply(1:3, c(3,3,3), sum, adaptive=TRUE), error="frollapply does not support 'adaptive' argument")
 test(6010.501, frollapply(1:3, "b", sum), error="n must be integer")
-test(6010.502, frollapply(1:3, 2.5, sum), error="n must be integer")
 test(6010.503, frollapply(1:3, integer(), sum), error="n must be non 0 length")
 test(6010.504, frollapply(1:3, 2L, sum, fill=1:2), error="fill must be a vector of length 1")
 test(6010.505, frollapply(1:3, 2L, sum, fill=NA_integer_), c(NA,3,5))
diff --git a/man/froll.Rd b/man/froll.Rd
@@ -107,7 +107,6 @@
     \item \code{align} does not support \code{"center"}.
     \item if list of vectors is passed to \code{x}, then all vectors within it must have equal length.
     \item \code{partial=TRUE} is not supported.
-    \item functionality is not supported in \code{frollapply} (to be changed).
   }
 }
 \section{\code{partial} argument}{
diff --git a/src/data.table.h b/src/data.table.h
@@ -241,10 +241,11 @@ void frolladaptivesumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fil
 void frolladaptivesumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 //void frolladaptivemaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now
 void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void frolladaptiveapply(double *x, int64_t nx, SEXP pw, int *k, ans_t *ans, double fill, SEXP call, SEXP rho, bool verbose);
 
 // frollR.c
 SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive);
-SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho);
+SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP adaptive, SEXP rho);
 
 // nafill.c
 void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose);
@@ -370,4 +371,3 @@ SEXP dt_has_zlib(void);
 SEXP startsWithAny(SEXP, SEXP, SEXP);
 SEXP convertDate(SEXP, SEXP);
 SEXP fastmean(SEXP);
-
diff --git a/src/frollR.c b/src/frollR.c
@@ -88,7 +88,16 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
   }
   int **ikl = (int**)R_alloc(nk, sizeof(int*));                 // to not recalculate `length(x[[i]])` we store it in extra array
   if (badaptive) {
-    for (int j=0; j<nk; j++) ikl[j] = INTEGER(VECTOR_ELT(kl, j));
+    for (int j=0; j<nk; j++)
+      ikl[j] = INTEGER(VECTOR_ELT(kl, j));
+  }
+  int* iik = NULL;
+  if (!badaptive) {
+    if (!isInteger(ik))
+      error(_("Internal error: badaptive=%d but ik is not integer"), badaptive); // # nocov
+    iik = INTEGER(ik);                                          // pointer to non-adaptive window width, still can be vector when doing multiple windows
+  } else {
+    // ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
   }
 
   if (!IS_TRUE_OR_FALSE(narm))
@@ -166,15 +175,6 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
   else
     internal_error(__func__, "invalid %s argument in %s function should have been caught earlier", "algo", "rolling"); // # nocov
 
-  int* iik = NULL;
-  if (!badaptive) {
-    if (!isInteger(ik))
-      internal_error(__func__, "badaptive=%d but ik is not integer", badaptive); // # nocov
-    iik = INTEGER(ik);                                          // pointer to non-adaptive window width, still can be vector when doing multiple windows
-  } else {
-    // ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
-  }
-
   if (verbose) {
     if (ialgo==0)
       Rprintf(_("%s: %d column(s) and %d window(s), if product > 1 then entering parallel execution\n"), __func__, nx, nk);
@@ -201,7 +201,15 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
   return isVectorAtomic(obj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans;
 }
 
-SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
+// helper to find biggest window width for adaptive frollapply
+int maxk(int *k, uint64_t len) {
+  int mk = k[0];
+  for (uint64_t i=1; i<len; i++)
+    if (k[i] > mk)
+      mk = k[i];
+  return mk;
+}
+SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP adaptive, SEXP rho) {
   int protecti = 0;
   const bool verbose = GetVerbose();
 
@@ -218,22 +226,72 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
   SEXP x = PROTECT(coerceToRealListR(obj)); protecti++;
   R_len_t nx = length(x);
 
-  if (!isInteger(k)) {
-    if (isReal(k)) {
-      if (fitsInInt32(k)) {
-        SEXP ik = PROTECT(coerceVector(k, INTSXP)); protecti++;
-        k = ik;
+  if (xlength(k) == 0)
+    error(_("n must be non 0 length"));
+
+  if (!IS_TRUE_OR_FALSE(adaptive))
+    error(_("%s must be TRUE or FALSE"), "adaptive");
+  bool badaptive = LOGICAL(adaptive)[0];
+
+  R_len_t nk = 0;
+  SEXP ik = R_NilValue;
+  SEXP kl = R_NilValue;
+  if (!badaptive) {
+    if (isNewList(k))
+      error(_("n must be integer, list is accepted for adaptive TRUE"));
+
+    if (isInteger(k)) {
+      ik = k;
+    } else if (isReal(k)) {
+      ik = PROTECT(coerceVector(k, INTSXP)); protecti++;
+    } else {
+      error(_("n must be integer"));
+    }
+
+    nk = length(k);
+    R_len_t i=0;
+    while (i < nk && INTEGER(ik)[i] > 0) i++;
+    if (i != nk)
+      error(_("n must be positive integer values (> 0)"));
+  } else {
+    if (isVectorAtomic(k)) {
+      kl = PROTECT(allocVector(VECSXP, 1)); protecti++;
+      if (isInteger(k)) {
+        SET_VECTOR_ELT(kl, 0, k);
+      } else if (isReal(k)) {
+        SET_VECTOR_ELT(kl, 0, coerceVector(k, INTSXP));
       } else {
-        error(_("n must be integer"));
+        error(_("n must be integer vector or list of integer vectors"));
       }
+      nk = 1;
     } else {
-      error(_("n must be integer"));
+      nk = length(k);
+      kl = PROTECT(allocVector(VECSXP, nk)); protecti++;
+      for (R_len_t i=0; i<nk; i++) {
+        if (isInteger(VECTOR_ELT(k, i))) {
+          SET_VECTOR_ELT(kl, i, VECTOR_ELT(k, i));
+        } else if (isReal(VECTOR_ELT(k, i))) {
+          SET_VECTOR_ELT(kl, i, coerceVector(VECTOR_ELT(k, i), INTSXP));
+        } else {
+          error(_("n must be integer vector or list of integer vectors"));
+        }
+      }
     }
   }
-  R_len_t nk = length(k);
-  if (nk == 0)
-    error(_("n must be non 0 length"));
-  int *ik = INTEGER(k);
+  int **ikl = (int**)R_alloc(nk, sizeof(int*));
+  if (badaptive) {
+    for (int j=0; j<nk; j++)
+      ikl[j] = INTEGER(VECTOR_ELT(kl, j));
+  }
+
+  int* iik = NULL;
+  if (!badaptive) {
+    if (!isInteger(ik))
+      error(_("Internal error: badaptive=%d but ik is not integer"), badaptive); // # nocov
+    iik = INTEGER(ik);
+  } else {
+    // ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
+  }
 
   int ialign=-2;
   if (!strcmp(CHAR(STRING_ELT(align, 0)), "right")) {
@@ -246,6 +304,9 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
     internal_error(__func__, "invalid %s argument in %s function should have been caught earlier", "align", "rolling"); // # nocov
   }
 
+  if (badaptive && ialign==0)
+    error(_("using adaptive TRUE and align 'center' is not implemented"));
+
   if (length(fill) != 1)
     error(_("fill must be a vector of length 1"));
   if (!isInteger(fill) && !isReal(fill) && !isLogical(fill))
@@ -262,6 +323,12 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
   for (R_len_t i=0; i<nx; i++) {
     inx[i] = xlength(VECTOR_ELT(x, i));
     for (R_len_t j=0; j<nk; j++) {
+      if (badaptive) {
+        if (i > 0 && (inx[i]!=inx[i-1]))
+          error(_("adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame; If you want to call rolling function on list having variable length of elements call it for each field separately"));
+        if (xlength(VECTOR_ELT(kl, j))!=inx[0])
+          error(_("length of integer vector(s) provided as list to 'n' argument must be equal to number of observations provided in 'x'"));
+      }
       SET_VECTOR_ELT(ans, i*nk+j, allocVector(REALSXP, inx[i]));
       dans[i*nk+j] = ((ans_t) { .dbl_v=REAL(VECTOR_ELT(ans, i*nk+j)), .status=0, .message={"\0","\0","\0","\0"} });
     }
@@ -274,16 +341,26 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
   // in the outer loop we handle vectorized k argument
   // for each k we need to allocate a width window object: pw
   // we also need to construct distinct R call pointing to that window
-  for (R_len_t j=0; j<nk; j++) {
-    pw = PROTECT(allocVector(REALSXP, ik[j]));
-    dw = REAL(pw);
-    pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
-
-    for (R_len_t i=0; i<nx; i++) {
-      frollapply(dx[i], inx[i], dw, ik[j], &dans[i*nk+j], ialign, dfill, pc, rho, verbose);
+  if (!badaptive) {
+    for (R_len_t j=0; j<nk; j++) {
+      pw = PROTECT(allocVector(REALSXP, iik[j]));
+      dw = REAL(pw);
+      pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
+      for (R_len_t i=0; i<nx; i++) {
+        frollapply(dx[i], inx[i], dw, iik[j], &dans[i*nk+j], ialign, dfill, pc, rho, verbose);
+      }
+      UNPROTECT(2);
+    }
+  } else {
+    for (R_len_t j=0; j<nk; j++) {
+      pw = PROTECT(allocVector(REALSXP, maxk(ikl[j], inx[0]))); // max window size, inx[0] because inx is constant for adaptive
+      SET_GROWABLE_BIT(pw); // so we can set length of window for each observation
+      pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
+      for (R_len_t i=0; i<nx; i++) {
+        frolladaptiveapply(dx[i], inx[i], pw, ikl[j], &dans[i*nk+j], dfill, pc, rho, verbose);
+      }
+      UNPROTECT(2);
     }
-
-    UNPROTECT(2);
   }
 
   if (verbose)
diff --git a/src/frolladaptive.c b/src/frolladaptive.c

Original file line number	Diff line number	Diff line change
`@@ -107,7 +107,6 @@`
`107`	`107`	`\item \code{align} does not support \code{"center"}.`
`108`	`108`	`\item if list of vectors is passed to \code{x}, then all vectors within it must have equal length.`
`109`	`109`	`\item \code{partial=TRUE} is not supported.`
`110`		`- \item functionality is not supported in \code{frollapply} (to be changed).`
`111`	`110`	`}`
`112`	`111`	`}`
`113`	`112`	`\section{\code{partial} argument}{`