rolling prod more accurate (#7351)

jangorecki · web-flow · commit 55b0de600d17 · 2025-10-09T19:59:11.000+02:00
* zeros and num stability * frollprod adaptive fast redirect to exact * more tests * codecov * correct expected ans * Revert "zeros and num stability" This reverts commit 4074e12. * handle zeros in frollprod fast
diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
@@ -1202,8 +1202,8 @@ test(6000.931, frollprod(1:3, 2), c(NA, 2, 6), output="frollprodFast: running fo
 test(6000.932, frollprod(1:3, 2, align="left"), c(2, 6, NA), output="frollfun: align")
 test(6000.933, frollprod(c(1,2,NA), 2), c(NA, 2, NA), output="non-finite values are present in input, re-running with extra care for NFs")
 test(6000.934, frollprod(c(NA,2,3), 2), c(NA, NA, 6), output="non-finite values are present in input, skip non-finite inaware attempt and run with extra care for NFs straighaway")
-test(6000.935, frollprod(1:3, c(2,2,2), adaptive=TRUE), c(NA, 2, 6), output="frolladaptiveprodFast: running for input length")
-test(6000.936, frollprod(c(NA,2,3), c(2,2,2), adaptive=TRUE), c(NA, NA, 6), output="non-finite values are present in input, re-running with extra care for NFs")
+test(6000.935, frollprod(1:3, c(2,2,2), adaptive=TRUE), c(NA, 2, 6), output="algo 0 not implemented, fall back to 1")
+test(6000.936, frollprod(c(NA,2,3), c(2,2,2), adaptive=TRUE), c(NA, NA, 6), output="non-finite values are present in input, na.rm=FALSE and algo='exact' propagates NFs properply, no need to re-run")
 options(datatable.verbose=FALSE)
 # floating point overflow
 test(6000.941, frollprod(c(1e100, 1e100, 1e100, 1e100, 1e100), 5), c(NA,NA,NA,NA,Inf))
@@ -1222,6 +1222,15 @@ test(6000.953, frollprod(c(1e100, 1e100, 1e100, 1e100, 1e100), rep(5, 5), algo="
 test(6000.954, frollprod(c(1e100, 1e100, 1e100, 1e100, 1e100), rep(4, 5), algo="exact", adaptive=TRUE), c(NA,NA,NA,Inf,Inf))
 test(6000.955, frollprod(c(1e100, 1e100, 1e100, 1e100, -1e100), rep(5, 5), algo="exact", adaptive=TRUE), c(NA,NA,NA,NA,-Inf))
 test(6000.956, frollprod(c(1e100, 1e100, 1e100, 1e100, -1e100), rep(4, 5), algo="exact", adaptive=TRUE), c(NA,NA,NA,Inf,-Inf))
+# rolling product and numerical stability #7349
+test(6000.9601, frollprod(c(2,2,0,2,2), 2), c(NA,4,0,0,4))
+test(6000.9602, frollprod(c(2,2,0,-2,2), 2), c(NA,4,0,0,-4))
+test(6000.9603, frollprod(c(2,2,0,-2,-2), 2), c(NA,4,0,0,4))
+test(6000.9604, frollprod(c(2,2,0,Inf,2), 2), c(NA,4,0,NaN,Inf))
+test(6000.9605, frollprod(c(2,2,0,-Inf,2), 2), c(NA,4,0,NaN,-Inf))
+test(6000.9606, frollprod(c(2,2,0,-Inf,-2), 2), c(NA,4,0,NaN,Inf))
+test(6000.9607, frollprod(c(0,2,2,2,2), 2), c(NA,0,4,4,4))
+test(6000.9608, frollprod(c(2,0,2,2,2), 2), c(NA,0,0,4,4))
 
 # n==0, k==0, k[i]==0
 test(6001.111, frollmean(1:3, 0), c(NaN,NaN,NaN), options=c("datatable.verbose"=TRUE), output="window width of size 0")
@@ -2242,7 +2251,7 @@ rollfun = function(x, n, FUN, fill=NA_real_, na.rm=FALSE, nf.rm=FALSE, partial=F
   ans
 }
 base_compare = function(x, n, funs=c("mean","sum","max","min","prod","median"), algos=c("fast","exact")) {
-  num.step = 0.001
+  num.step = 0.0001
   for (fun in funs) {
     for (na.rm in c(FALSE, TRUE)) {
       for (fill in c(NA_real_, 0)) {
@@ -2276,6 +2285,39 @@ base_compare = function(x, n, funs=c("mean","sum","max","min","prod","median"),
     }
   }
 }
+num = 7000.0
+x = rnorm(1e3); n = 50
+base_compare(x, n)
+x = rnorm(1e3+1); n = 50 ## uneven len
+base_compare(x, n)
+x = rnorm(1e3); n = 51 ## uneven window
+base_compare(x, n)
+x = rnorm(1e3+1); n = 51
+base_compare(x, n)
+x = sort(rnorm(1e3)); n = 50 ## inc
+base_compare(x, n)
+x = sort(rnorm(1e3+1)); n = 50
+base_compare(x, n)
+x = sort(rnorm(1e3)); n = 51
+base_compare(x, n)
+x = sort(rnorm(1e3+1)); n = 51
+base_compare(x, n)
+x = rev(sort(rnorm(1e3))); n = 50 ## desc
+base_compare(x, n)
+x = rev(sort(rnorm(1e3+1))); n = 50
+base_compare(x, n)
+x = rev(sort(rnorm(1e3))); n = 51
+base_compare(x, n)
+x = rev(sort(rnorm(1e3+1))); n = 51
+base_compare(x, n)
+x = rep(rnorm(1), 1e3); n = 50 ## const
+base_compare(x, n)
+x = rep(rnorm(1), 1e3+1); n = 50
+base_compare(x, n)
+x = rep(rnorm(1), 1e3); n = 51
+base_compare(x, n)
+x = rep(rnorm(1), 1e3+1); n = 51
+base_compare(x, n)
 num = 7100.0
 ## random NA non-finite
 x = makeNA(rnorm(1e3), nf=TRUE); n = 50
@@ -2433,6 +2475,39 @@ afun_compare = function(x, n, funs=c("mean","sum","max","min","prod","median"),
   }
 }
 num = 7300.0
+x = rnorm(1e3); n = sample(50, length(x), TRUE)
+afun_compare(x, n)
+x = rnorm(1e3+1); n = sample(50, length(x), TRUE) ## uneven len
+afun_compare(x, n)
+x = rnorm(1e3); n = sample(51, length(x), TRUE) ## uneven window
+afun_compare(x, n)
+x = rnorm(1e3+1); n = sample(51, length(x), TRUE)
+afun_compare(x, n)
+x = sort(rnorm(1e3)); n = sample(50, length(x), TRUE) ## inc
+afun_compare(x, n)
+x = sort(rnorm(1e3+1)); n = sample(50, length(x), TRUE)
+afun_compare(x, n)
+x = sort(rnorm(1e3)); n = sample(51, length(x), TRUE)
+afun_compare(x, n)
+x = sort(rnorm(1e3+1)); n = sample(51, length(x), TRUE)
+afun_compare(x, n)
+x = rev(sort(rnorm(1e3))); n = sample(50, length(x), TRUE) ## desc
+afun_compare(x, n)
+x = rev(sort(rnorm(1e3+1))); n = sample(50, length(x), TRUE)
+afun_compare(x, n)
+x = rev(sort(rnorm(1e3))); n = sample(51, length(x), TRUE)
+afun_compare(x, n)
+x = rev(sort(rnorm(1e3+1))); n = sample(51, length(x), TRUE)
+afun_compare(x, n)
+x = rep(rnorm(1), 1e3); n = sample(50, length(x), TRUE) ## const
+afun_compare(x, n)
+x = rep(rnorm(1), 1e3+1); n = sample(50, length(x), TRUE)
+afun_compare(x, n)
+x = rep(rnorm(1), 1e3); n = sample(51, length(x), TRUE)
+afun_compare(x, n)
+x = rep(rnorm(1), 1e3+1); n = sample(51, length(x), TRUE)
+afun_compare(x, n)
+num = 7400.0
 #### no NA
 x = rnorm(1e3); n = sample(50, length(x), TRUE) # x even, n even
 afun_compare(x, n)
diff --git a/src/data.table.h b/src/data.table.h
@@ -258,7 +258,7 @@ void frolladaptivesumExact(const double *x, uint64_t nx, ans_t *ans, const int *
 void frolladaptivemaxExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
 //void frolladaptiveminFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose); // does not exists as of now
 void frolladaptiveminExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
-void frolladaptiveprodFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
+//void frolladaptiveprodFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose); // does not exists as of now
 void frolladaptiveprodExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
 //void frolladaptivemedianFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose); // does not exists as of now
 void frolladaptivemedianExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
diff --git a/src/froll.c b/src/froll.c
@@ -895,7 +895,11 @@ void frollminExact(const double *x, uint64_t nx, ans_t *ans, int k, double fill,
 #undef PROD_WINDOW_STEP_FRONT
 #define PROD_WINDOW_STEP_FRONT                                   \
   if (R_FINITE(x[i])) {                                          \
-    w *= x[i];                                                   \
+    if (x[i] == 0.0) {                                           \
+      zerc++;                                                    \
+    } else {                                                     \
+      w *= x[i];                                                 \
+    }                                                            \
   } else if (ISNAN(x[i])) {                                      \
     nc++;                                                        \
   } else if (x[i]==R_PosInf) {                                   \
@@ -906,7 +910,11 @@ void frollminExact(const double *x, uint64_t nx, ans_t *ans, int k, double fill,
 #undef PROD_WINDOW_STEP_BACK
 #define PROD_WINDOW_STEP_BACK                                    \
   if (R_FINITE(x[i-k])) {                                        \
-    w /= x[i-k];                                                 \
+    if (x[i-k] == 0.0) {                                         \
+      zerc--;                                                    \
+    } else {                                                     \
+      w /= x[i-k];                                               \
+    }                                                            \
   } else if (ISNAN(x[i-k])) {                                    \
     nc--;                                                        \
   } else if (x[i-k]==R_PosInf) {                                 \
@@ -918,18 +926,34 @@ void frollminExact(const double *x, uint64_t nx, ans_t *ans, int k, double fill,
 #define PROD_WINDOW_STEP_VALUE                                   \
   if (nc == 0) {                                                 \
     if (pinf == 0 && ninf == 0) {                                \
-      ans->dbl_v[i] = (double) w;                                \
+      if (zerc) {                                                \
+        ans->dbl_v[i] = 0.0;                                     \
+      } else {                                                   \
+        ans->dbl_v[i] = (double) w;                              \
+      }                                                          \
     } else {                                                     \
-      ans->dbl_v[i] = (ninf+(w<0))%2 ? R_NegInf : R_PosInf;      \
+      if (zerc) {                                                \
+        ans->dbl_v[i] = R_NaN;                                   \
+      } else {                                                   \
+        ans->dbl_v[i] = (ninf+(w<0))%2 ? R_NegInf : R_PosInf;    \
+      }                                                          \
     }                                                            \
   } else if (nc == k) {                                          \
     ans->dbl_v[i] = narm ? 1.0 : NA_REAL;                        \
   } else {                                                       \
     if (narm) {                                                  \
       if (pinf == 0 && ninf == 0) {                              \
-        ans->dbl_v[i] = (double) w;                              \
+        if (zerc) {                                              \
+          ans->dbl_v[i] = 0.0;                                   \
+        } else {                                                 \
+          ans->dbl_v[i] = (double) w;                            \
+        }                                                        \
       } else {                                                   \
-        ans->dbl_v[i] = (ninf+(w<0))%2 ? R_NegInf : R_PosInf;    \
+        if (zerc) {                                              \
+          ans->dbl_v[i] = R_NaN;                                 \
+        } else {                                                 \
+          ans->dbl_v[i] = (ninf+(w<0))%2 ? R_NegInf : R_PosInf;  \
+        }                                                        \
       }                                                          \
     } else {                                                     \
       ans->dbl_v[i] = NA_REAL;                                   \
@@ -951,34 +975,59 @@ void frollprodFast(const double *x, uint64_t nx, ans_t *ans, int k, double fill,
     return;
   }
   long double w = 1.0;
+  int zerc = 0;
   bool truehasnf = hasnf>0;
   if (!truehasnf) {
     int i;
     for (i=0; i<k-1; i++) { // #loop_counter_not_local_scope_ok
-      w *= x[i];
+      if (x[i] == 0.0) {
+        zerc++;
+      } else {
+        w *= x[i];
+      }
       ans->dbl_v[i] = fill;
     }
-    w *= x[i];
-    ans->dbl_v[i] = (double) w;
+    if (x[i] == 0.0) {
+      zerc++;
+    } else {
+      w *= x[i];
+    }
+    if (zerc) {
+      ans->dbl_v[i] = 0.0;
+    } else {
+      ans->dbl_v[i] = (double) w;
+    }
     if (R_FINITE((double) w)) {
       for (uint64_t i=k; i<nx; i++) {
-        w /= x[i-k];
-        w *= x[i];
-        ans->dbl_v[i] = (double) w;
+        if (x[i-k] == 0.0) {
+          zerc--;
+        } else {
+          w /= x[i-k];
+        }
+        if (x[i] == 0.0) {
+          zerc++;
+        } else {
+          w *= x[i];
+        }
+        if (zerc) {
+          ans->dbl_v[i] = 0.0;
+        } else {
+          ans->dbl_v[i] = (double) w;
+        }
       }
       if (!R_FINITE((double) w)) {
         if (hasnf==-1)
           ansSetMsg(ans, 2, "%s: has.nf=FALSE used but non-finite values are present in input, use default has.nf=NA to avoid this warning", __func__);
         if (verbose)
           ansSetMsg(ans, 0, "%s: non-finite values are present in input, re-running with extra care for NFs\n", __func__);
-        w = 1.0; truehasnf = true;
+        w = 1.0; zerc = 0; truehasnf = true;
       }
     } else {
       if (hasnf==-1)
         ansSetMsg(ans, 2, "%s: has.nf=FALSE used but non-finite values are present in input, use default has.nf=NA to avoid this warning", __func__);
       if (verbose)
         ansSetMsg(ans, 0, "%s: non-finite values are present in input, skip non-finite inaware attempt and run with extra care for NFs straighaway\n", __func__);
-      w = 1.0; truehasnf = true;
+      w = 1.0; zerc = 0; truehasnf = true;
     }
   }
   if (truehasnf) {
diff --git a/src/frolladaptive.c b/src/frolladaptive.c