Skip to content

Commit 60f7c22

Browse files
committed
frollprod adaptive fast redirect to exact
1 parent 4074e12 commit 60f7c22

File tree

3 files changed

+20
-123
lines changed

3 files changed

+20
-123
lines changed

inst/tests/froll.Rraw

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,8 +1202,8 @@ test(6000.931, frollprod(1:3, 2), c(NA, 2, 6), output="frollprodFast: running fo
12021202
test(6000.932, frollprod(1:3, 2, align="left"), c(2, 6, NA), output="frollfun: align")
12031203
test(6000.933, frollprod(c(1,2,NA), 2), c(NA, 2, NA), output="non-finite values are present in input, re-running with extra care for NFs")
12041204
test(6000.934, frollprod(c(NA,2,3), 2), c(NA, NA, 6), output="non-finite values are present in input, skip non-finite inaware attempt and run with extra care for NFs straighaway")
1205-
test(6000.935, frollprod(1:3, c(2,2,2), adaptive=TRUE), c(NA, 2, 6), output="frolladaptiveprodFast: running for input length")
1206-
test(6000.936, frollprod(c(NA,2,3), c(2,2,2), adaptive=TRUE), c(NA, NA, 6), output="non-finite values are present in input, re-running with extra care for NFs")
1205+
test(6000.935, frollprod(1:3, c(2,2,2), adaptive=TRUE), c(NA, 2, 6), output="algo 0 not implemented, fall back to 1")
1206+
test(6000.936, frollprod(c(NA,2,3), c(2,2,2), adaptive=TRUE), c(NA, NA, 6), output="non-finite values are present in input, na.rm=FALSE and algo='exact' propagates NFs properply, no need to re-run")
12071207
options(datatable.verbose=FALSE)
12081208
# floating point overflow
12091209
test(6000.941, frollprod(c(1e100, 1e100, 1e100, 1e100, 1e100), 5), c(NA,NA,NA,NA,Inf))
@@ -1222,6 +1222,13 @@ test(6000.953, frollprod(c(1e100, 1e100, 1e100, 1e100, 1e100), rep(5, 5), algo="
12221222
test(6000.954, frollprod(c(1e100, 1e100, 1e100, 1e100, 1e100), rep(4, 5), algo="exact", adaptive=TRUE), c(NA,NA,NA,Inf,Inf))
12231223
test(6000.955, frollprod(c(1e100, 1e100, 1e100, 1e100, -1e100), rep(5, 5), algo="exact", adaptive=TRUE), c(NA,NA,NA,NA,-Inf))
12241224
test(6000.956, frollprod(c(1e100, 1e100, 1e100, 1e100, -1e100), rep(4, 5), algo="exact", adaptive=TRUE), c(NA,NA,NA,Inf,-Inf))
1225+
# rolling product and numerical stability #7349
1226+
test(6000.9601, frollprod(c(2,2,0,2,2), 2), c(NA,4,0,0,4))
1227+
test(6000.9602, frollprod(c(2,2,0,-2,2), 2), c(NA,4,0,0,-4))
1228+
test(6000.9603, frollprod(c(2,2,0,-2,-2), 2), c(NA,4,0,0,4))
1229+
test(6000.9604, frollprod(c(2,2,0,Inf,2), 2), c(NA,4,0,NaN,Inf))
1230+
test(6000.9605, frollprod(c(2,2,0,-Inf,2), 2), c(NA,4,0,NaN,-Inf))
1231+
test(6000.9606, frollprod(c(2,2,0,-Inf,-2), 2), c(NA,4,0,NaN,Inf))
12251232

12261233
# n==0, k==0, k[i]==0
12271234
test(6001.111, frollmean(1:3, 0), c(NaN,NaN,NaN), options=c("datatable.verbose"=TRUE), output="window width of size 0")

src/data.table.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ void frolladaptivesumExact(const double *x, uint64_t nx, ans_t *ans, const int *
251251
void frolladaptivemaxExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
252252
//void frolladaptiveminFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose); // does not exist as of now
253253
void frolladaptiveminExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
254-
void frolladaptiveprodFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
254+
//void frolladaptiveprodFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose); // does not exist as of now
255255
void frolladaptiveprodExact(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose);
256256

257257
// frollR.c

src/frolladaptive.c

Lines changed: 10 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@ void frolladaptivefun(rollfun_t rfun, unsigned int algo, const double *x, uint64
4141
frolladaptiveminExact(x, nx, ans, k, fill, narm, hasnf, verbose);
4242
break;
4343
case PROD :
44-
if (algo==0) {
45-
frolladaptiveprodFast(x, nx, ans, k, fill, narm, hasnf, verbose);
46-
} else if (algo==1) {
47-
frolladaptiveprodExact(x, nx, ans, k, fill, narm, hasnf, verbose);
44+
if (algo==0 && verbose) {
45+
//frolladaptiveprodFast(x, nx, ans, k, fill, narm, hasnf, verbose); // frolladaptiveprodFast does not exist as of now
46+
snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1);
4847
}
48+
frolladaptiveprodExact(x, nx, ans, k, fill, narm, hasnf, verbose);
4949
break;
5050
default: // #nocov
5151
error(_("Internal error: Unknown rfun value in froll: %d"), rfun); // #nocov
@@ -635,123 +635,13 @@ void frolladaptiveminExact(const double *x, uint64_t nx, ans_t *ans, const int *
635635
}
636636
}
637637

638-
#undef PROD_WINDOW_STEP_VALUE
639-
#define PROD_WINDOW_STEP_VALUE \
640-
if (wn>0) { \
641-
if (narm) { \
642-
if (wpinf == 0 && wninf == 0) { \
643-
int thisk = k[i] - ((int) wn); \
644-
ans->dbl_v[i] = thisk==0 ? 1.0 : (double) ws; \
645-
} else { \
646-
ans->dbl_v[i] = (wninf+(ws<0))%2 ? R_NegInf : R_PosInf; \
647-
} \
648-
} else { \
649-
ans->dbl_v[i] = NA_REAL; \
650-
} \
651-
} else { \
652-
if (wpinf == 0 && wninf == 0) { \
653-
ans->dbl_v[i] = (double) ws; /* k[i]==0 produces 1.0 */ \
654-
} else { \
655-
ans->dbl_v[i] = (wninf+(ws<0))%2 ? R_NegInf : R_PosInf; \
656-
} \
657-
}
658-
659-
/* fast rolling adaptive prod - fast
660-
* same as adaptive mean fast
638+
/* fast rolling adaptive prod - fast - now redirects to exact!
639+
* it was the same as adaptive mean fast in 5975ffdc9839d507f4cd9cbba960198e301cf604
640+
* it was fast but not fully accurate and, considering that non-adaptive partial=TRUE falls back to adaptive, we would like it to be as accurate as non-adaptive
641+
* this could be implemented by tracking logsum rather than prod, together with zero and negative, as in the non-adaptive fast roll prod
642+
* for the time being it redirects to algo=exact; see the git hash above for the previous implementation if needed
661643
*/
662-
void frolladaptiveprodFast(const double *x, uint64_t nx, ans_t *ans, const int *k, double fill, bool narm, int hasnf, bool verbose) {
663-
if (verbose)
664-
snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasnf %d, narm %d\n"), "frolladaptiveprodFast", (uint64_t)nx, hasnf, (int) narm);
665-
bool truehasnf = hasnf>0;
666-
long double w = 1.0;
667-
double *cs = malloc(sizeof(*cs) * nx);
668-
if (!cs) { // # nocov start
669-
ansSetMsg(ans, 3, "%s: Unable to allocate memory for cumprod", __func__); // raise error
670-
return;
671-
} // # nocov end
672-
if (!truehasnf) {
673-
for (uint64_t i=0; i<nx; i++) {
674-
w *= x[i];
675-
cs[i] = (double) w;
676-
}
677-
if (R_FINITE((double) w)) {
678-
#pragma omp parallel for num_threads(getDTthreads(nx, true))
679-
for (uint64_t i=0; i<nx; i++) {
680-
if (i+1 == k[i]) {
681-
ans->dbl_v[i] = cs[i];
682-
} else if (i+1 > k[i]) {
683-
ans->dbl_v[i] = cs[i]/cs[i-k[i]]; // for k[i]==0 it turns into cs[i]/cs[i] = 1, as expected
684-
} else {
685-
ans->dbl_v[i] = fill;
686-
}
687-
}
688-
} else {
689-
if (hasnf==-1)
690-
ansSetMsg(ans, 2, "%s: has.nf=FALSE used but non-finite values are present in input, use default has.nf=NA to avoid this warning", __func__);
691-
if (verbose)
692-
ansSetMsg(ans, 0, "%s: non-finite values are present in input, re-running with extra care for NFs\n", __func__);
693-
w = 1.0; truehasnf = true;
694-
}
695-
}
696-
if (truehasnf) {
697-
uint64_t nc = 0, pinf = 0, ninf = 0; // running NA counter
698-
uint64_t *cn = malloc(sizeof(*cn) * nx); // cumulative NA counter, used the same way as cumprod, same as uint64_t cn[nx] but no segfault
699-
if (!cn) { // # nocov start
700-
ansSetMsg(ans, 3, "%s: Unable to allocate memory for cum NA counter", __func__); // raise error
701-
free(cs);
702-
return;
703-
} // # nocov end
704-
uint64_t *cpinf = malloc(sizeof(*cpinf) * nx);
705-
if (!cpinf) { // # nocov start
706-
ansSetMsg(ans, 3, "%s: Unable to allocate memory for cum Inf counter", __func__); // raise error
707-
free(cs); free(cn);
708-
return;
709-
} // # nocov end
710-
uint64_t *cninf = malloc(sizeof(*cninf) * nx);
711-
if (!cninf) { // # nocov start
712-
ansSetMsg(ans, 3, "%s: Unable to allocate memory for cum -Inf counter", __func__); // raise error
713-
free(cs); free(cn); free(cpinf);
714-
return;
715-
} // # nocov end
716-
for (uint64_t i=0; i<nx; i++) { // loop over observations to calculate cumprod and cum NA counter
717-
if (R_FINITE(x[i])) {
718-
w *= x[i]; // add observation to running prod
719-
} else if (ISNAN(x[i])) {
720-
nc++; // increment non-finite counter
721-
} else if (x[i]==R_PosInf) {
722-
pinf++;
723-
} else if (x[i]==R_NegInf) {
724-
ninf++;
725-
}
726-
cs[i] = (double) w; // cumprod, na.rm=TRUE always, NAs handled using cum NA counter
727-
cn[i] = nc; // cum NA counter
728-
cpinf[i] = pinf;
729-
cninf[i] = ninf;
730-
}
731-
#pragma omp parallel for num_threads(getDTthreads(nx, true))
732-
for (uint64_t i=0; i<nx; i++) {
733-
uint64_t wn, wpinf, wninf;
734-
double ws;
735-
if (i+1 < k[i]) { // partial window
736-
ans->dbl_v[i] = fill;
737-
} else if (i+1 == k[i]) { // first full window
738-
wn = cn[i];
739-
wpinf = cpinf[i];
740-
wninf = cninf[i];
741-
ws = cs[i];
742-
PROD_WINDOW_STEP_VALUE
743-
} else { // all the remaining full windows
744-
wn = cn[i] - cn[i-k[i]]; // NAs in current window
745-
wpinf = cpinf[i] - cpinf[i-k[i]]; // Inf in current window
746-
wninf = cninf[i] - cninf[i-k[i]]; // -Inf in current window
747-
ws = cs[i] / cs[i-k[i]]; // cumprod in current window
748-
PROD_WINDOW_STEP_VALUE
749-
}
750-
}
751-
free(cninf); free(cpinf); free(cn);
752-
}
753-
free(cs);
754-
}
644+
755645
/* fast rolling adaptive prod - exact
756646
* same as adaptive mean exact
757647
*/

0 commit comments

Comments
 (0)