Skip to content

Commit 2e53105

Browse files
committed
RM static vars; na = NA in forderReuseSorting
1 parent ed388d5 commit 2e53105

File tree

1 file changed

+41
-21
lines changed

1 file changed

+41
-21
lines changed

src/forder.c

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,20 @@ static int ustr_alloc = 0;
5050
static int ustr_n = 0;
5151
static int ustr_maxlen = 0;
5252
static int sortType = 0; // 0 just group; -1 descending, +1 ascending
53-
static int nalast = 0; // 1 (true i.e. last), 0 (false i.e. first), -1 (na i.e. remove)
53+
5454
static int nradix = 0;
5555
static uint8_t **key = NULL;
5656
static int *anso = NULL;
5757
static bool notFirst=false;
5858

59+
static int lgl_arg_to_int(int lgl_arg) {
60+
if (lgl_arg == NA_LOGICAL) {
61+
return -1;
62+
}
63+
return lgl_arg;
64+
}
65+
66+
5967
static char msg[1001];
6068
// use STOP in this file (not error()) to ensure cleanup() is called first
6169
// snprintf to msg first in case nrow (just as an example) is provided in the message because cleanup() sets nrow to 0
@@ -440,7 +448,7 @@ uint64_t dtwiddle(double x) //const void *p, int i)
440448
STOP(_("Unknown non-finite value; not NA, NaN, -Inf or +Inf")); // # nocov
441449
}
442450

443-
void radix_r(const int from, const int to, int radix);
451+
void radix_r(const int from, const int to, int radix, bool na_remove);
444452

445453
/*
446454
OpenMP is used here to parallelize multiple operations that come together to
@@ -527,7 +535,8 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA
527535
STOP(_("At least one of retGrp= or sort= must be TRUE"));
528536
if (!isLogical(naArg) || LENGTH(naArg) != 1)
529537
STOP(_("na.last must be logical TRUE, FALSE or NA of length 1")); // # nocov # covered in reuseSorting forder
530-
nalast = (LOGICAL(naArg)[0] == NA_LOGICAL) ? -1 : LOGICAL(naArg)[0]; // 1=na last, 0=na first (default), -1=remove na
538+
const int nalast = lgl_arg_to_int(LOGICAL(naArg)[0]); // 1=na last, 0=na first (default), -1=remove na
539+
const bool na_remove = (nalast == -1);
531540

532541
if (nrow==0) {
533542
// empty vector or 0-row DT is always sorted
@@ -803,7 +812,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA
803812
free(TMP); free(UGRP); // # nocov
804813
STOP(_("Failed to allocate TMP or UGRP or they weren't cache line aligned: nth=%d"), nth); // # nocov
805814
}
806-
815+
807816
if (retgrp) {
808817
gs_thread = calloc(nth, sizeof(*gs_thread)); // thread private group size buffers
809818
gs_thread_alloc = calloc(nth, sizeof(*gs_thread_alloc));
@@ -814,7 +823,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA
814823
}
815824
}
816825
if (nradix) {
817-
radix_r(0, nrow-1, 0); // top level recursive call: (from, to, radix)
826+
radix_r(0, nrow-1, 0, na_remove); // top level recursive call: (from, to, radix)
818827
} else {
819828
push(&nrow, 1);
820829
}
@@ -906,7 +915,7 @@ static bool sort_ugrp(uint8_t *x, const int n)
906915
return skip;
907916
}
908917

909-
void radix_r(const int from, const int to, int radix) {
918+
void radix_r(const int from, const int to, int radix, bool na_remove) {
910919
for (;;) {
911920
TBEG();
912921
const int my_n = to-from+1;
@@ -1038,7 +1047,7 @@ void radix_r(const int from, const int to, int radix) {
10381047
continue;
10391048
} else {
10401049
for (int i=0, f=from; i<ngrp; i++) {
1041-
radix_r(f, f+my_gs[i]-1, radix+1);
1050+
radix_r(f, f+my_gs[i]-1, radix+1, na_remove);
10421051
f+=my_gs[i];
10431052
}
10441053
}
@@ -1097,7 +1106,7 @@ void radix_r(const int from, const int to, int radix) {
10971106
}
10981107

10991108
int *restrict my_TMP = TMP + omp_get_thread_num()*UINT16_MAX; // Allocated up front to save malloc calls which i) block internally and ii) could fail
1100-
if (radix==0 && nalast!=-1) {
1109+
if (radix==0 && !na_remove) {
11011110
// anso contains 1:n so skip reading and copying it. Only happens when nrow<65535. Saving worth the branch (untested) when user repeatedly calls a small-n small-cardinality order.
11021111
for (int i=0; i<my_n; i++) anso[my_starts[my_key[i]]++] = i+1; // +1 as R is 1-based.
11031112
// The loop counter could be uint_fast16_t since max i here will be UINT16_MAX-1 (65534), hence ++ after last iteration won't overflow 16bits. However, have chosen signed
@@ -1147,7 +1156,7 @@ void radix_r(const int from, const int to, int radix) {
11471156
} else {
11481157
// this single thread will now descend and resolve all groups, now that the groups are close in cache
11491158
for (int i=0, my_from=from; i<ngrp; i++) {
1150-
radix_r(my_from, my_from+my_gs[i]-1, radix+1);
1159+
radix_r(my_from, my_from+my_gs[i]-1, radix+1, na_remove);
11511160
my_from+=my_gs[i];
11521161
}
11531162
}
@@ -1368,7 +1377,7 @@ void radix_r(const int from, const int to, int radix) {
13681377
// each in parallel here and they're all dealt with in parallel. There is no nestedness here.
13691378
for (int i=0; i<ngrp; i++) {
13701379
int start = from + starts[ugrp[i]];
1371-
radix_r(start, start+my_gs[i]-1, radix+1);
1380+
radix_r(start, start+my_gs[i]-1, radix+1, na_remove);
13721381
flush();
13731382
}
13741383
TEND(24)
@@ -1380,7 +1389,7 @@ void radix_r(const int from, const int to, int radix) {
13801389
#pragma omp parallel for ordered schedule(dynamic) num_threads(MIN(nth, ngrp)) // #5077
13811390
for (int i=0; i<ngrp; i++) {
13821391
int start = from + starts[ugrp[i]];
1383-
radix_r(start, start+my_gs[i]-1, radix+1);
1392+
radix_r(start, start+my_gs[i]-1, radix+1, na_remove);
13841393
#pragma omp ordered
13851394
flush();
13861395
}
@@ -1389,7 +1398,7 @@ void radix_r(const int from, const int to, int radix) {
13891398
#pragma omp parallel for schedule(dynamic) num_threads(MIN(nth, ngrp)) // #5077
13901399
for (int i=0; i<ngrp; i++) {
13911400
int start = from + starts[ugrp[i]];
1392-
radix_r(start, start+my_gs[i]-1, radix+1);
1401+
radix_r(start, start+my_gs[i]-1, radix+1, na_remove);
13931402
}
13941403
}
13951404
TEND(25)
@@ -1682,8 +1691,15 @@ bool GetAutoIndex(void) {
16821691
}
16831692

16841693
// attr(idx, "anyna")>0 || attr(idx, "anyinfnan")>0
1685-
bool idxAnyNF(SEXP idx) {
1686-
return INTEGER(getAttrib(idx, sym_anyna))[0]>0 || INTEGER(getAttrib(idx, sym_anyinfnan))[0]>0;
1694+
static inline bool idxAnyNF(SEXP idx) {
1695+
SEXP any_na = getAttrib(idx, sym_anyna);
1696+
// Defensive: #7519 to avoid segfaults on CRAN
1697+
if (!isInteger(any_na) || xlength(any_na) != 1) return true;
1698+
1699+
SEXP any_infnan = getAttrib(idx, sym_anyinfnan);
1700+
if (!isInteger(any_infnan) || xlength(any_infnan) != 1) return true;
1701+
1702+
return INTEGER(any_na)[0]>0 || INTEGER(any_infnan)[0]>0;
16871703
}
16881704

16891705
// forder, re-use existing key or index if possible, otherwise call forder
@@ -1708,7 +1724,11 @@ SEXP forderReuseSorting(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP
17081724
bool sortGroups = (bool)LOGICAL(sortGroupsArg)[0];
17091725
if (!isLogical(naArg) || LENGTH(naArg) != 1)
17101726
error(_("na.last must be logical TRUE, FALSE or NA of length 1"));
1711-
bool na = (bool)LOGICAL(naArg)[0];
1727+
int na_arg = lgl_arg_to_int(LOGICAL(naArg)[0]);
1728+
1729+
const bool na_first = (na_arg == 0); // na.last=FALSE
1730+
const bool na_last = (na_arg == 1); // na.last=TRUE
1731+
const bool na_remove = (na_arg == -1); // na.last=NA
17121732
if (!isInteger(ascArg))
17131733
error(_("order must be integer")); // # nocov # coerced to int in R
17141734
if (!isLogical(reuseSortingArg) || LENGTH(reuseSortingArg) != 1)
@@ -1739,18 +1759,18 @@ SEXP forderReuseSorting(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP
17391759
opt = 0;
17401760
}
17411761
SEXP ans = R_NilValue;
1742-
if (opt == -1 && !na && !retGrp && colsKeyHead(DT, by)) {
1762+
if (opt == -1 && na_first && !retGrp && colsKeyHead(DT, by)) {
17431763
opt = 1; // keyOpt
17441764
ans = PROTECT(allocVector(INTSXP, 0)); protecti++;
17451765
if (verbose)
17461766
Rprintf(_("forderReuseSorting: using key: %s\n"), CHAR(STRING_ELT(idxName(DT, by), 0)));
17471767
}
17481768
if (opt == -1 && GetUseIndex()) {
17491769
SEXP idx = getIndex(DT, by);
1750-
if (!isNull(idx)) {
1751-
bool hasStats = !isNull(getAttrib(idx, sym_anyna));
1752-
if (!na || // na.last=FALSE
1753-
(hasStats && !idxAnyNF(idx))) { // na.last=TRUE && !anyNA
1770+
if (!isNull(idx) && !na_remove) {
1771+
bool hasStats = !isNull(getAttrib(idx, sym_anyna)) && !isNull(getAttrib(idx, sym_anyinfnan));
1772+
if (na_first || // na.last=FALSE
1773+
(na_last && hasStats && !idxAnyNF(idx))) { // na.last=TRUE && !anyNA
17541774
bool hasGrp = !isNull(getAttrib(idx, sym_starts));
17551775
if (hasGrp && !hasStats)
17561776
internal_error_with_cleanup(__func__, "index has 'starts' attribute but not 'anyna', please report to issue tracker"); // # nocov
@@ -1809,7 +1829,7 @@ SEXP forderReuseSorting(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP
18091829
if (opt < 1) {
18101830
ans = PROTECT(forder(DT, by, retGrpArg, retStatsArg, sortGroupsArg, ascArg, naArg)); protecti++;
18111831
if (opt == -1 && // opt==0 means that arguments (sort, asc) were not of type index, or reuseSorting=FALSE
1812-
(!na || (retStats && !idxAnyNF(ans))) && // lets create index even if na.last=T used but no NAs detected!
1832+
(na_first || (retStats && !idxAnyNF(ans))) && // lets create index even if na.last=T used but no NAs detected!
18131833
GetUseIndex() &&
18141834
GetAutoIndex()) { // disabled by default, use datatable.forder.auto.index=T to enable, do not export/document, use for debugging only
18151835
putIndex(DT, by, ans);

0 commit comments

Comments
 (0)