Skip to content

Commit 89d39f0

Browse files
committed
range_str: propagate rehashed marks to caller
1 parent a7f5c6c commit 89d39f0

File tree

3 files changed

+13
-3
lines changed

3 files changed

+13
-3
lines changed

inst/tests/tests.Rraw

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21876,3 +21876,10 @@ test(2347, DT[i, .(result = all(is.na(grp) == is.na(a))), by = grp][,all(result)
2187621876
DT = data.table(a = as.Date("2010-01-01"), b = 1L)
2187721877
test(2348.1, tryCatch(DT[a == as.Date("20100101")], error=conditionCall)[[1L]], quote(charToDate))
2187821878
test(2348.2, tryCatch(DT[a == as.Date("20100101") | b == 2L], error=conditionCall)[[1L]], quote(charToDate))
21879+
21880+
# exercise rehashing during forder, #6694
21881+
strings = as.character(6145:1)
21882+
DT = data.table(x = strings)
21883+
setorder(DT, x)
21884+
test(2349, DT[["x"]], sort.int(strings, method='radix'))
21885+
rm(DT, strings)

src/forder.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,14 +291,15 @@ static void cradix(SEXP *x, int n)
291291
free(cradix_xtmp); cradix_xtmp=NULL;
292292
}
293293

294-
static void range_str(const SEXP *x, int n, uint64_t *out_min, uint64_t *out_max, int *out_na_count, bool *out_anynotascii, bool *out_anynotutf8, hashtab * marks)
294+
static void range_str(const SEXP *x, int n, uint64_t *out_min, uint64_t *out_max, int *out_na_count, bool *out_anynotascii, bool *out_anynotutf8, hashtab **marks_)
295295
// group numbers are left in truelength to be fetched by WRITE_KEY
296296
{
297297
int na_count=0;
298298
bool anynotascii=false, anynotutf8=false;
299299
if (ustr_n!=0) internal_error_with_cleanup(__func__, "ustr isn't empty when starting range_str: ustr_n=%d, ustr_alloc=%d", ustr_n, ustr_alloc); // # nocov
300300
if (ustr_maxlen!=0) internal_error_with_cleanup(__func__, "ustr_maxlen isn't 0 when starting range_str"); // # nocov
301301
bool fail = false;
302+
hashtab *marks = *marks_;
302303
#pragma omp parallel for num_threads(getDTthreads(n, true)) shared(marks, fail)
303304
for(int i=0; i<n; i++) {
304305
SEXP s = x[i];
@@ -338,6 +339,8 @@ static void range_str(const SEXP *x, int n, uint64_t *out_min, uint64_t *out_max
338339
}
339340
}
340341
}
342+
// if the hash table grew, propagate the changes to the caller
343+
*marks_ = marks;
341344
if (fail) internal_error_with_cleanup(__func__, "failed to grow the 'marks' hash table");
342345
*out_na_count = na_count;
343346
*out_anynotascii = anynotascii;
@@ -613,7 +616,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA
613616
// need2utf8 now happens inside range_str on the uniques
614617
marks = hash_create(4096); // relatively small to allocate, can grow exponentially later
615618
PROTECT(marks->prot); n_protect++;
616-
range_str(STRING_PTR_RO(x), nrow, &min, &max, &na_count, &anynotascii, &anynotutf8, marks);
619+
range_str(STRING_PTR_RO(x), nrow, &min, &max, &na_count, &anynotascii, &anynotutf8, &marks);
617620
break;
618621
default:
619622
STOP(_("Column %d passed to [f]order is type '%s', not yet supported."), col+1, type2char(TYPEOF(x)));

src/hash.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ void hash_set(hashtab *self, SEXP key, R_xlen_t value) {
149149
if (!new_h) internal_error( // # nocov
150150
__func__, "hash table full at n_full=%zu and failed to rehash", h->size
151151
);
152-
// overwrite the existing table, keeping the external pointer
152+
// overwrite the existing table, keeping the EXTPTR -> (next ->)* h chain intact
153153
free(h->table);
154154
*h = *new_h;
155155
free(new_h);

0 commit comments

Comments
 (0)