Skip to content

Commit dd7609e

Browse files
Fixes rare non-equi join segfault, closes #4388. (#4389)
1 parent b1b1832 commit dd7609e

File tree

4 files changed

+18
-6
lines changed

4 files changed

+18
-6
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ unit = "s")
107107

108108
12. `rbindlist` no longer errors when coercing complex vectors to character vectors, [#4202](https://github.com/Rdatatable/data.table/issues/4202). Thanks to @sritchie73 for reporting and the PR.
109109

110+
13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388).
111+
110112
## NOTES
111113

112114
0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto.

R/data.table.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ replace_dot_alias = function(e) {
497497
if (nqbyjoin) {
498498
irows = if (length(xo)) xo[irows] else irows
499499
xo = setorder(setDT(list(indices=rep.int(indices__, len__), irows=irows)))[["irows"]]
500-
ans = .Call(CnqRecreateIndices, xo, len__, indices__, max(indices__))
500+
ans = .Call(CnqRecreateIndices, xo, len__, indices__, max(indices__), nomatch) # issue#4388 fix
501501
f__ = ans[[1L]]; len__ = ans[[2L]]
502502
allLen1 = FALSE # TODO; should this always be FALSE?
503503
irows = NULL # important to reset

inst/tests/tests.Rraw

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11937,7 +11937,14 @@ DT2[, DT2_ID := .I][, (cols) := lapply(.SD, as.Date), .SDcols=cols]
1193711937
ans1 = DT2[DT1, on=.(RANDOM_STRING, START_DATE <= DATE, EXPIRY_DATE >= DATE), .N, by=.EACHI ]$N > 0L
1193811938
tmp = DT1[DT2, on=.(RANDOM_STRING, DATE >= START_DATE, DATE <= EXPIRY_DATE), which=TRUE, nomatch=0L]
1193911939
ans2 = DT1[, DT1_ID %in% tmp]
11940-
test(1848, ans1, ans2)
11940+
test(1848.1, ans1, ans2)
11941+
11942+
# Fix for #4388; related to #2275 fix
11943+
x <- data.table(id = "a", t = as.ITime(c(31140L, 31920L, 31860L, 31680L, 31200L, 31380L, 31020L, 31260L, 31320L, 31560L, 31080L, 31800L, 31500L, 31440L, 31740L, 31620L)), s = c(37.19, 37.10, 37.10, 37.10, 37.1, 24.81, 61.99, 37.1, 37.1, 37.38, 49.56, 73.89, 37.38, 24.81, 37.01, 37.38), val = c(40L, 53L, 52L, 49L, 41L, 44L, 38L, 42L, 43L, 47L, 39L, 51L, 46L, 45L, 50L, 48L))
11944+
y <- data.table(id = c("a", "b"), t1 = as.ITime(c(31020L, 42240L)), t2 = as.ITime(c(31920L, 43140L)), s1 = c(0L, 0L),
11945+
s2 = c(200, 200))
11946+
# testing that it doesn't segfault
11947+
test(1848.2, x[y, on=.(id, s >= s1, s <= s2, t >= t1, t <= t2), .(val), by=.EACHI, nomatch=0L, allow.cartesian=TRUE]$val, x$val)
1194111948

1194211949
# when last field is quoted contains sep and select= is used too, #2464
1194311950
test(1849.1, fread('Date,Description,Amount,Balance\n20150725,abcd,"$3,004","$5,006"', select=c("Date", "Description", "Amount")),

src/nqrecreateindices.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
// TODO: Add oxygen style comments and cleanup var names.
44
// See other TODOs inside the function.
5-
SEXP nqRecreateIndices(SEXP xo, SEXP len, SEXP indices, SEXP nArg) {
5+
SEXP nqRecreateIndices(SEXP xo, SEXP len, SEXP indices, SEXP nArg, SEXP nomatch) {
66

7-
R_len_t n=INTEGER(nArg)[0];
7+
R_len_t n=INTEGER(nArg)[0], xn=length(xo);
88
SEXP ans, newstarts, newlen;
99
ans = PROTECT(allocVector(VECSXP, 2));
1010
SET_VECTOR_ELT(ans, 0, (newstarts = allocVector(INTSXP, n)));
@@ -14,6 +14,7 @@ SEXP nqRecreateIndices(SEXP xo, SEXP len, SEXP indices, SEXP nArg) {
1414
const int *iindices = INTEGER(indices);
1515
const int *ilen = INTEGER(len);
1616
const int *ixo = INTEGER(xo);
17+
const int *inomatch = INTEGER(nomatch);
1718
int *inewstarts = INTEGER(newstarts);
1819

1920
for (int i=0; i<n; ++i) inewlen[i] = 0;
@@ -25,8 +26,10 @@ SEXP nqRecreateIndices(SEXP xo, SEXP len, SEXP indices, SEXP nArg) {
2526
// TODO: revisit to see if this be simplified further when I've some time.
2627
R_len_t j=0, tmp=0;
2728
for (int i=0; i<n; ++i) {
28-
if (ixo[j] <= 0) { // NA_integer_ = INT_MIN is checked in init.c
29-
inewstarts[i] = ixo[j];
29+
if (ixo[j] <= 0 || j >= xn) {
30+
// NA_integer_ = INT_MIN is checked in init.c
31+
// j >= xn needed for special nomatch=0L case, see issue#4388 (due to xo[irows] from R removing '0' value in xo)
32+
inewstarts[i] = inomatch[0];
3033
j++; // newlen will be 1 for xo=NA and 0 for xo=0 .. but we need to increment by 1 for both
3134
} else {
3235
inewstarts[i] = tmp+1;

0 commit comments

Comments
 (0)