Skip to content

Commit 98696e9

Browse files
Allow double-integer64 joins when double is in (integer32, integer64] range
1 parent 546259d commit 98696e9

File tree

6 files changed

+37
-8
lines changed

6 files changed

+37
-8
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ rowwiseDT(
107107
108108
11. `tables()` now returns the correct size for data.tables over 2GiB, [#6607](https://github.com/Rdatatable/data.table/issues/6607). Thanks to @vlulla for the report and the PR.
109109
110+
12. Joins of `integer64` and `double` columns succeed when the `double` column has a lossless `integer64` representation, [#6625](https://github.com/Rdatatable/data.table/issues/6625). Previously, this only worked when the double column had a lossless _32-bit_ integer representation. Thanks @MichaelChirico for the report and fix.
111+
110112
## NOTES
111113
112114
1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix.

R/bmerge.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,10 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
9292
if (xclass=="integer64" || iclass=="integer64") {
9393
nm = c(iname, xname)
9494
if (xclass=="integer64") { w=i; wc=ic; wclass=iclass; } else { w=x; wc=xc; wclass=xclass; nm=rev(nm) } # w is which to coerce
95-
if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) {
96-
if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L])
95+
if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]], i64=TRUE))) {
96+
if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which has integer64 representation, e.g. no fractions)" else "", nm[2L])
9797
set(w, j=wc, value=bit64::as.integer64(w[[wc]]))
98-
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L])
98+
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
9999
} else {
100100
# just integer and double left
101101
if (iclass=="double") {

R/wrappers.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ colnamesInt = function(x, cols, check_dups=FALSE, skip_absent=FALSE) .Call(Ccoln
1818
testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L])
1919

2020
isRealReallyInt = function(x) .Call(CisRealReallyIntR, x)
21-
isReallyReal = function(x) .Call(CisReallyReal, x)
21+
# Forwards x and i64 unchanged to the C entry point CisReallyReal; i64 defaults to FALSE so existing one-argument callers keep working.
isReallyReal = function(x, i64=FALSE) .Call(CisReallyReal, x, i64)
2222

2323
coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy)

inst/tests/tests.Rraw

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20596,3 +20596,15 @@ test(2295.3, is.data.table(d2))
2059620596

2059720597
# #6588: .checkTypos used to give arbitrary strings to stopf as the first argument
2059820598
test(2296, d2[x %no such operator% 1], error = '%no such operator%')
20599+
20600+
if (test_bit64) {
20601+
# Join to integer64 doesn't require integer32 representation, just integer64, #6625
20602+
i64_val = .Machine$integer.max + 1
20603+
DT1 = data.table(id = as.integer64(i64_val))
20604+
DT2 = data.table(id = i64_val)
20605+
test(2297.1, DT1[DT2, on='id', verbose=TRUE], DT2, output="has integer64 representation")
20606+
test(2297.2, DT2[DT1, on='id', verbose=TRUE], DT1, output="has integer64 representation")
20607+
DT2[, id := id+.01]
20608+
test(2297.3, DT1[DT2, on='id'], error="Incompatible join types")
20609+
test(2297.4, DT2[DT1, on='id'], error="Incompatible join types")
20610+
}

src/data.table.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ bool within_int32_repres(double x);
245245
bool within_int64_repres(double x);
246246
bool isRealReallyInt(SEXP x);
247247
SEXP isRealReallyIntR(SEXP x);
248-
SEXP isReallyReal(SEXP x);
248+
SEXP isReallyReal(SEXP x, SEXP i64);
249249
bool allNA(SEXP x, bool errorForBadType);
250250
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent);
251251
bool INHERITS(SEXP x, SEXP char_);

src/utils.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,17 @@ static R_xlen_t firstNonInt(SEXP x) {
2222
return i==n ? 0 : i+1;
2323
}
2424

25+
// Returns the 1-based index of the first element of the double vector x that is
// neither NA (per ISNA) nor a whole number accepted by within_int64_repres();
// returns 0 when every element passes, including when x has length 0.
// The range check is evaluated before the int64_t cast (short-circuit &&), so the
// cast is only reached for values that passed within_int64_repres().
static R_xlen_t firstNonInt64(SEXP x) {
26+
R_xlen_t n=xlength(x), i=0;
27+
const double *dx = REAL(x);
28+
while (i<n &&
29+
( ISNA(dx[i]) ||
30+
(within_int64_repres(dx[i]) && dx[i]==(int64_t)(dx[i])))) {
31+
i++;
32+
}
33+
return i==n ? 0 : i+1;
34+
}
35+
2536
bool isRealReallyInt(SEXP x) {
2637
return isReal(x) ? firstNonInt(x)==0 : false;
2738
// used to error if not passed type double but this needed extra is.double() calls in calling R code
@@ -32,9 +43,13 @@ SEXP isRealReallyIntR(SEXP x) {
3243
return ScalarLogical(isRealReallyInt(x));
3344
}
3445

35-
SEXP isReallyReal(SEXP x) {
36-
return ScalarInteger(isReal(x) ? firstNonInt(x) : 0);
37-
// return the 1-based location of first element which is really real (i.e. not an integer) otherwise 0 (false)
46+
// Returns, as a length-1 integer, the 1-based location of the first element of x which is
// really real (i.e. not an integer), otherwise 0 (false). A non-double x always yields 0.
// i64 selects which notion of "integer" is used: when LOGICAL(i64)[0] is non-zero the scan
// is done by firstNonInt64(), otherwise by firstNonInt().
// NOTE(review): i64 is read without checking its type or length, and an NA logical would
// presumably be treated as true -- confirm callers always pass TRUE or FALSE.
47+
SEXP isReallyReal(SEXP x, SEXP i64) {
48+
if (LOGICAL(i64)[0]) {
49+
return ScalarInteger(isReal(x) ? firstNonInt64(x) : 0);
50+
} else {
51+
return ScalarInteger(isReal(x) ? firstNonInt(x) : 0);
52+
}
3853
}
3954

4055
bool allNA(SEXP x, bool errorForBadType) {

0 commit comments

Comments
 (0)