Skip to content

Commit 05d6797

Browse files
badasahog and aitap authored
Respect integer64= selection when out-of-sample type bumps happen in fread() (#7032)
* fixed #2749
* updated news
* added tests
* changed readInt64As to an argument instead of extern object
* Style fix
* Revert internal variable to static

---------

Co-authored-by: Ivan K <[email protected]>
1 parent 8fa0ffb commit 05d6797

File tree

5 files changed

+24
-2
lines changed

5 files changed

+24
-2
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040

4141
10. On a heavily loaded machine, a `forder` thread could try to perform a zero-length copy from a null pointer, which was de-facto harmless but is against the C standard and was caught by additional CRAN checks, [#7051](https://github.com/Rdatatable/data.table/issues/7051). Thanks to @helske for the report and @aitap for the PR.
4242

43+
11. Out-of-sample type bumps in `fread()` now respect the `integer64=` selection, [#7032](https://github.com/Rdatatable/data.table/pull/7032).
44+
4345
### NOTES
4446

4547
1. Continued work to remove non-API C functions, [#6180](https://github.com/Rdatatable/data.table/issues/6180). Thanks Ivan Krylov for the PRs and for writing a clear and concise guide about the R API: https://aitap.codeberg.page/R-api/.

inst/tests/tests.Rraw

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3070,6 +3070,16 @@ if (test_bit64) test(1017.1, fread(f), copy(DT)[,A:=as.integer64(A)])
30703070
test(1017.2, fread(f, integer64="character"), DT)
30713071
unlink(f)
30723072

3073+
DT = data.table(a=seq(10000), b="100")
3074+
DT[111, b := "1000000000000"]
3075+
f = tempfile()
3076+
fwrite(DT, f)
3077+
3078+
test(1017.3, fread(f, integer64="numeric"), fread(f, colClasses=c("integer", "numeric")))
3079+
test(1017.4, fread(f, integer64="character"), fread(f, colClasses=c("integer", "character")))
3080+
3081+
unlink(f)
3082+
30733083
# ERANGE errno handled, #106 #4165
30743084
test(1018.1, identical(fread("1.46761e-313\n"), data.table(V1=1.46761e-313)))
30753085
test(1018.2, identical(fread("1.46761e+313\n"), data.table(V1=1.46761e+313)))

src/fread.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2300,7 +2300,7 @@ int freadMain(freadMainArgs _args) {
23002300
nth = imin(nJumps, nth);
23012301

23022302
if (verbose) DTPRINT(_("[11] Read the data\n"));
2303-
while(true){ // we'll return here to reread any columns with out-of-sample type exceptions, or dirty jumps
2303+
for (;;) { // we'll return here to reread any columns with out-of-sample type exceptions, or dirty jumps
23042304
restartTeam = false;
23052305
if (verbose)
23062306
DTPRINT(" jumps=[%d..%d), chunk_size=%zu, total_size=%td\n", jump0, nJumps, chunkBytes, eof-pos); // # notranslate
@@ -2510,7 +2510,12 @@ int freadMain(freadMainArgs _args) {
25102510
// check that the new type is sufficient for the rest of the column (and any other columns also in out-of-sample bump status) to be
25112511
// sure a single re-read will definitely work.
25122512
while (++absType<CT_STRING && disabled_parsers[absType]) {};
2513-
thisType = TOGGLE_BUMP(absType);
2513+
2514+
if(args.readInt64As != CT_INT64 && absType == CT_INT64)
2515+
thisType = TOGGLE_BUMP(args.readInt64As);
2516+
else
2517+
thisType = TOGGLE_BUMP(absType);
2518+
25142519
tch = fieldStart;
25152520
}
25162521

src/fread.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ typedef struct freadMainArgs
167167
// should datetime with no Z or UTZ-offset be read as UTC?
168168
bool noTZasUTC;
169169

170+
// Integer64 remap
171+
colType readInt64As;
172+
170173
char _padding[1];
171174

172175
// Any additional implementation-specific parameters.

src/freadR.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ SEXP freadR(
177177
readInt64As = CT_FLOAT64;
178178
} else STOP(_("Invalid value integer64='%s'. Must be 'integer64', 'character', 'double' or 'numeric'"), tt);
179179

180+
args.readInt64As = readInt64As;
181+
180182
colClassesSxp = colClassesArg;
181183

182184
selectSxp = selectArg;

0 commit comments

Comments
 (0)