Skip to content

Commit 8f505d5

Browse files
Reset dec='\0' to detect numbers while dec is undecided (#6445)
1 parent 4d42cdc commit 8f505d5

File tree

4 files changed

+33
-11
lines changed

4 files changed

+33
-11
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ rowwiseDT(
6767

6868
10. Fixed possible segfault in `setDT(df); attr(df, key) <- value; set(df, ...)`, i.e. adding columns to an object with `set()` that was converted to data.table with `setDT()` and later had attributes added with `attr<-`, [#6410](https://github.com/Rdatatable/data.table/issues/6410). Thanks to @hongyuanjia for the report and @ben-schwen for the PR. Note that `setattr()` should be preferred for adding attributes to a data.table.
6969

70+
11. `fread()` automatically detects timestamps with sub-second accuracy again, [#6440](https://github.com/Rdatatable/data.table/issues/6440). This was a regression due to interference with new `dec='auto'` support. Thanks @kav2k for the concise report and @MichaelChirico for the fix.
71+
7072
## NOTES
7173

7274
1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix.

inst/tests/tests.Rraw

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18549,21 +18549,38 @@ test(2255, as.data.table(DF), output="DF1.V1.*DF1.V2.*DF2.V3.*DF2.V4.*V5")
1854918549
DT = data.table(a = letters, b = 1:26/6, c = 1:26)
1855018550
## auto-detect dec=','
1855118551
fwrite(DT, f <- tempfile(), dec=',', sep=';')
18552-
test(2256.1, fread(f), DT)
18552+
test(2256.01, fread(f), DT)
1855318553

1855418554
fwrite(DT, f, dec=',', sep='|')
18555-
test(2256.2, fread(f), DT)
18555+
test(2256.02, fread(f), DT)
1855618556

1855718557
## auto-detect dec='.'
1855818558
fwrite(DT, f)
18559-
test(2256.3, fread(f), DT)
18559+
test(2256.03, fread(f), DT)
1856018560

1856118561
## verbose output
18562-
test(2256.4, fread(f, verbose=TRUE), DT, output="sep=',' so dec set to '.'")
18562+
test(2256.04, fread(f, verbose=TRUE), DT, output="sep=',' so dec set to '.'")
1856318563

1856418564
fwrite(DT, f, dec=',', sep=';')
18565-
test(2256.5, fread(f, verbose=TRUE), DT, output="dec=',' detected based on a balance of 18")
18566-
test(2256.6, fread('a;b\n1,14;5', verbose=TRUE), data.table(a=1.14, b=5L), output="dec=',' detected based on a balance of 1 ")
18565+
test(2256.05, fread(f, verbose=TRUE), DT, output="dec=',' detected based on a balance of 18")
18566+
test(2256.06, fread('a;b\n1,14;5', verbose=TRUE), data.table(a=1.14, b=5L), output="dec=',' detected based on a balance of 1 ")
18567+
18568+
## timestamps with subsecond accuracy thrown off by auto-dec #6440
18569+
test(2256.07, fread(text="t\n2023-10-12T06:53:53.123Z"), data.table(t=as.POSIXct('2023-10-12 06:53:53.123', tz='UTC')))
18570+
### TODO(#6447): sep="\t" shouldn't be needed here, right?
18571+
test(2256.08, fread(text="t\n2023-10-12T06:53:53,123Z", sep="\t"), data.table(t=as.POSIXct('2023-10-12 06:53:53.123', tz='UTC')))
18572+
test(2256.09, fread(text="x,t\n1.0,2023-10-12T06:53:53.123Z"), data.table(x=1.0, t=as.POSIXct('2023-10-12 06:53:53.123', tz='UTC')))
18573+
test(2256.10, fread(text="t,x\n2023-10-12T06:53:53.123Z,1.0"), data.table(t=as.POSIXct('2023-10-12 06:53:53.123', tz='UTC'), x=1.0))
18574+
### from PR comment
18575+
s = 'CoilID;AntennaID;Time;TagID;Pen;Side;Position;Location;Coil_Y;Coil_X
18576+
1;16403160;2023-10-12T10:30:55.270Z;DA2C6411;1;AKB;Litter central;middle;1;6
18577+
3;16403160;2023-10-12T10:30:55.270Z;DA459D86;1;AKB;Litter central;middle;1;4
18578+
15;16402963;2023-10-12T10:31:00.900Z;DA459D86;1;AKB;Litter central;right;2;3
18579+
14;16402963;2023-10-12T10:31:02.240Z;DA2C6411;1;AKB;Litter central;right;2;1
18580+
11;16403160;2023-10-12T10:31:02.650Z;DA2C6411;1;AKB;Litter central;middle;2;6'
18581+
test(2256.11,
18582+
unname(sapply(fread(s, sep=';'), function(x) class(x)[1L])),
18583+
c("integer", "integer", "POSIXct", "character", "integer", "character", "character", "character", "integer", "integer"))
1856718584

1856818585
# helpful error about deleting during grouping, #1873
1856918586
DT = data.table(id = c(1, 1, 2, 2), a = 1:4, b = 5:8)

src/fread.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,7 +1072,6 @@ static void parse_iso8601_timestamp(FieldParseContext *ctx)
10721072

10731073
date_only:
10741074

1075-
//Rprintf("date=%d\thour=%d\tz_hour=%d\tminute=%d\ttz_minute=%d\tsecond=%.1f\n", date, hour, tz_hour, minute, tz_minute, second);
10761075
// cast upfront needed to prevent silent overflow
10771076
*target = 86400*(double)date + 3600*(hour - tz_hour) + 60*(minute - tz_minute) + second;
10781077

@@ -1233,9 +1232,13 @@ static int detect_types( const char **pch, int8_t type[], int ncol, bool *bumped
12331232
}
12341233
}
12351234
ch = fieldStart;
1236-
if (autoDec && IS_DEC_TYPE(tmpType[field]) && dec == '.') { // . didn't parse a double; try ,
1237-
dec = ',';
1238-
continue;
1235+
if (autoDec && IS_DEC_TYPE(tmpType[field])) {
1236+
if (dec == '.') { // '.' didn't parse a double; try ','
1237+
dec = ',';
1238+
continue; // i.e., restart since tmpType not incremented
1239+
} else if (dec == ',') { // ',' didn't parse a double either; reset
1240+
dec = '\0';
1241+
}
12391242
}
12401243
while (++tmpType[field]<CT_STRING && disabled_parsers[tmpType[field]]) {};
12411244
*bumped = true;

src/fread.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ typedef enum {
3636
NUMTYPE // placeholder for the number of types including drop; used for allocation and loop bounds
3737
} colType;
3838

39-
#define IS_DEC_TYPE(x) ((x) == CT_FLOAT64 || (x) == CT_FLOAT64_EXT) // types where dec matters
39+
#define IS_DEC_TYPE(x) ((x) == CT_FLOAT64 || (x) == CT_FLOAT64_EXT || (x) == CT_ISO8601_TIME) // true for column types whose parsing depends on the decimal separator 'dec': the two float types and ISO 8601 timestamps (fractional seconds)
4040

4141
extern int8_t typeSize[NUMTYPE];
4242
extern const char typeName[NUMTYPE][10];

0 commit comments

Comments
 (0)