Skip to content

Commit 86b3af6

Browse files
ben-schwen authored and MichaelChirico committed
fread parse dates with leading zeros and keepLeadingZeros=TRUE (#6856)
* tell the parser not to skip leading zeros when parsing dates
* update wording
1 parent dac08f6 commit 86b3af6

File tree

3 files changed

+17
-10
lines changed

3 files changed

+17
-10
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
1. `fwrite(compress="gzip")` once again produces a gzip header when the column names are missing or disabled, [#6852](https://github.com/Rdatatable/data.table/issues/6852). Thanks @maxscheiber for the report and @aitap for the fix.
88

9+
2. `fread(keepLeadingZeros=TRUE)` now correctly parses dates with leading zeros as dates instead of strings, [#6851](https://github.com/Rdatatable/data.table/issues/6851). Thanks @TurnaevEvgeny for the report and @ben-schwen for the fix.
10+
911
# data.table [v1.17.0](https://github.com/Rdatatable/data.table/milestone/34) (20 Feb 2025)
1012

1113
## POTENTIALLY BREAKING CHANGES

inst/tests/tests.Rraw

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21044,3 +21044,8 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list(
2104421044

2104521045
# the integer overflow in #6729 is only noticeable with UBSan
2104621046
test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE })
21047+
21048+
# fread parses dates with leading zeros as IDate regardless of keepLeadingZeros, #6851
21049+
dt = data.table(date=as.IDate(c(NA, "2014-12-05")))
21050+
test(2306.01, fread("date\nNA\n2014-12-05", keepLeadingZeros=TRUE), dt)
21051+
test(2306.02, fread("date\nNA\n2014-12-05", keepLeadingZeros=FALSE), dt)

src/fread.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -580,11 +580,11 @@ static void Field(FieldParseContext *ctx)
580580
}
581581
}
582582

583-
static void str_to_i32_core(const char **pch, int32_t *target)
583+
static void str_to_i32_core(const char **pch, int32_t *target, bool parse_date)
584584
{
585585
const char *ch = *pch;
586586

587-
if (*ch=='0' && args.keepLeadingZeros && IS_DIGIT(ch[1])) return;
587+
if (*ch=='0' && args.keepLeadingZeros && IS_DIGIT(ch[1]) && !parse_date) return;
588588
bool neg = *ch=='-';
589589
ch += (neg || *ch=='+');
590590
const char *start = ch; // to know if at least one digit is present
@@ -620,7 +620,7 @@ static void str_to_i32_core(const char **pch, int32_t *target)
620620

621621
static void StrtoI32(FieldParseContext *ctx)
622622
{
623-
str_to_i32_core(ctx->ch, (int32_t*) ctx->targets[sizeof(int32_t)]);
623+
str_to_i32_core(ctx->ch, (int32_t*) ctx->targets[sizeof(int32_t)], false);
624624
}
625625

626626

@@ -966,7 +966,7 @@ static void parse_iso8601_date_core(const char **pch, int32_t *target)
966966

967967
int32_t year=0, month=0, day=0;
968968

969-
str_to_i32_core(&ch, &year);
969+
str_to_i32_core(&ch, &year, true);
970970

971971
// .Date(.Machine$integer.max*c(-1, 1)):
972972
// -5877641-06-24 -- 5881580-07-11
@@ -979,12 +979,12 @@ static void parse_iso8601_date_core(const char **pch, int32_t *target)
979979
bool isLeapYear = year % 4 == 0 && (year % 100 != 0 || year/100 % 4 == 0);
980980
ch++;
981981

982-
str_to_i32_core(&ch, &month);
982+
str_to_i32_core(&ch, &month, true);
983983
if (month == NA_INT32 || month < 1 || month > 12 || *ch != '-')
984984
goto fail;
985985
ch++;
986986

987-
str_to_i32_core(&ch, &day);
987+
str_to_i32_core(&ch, &day, true);
988988
if (day == NA_INT32 || day < 1 ||
989989
(day > (isLeapYear ? leapYearDays[month-1] : normYearDays[month-1])))
990990
goto fail;
@@ -1022,12 +1022,12 @@ static void parse_iso8601_timestamp(FieldParseContext *ctx)
10221022
// allows date-only field in a column with UTC-marked datetimes to be parsed as UTC too; test 2150.13
10231023
ch++;
10241024

1025-
str_to_i32_core(&ch, &hour);
1025+
str_to_i32_core(&ch, &hour, true);
10261026
if (hour == NA_INT32 || hour < 0 || hour > 23 || *ch != ':')
10271027
goto fail;
10281028
ch++;
10291029

1030-
str_to_i32_core(&ch, &minute);
1030+
str_to_i32_core(&ch, &minute, true);
10311031
if (minute == NA_INT32 || minute < 0 || minute > 59 || *ch != ':')
10321032
goto fail;
10331033
ch++;
@@ -1044,7 +1044,7 @@ static void parse_iso8601_timestamp(FieldParseContext *ctx)
10441044
if (*ch == '+' || *ch == '-') {
10451045
const char *start = ch; // facilitates distinguishing +04, +0004, +0000, +00:00
10461046
// three recognized formats: [+-]AA:BB, [+-]AABB, and [+-]AA
1047-
str_to_i32_core(&ch, &tz_hour);
1047+
str_to_i32_core(&ch, &tz_hour, true);
10481048
if (tz_hour == NA_INT32)
10491049
goto fail;
10501050
if (ch - start == 5 && tz_hour != 0) { // +AABB
@@ -1057,7 +1057,7 @@ static void parse_iso8601_timestamp(FieldParseContext *ctx)
10571057
goto fail;
10581058
if (*ch == ':') {
10591059
ch++;
1060-
str_to_i32_core(&ch, &tz_minute);
1060+
str_to_i32_core(&ch, &tz_minute, true);
10611061
if (tz_minute == NA_INT32)
10621062
goto fail;
10631063
}

0 commit comments

Comments
 (0)