Skip to content

Commit 11aef2f

Browse files
authored
fread parse dates with leading zeros and keepLeadingZeros=TRUE (#6856)
* tell the parser not to skip leading zeros when parsing dates * update wording
1 parent fc43386 commit 11aef2f

File tree

3 files changed

+17
-10
lines changed

3 files changed

+17
-10
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
2. `fwrite(compress="gzip")` once again produces a gzip header when the column names are missing or disabled, [#6852](https://github.com/Rdatatable/data.table/issues/6852). Thanks @maxscheiber for the report and @aitap for the fix.
1414

15+
3. `fread(keepLeadingZeros=TRUE)` now correctly parses dates with leading zeros as dates instead of strings, [#6851](https://github.com/Rdatatable/data.table/issues/6851). Thanks @TurnaevEvgeny for the report and @ben-schwen for the fix.
16+
1517
## NOTES
1618

1719
1. Continued work to remove non-API C functions, [#6180](https://github.com/Rdatatable/data.table/issues/6180). Thanks Ivan Krylov for the PRs and for writing a clear and concise guide about the R API: https://aitap.codeberg.page/R-api/.

inst/tests/tests.Rraw

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21079,3 +21079,8 @@ setindex(DT, b)
2107921079
# make sure that print(DT) doesn't warn due to the header missing index column types, #6806
2108021080
# can't use output= here because the print() call is outside withCallingHandlers(...)
2108121081
test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE })
21082+
21083+
# fread with colClasses and keepLeadingZeros=TRUE #6851
21084+
dt = data.table(date=as.IDate(c(NA, "2014-12-05")))
21085+
test(2308.01, fread("date\nNA\n2014-12-05", keepLeadingZeros=TRUE), dt)
21086+
test(2308.02, fread("date\nNA\n2014-12-05", keepLeadingZeros=FALSE), dt)

src/fread.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -580,11 +580,11 @@ static void Field(FieldParseContext *ctx)
580580
}
581581
}
582582

583-
static void str_to_i32_core(const char **pch, int32_t *target)
583+
static void str_to_i32_core(const char **pch, int32_t *target, bool parse_date)
584584
{
585585
const char *ch = *pch;
586586

587-
if (*ch=='0' && args.keepLeadingZeros && IS_DIGIT(ch[1])) return;
587+
if (*ch=='0' && args.keepLeadingZeros && IS_DIGIT(ch[1]) && !parse_date) return;
588588
bool neg = *ch=='-';
589589
ch += (neg || *ch=='+');
590590
const char *start = ch; // to know if at least one digit is present
@@ -620,7 +620,7 @@ static void str_to_i32_core(const char **pch, int32_t *target)
620620

621621
static void StrtoI32(FieldParseContext *ctx)
622622
{
623-
str_to_i32_core(ctx->ch, (int32_t*) ctx->targets[sizeof(int32_t)]);
623+
str_to_i32_core(ctx->ch, (int32_t*) ctx->targets[sizeof(int32_t)], false);
624624
}
625625

626626

@@ -966,7 +966,7 @@ static void parse_iso8601_date_core(const char **pch, int32_t *target)
966966

967967
int32_t year=0, month=0, day=0;
968968

969-
str_to_i32_core(&ch, &year);
969+
str_to_i32_core(&ch, &year, true);
970970

971971
// .Date(.Machine$integer.max*c(-1, 1)):
972972
// -5877641-06-24 -- 5881580-07-11
@@ -979,12 +979,12 @@ static void parse_iso8601_date_core(const char **pch, int32_t *target)
979979
bool isLeapYear = year % 4 == 0 && (year % 100 != 0 || year/100 % 4 == 0);
980980
ch++;
981981

982-
str_to_i32_core(&ch, &month);
982+
str_to_i32_core(&ch, &month, true);
983983
if (month == NA_INT32 || month < 1 || month > 12 || *ch != '-')
984984
goto fail;
985985
ch++;
986986

987-
str_to_i32_core(&ch, &day);
987+
str_to_i32_core(&ch, &day, true);
988988
if (day == NA_INT32 || day < 1 ||
989989
(day > (isLeapYear ? leapYearDays[month-1] : normYearDays[month-1])))
990990
goto fail;
@@ -1022,12 +1022,12 @@ static void parse_iso8601_timestamp(FieldParseContext *ctx)
10221022
// allows date-only field in a column with UTC-marked datetimes to be parsed as UTC too; test 2150.13
10231023
ch++;
10241024

1025-
str_to_i32_core(&ch, &hour);
1025+
str_to_i32_core(&ch, &hour, true);
10261026
if (hour == NA_INT32 || hour < 0 || hour > 23 || *ch != ':')
10271027
goto fail;
10281028
ch++;
10291029

1030-
str_to_i32_core(&ch, &minute);
1030+
str_to_i32_core(&ch, &minute, true);
10311031
if (minute == NA_INT32 || minute < 0 || minute > 59 || *ch != ':')
10321032
goto fail;
10331033
ch++;
@@ -1044,7 +1044,7 @@ static void parse_iso8601_timestamp(FieldParseContext *ctx)
10441044
if (*ch == '+' || *ch == '-') {
10451045
const char *start = ch; // facilitates distinguishing +04, +0004, +0000, +00:00
10461046
// three recognized formats: [+-]AA:BB, [+-]AABB, and [+-]AA
1047-
str_to_i32_core(&ch, &tz_hour);
1047+
str_to_i32_core(&ch, &tz_hour, true);
10481048
if (tz_hour == NA_INT32)
10491049
goto fail;
10501050
if (ch - start == 5 && tz_hour != 0) { // +AABB
@@ -1057,7 +1057,7 @@ static void parse_iso8601_timestamp(FieldParseContext *ctx)
10571057
goto fail;
10581058
if (*ch == ':') {
10591059
ch++;
1060-
str_to_i32_core(&ch, &tz_minute);
1060+
str_to_i32_core(&ch, &tz_minute, true);
10611061
if (tz_minute == NA_INT32)
10621062
goto fail;
10631063
}

0 commit comments

Comments
 (0)