Skip to content
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@

1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector, for example `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname)` to `FALSE`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`.

2. The behavior of `week()` will be changed in a future release to calculate weeks sequentially (days 1-7 as week 1), which is a potential breaking change. For now, the current "legacy" behavior remains the default, and a deprecation warning will be issued when the old and new behaviors differ. Users can control this behavior with the temporary option `options(datatable.week = "...")`:
* `"sequential"`: Opt in to the new, sequential behavior (no warning).
* `"legacy"`: Continue using the legacy behavior but suppress the deprecation warning.
See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. Thanks @MichaelChirico for the report and @venom1204 for the PR.

### NEW FEATURES

1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also matches `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
Expand Down
11 changes: 10 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -18384,7 +18384,7 @@ x = c("1111-11-11", "2019-01-01", "2019-02-28", "2019-03-01", "2019-12-31", "202
test(2236.1, yday(x), c(315L, 1L, 59L, 60L, 365L, 60L, 61L, 366L, 1L, 366L, 60L, NA))
test(2236.2, mday(x), c(11L, 1L, 28L, 1L, 31L, 29L, 1L, 31L, 1L, 31L, 1L, NA))
test(2236.3, wday(x), c(7L, 3L, 5L, 6L, 3L, 7L, 1L, 5L, 1L, 2L, 2L, NA))
test(2236.4, week(x), c(46L, 1L, 9L, 9L, 53L, 9L, 9L, 53L, 1L, 53L, 9L, NA))
test(2236.4, options = c(datatable.week = "legacy"), week(x), c(46L, 1L, 9L, 9L, 53L, 9L, 9L, 53L, 1L, 53L, 9L, NA))
test(2236.5, month(x), c(11L, 1L, 2L, 3L, 12L, 2L, 3L, 12L, 1L, 12L, 3L, NA))
test(2236.6, quarter(x), c(4L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 4L, 1L, NA))
test(2236.7, year(x), c(1111L, 2019L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2040L, 2040L, 2100L, NA))
Expand Down Expand Up @@ -21815,3 +21815,12 @@ test(2341.24, fread('a
# leading cmnt
b
', comment.char = '#', strip.white = FALSE, sep = ","), data.table(a=c(" ", "b")))

# week() sequential numbering fix tests #2611
# In "sequential" mode, days 1-7 of the year are week 1, days 8-14 are week 2, and so on.
# Tests 2342.1-2342.6 opt in via options(datatable.week = "sequential"), so no deprecation warning is expected.
# 2342.1: the first eight days of 1970 -- days 1-7 fall in week 1, day 8 starts week 2.
test(2342.1, options = c(datatable.week = "sequential"), week(as.IDate("1970-01-01") + 0:7), c(1L,1L,1L,1L,1L,1L,1L,2L))
# 2342.2: dates around Feb 29 in leap year 2012 (days 59-61 of the year) all stay in week 9.
test(2342.2, week(as.IDate(c("2012-02-28","2012-02-29","2012-03-01"))), c(9L,9L,9L), options = list(datatable.week = "sequential"))
# 2342.3: year boundary after a non-leap year -- day 365 is week 53, then Jan 1 resets to week 1.
test(2342.3, week(as.IDate(c("2019-12-31","2020-01-01"))), c(53L,1L), options = list(datatable.week = "sequential"))
# 2342.4: year boundary after a leap year -- day 366 is still week 53, then Jan 1 resets to week 1.
test(2342.4, week(as.IDate(c("2020-12-31","2021-01-01"))), c(53L,1L), options = list(datatable.week = "sequential"))
# 2342.5: days 6-12 of 2021 -- the week 1 -> week 2 transition happens between day 7 and day 8.
test(2342.5, week(as.IDate("2021-01-06") + 0:6), c(1L,1L,2L,2L,2L,2L,2L), options = list(datatable.week = "sequential"))
# 2342.6: days 58-62 of leap year 2016, spanning Feb 29, all fall within week 9.
test(2342.6, week(as.IDate(c("2016-02-27","2016-02-28","2016-02-29","2016-03-01","2016-03-02"))), c(9L,9L,9L,9L,9L), options = list(datatable.week = "sequential"))
# 2342.7: with the option unset (NULL), the legacy result is returned (day 7 -> week 2) and,
# because legacy and sequential numbering disagree for this date, the deprecation warning is raised.
test(2342.7, week(as.IDate("1970-01-07")), 2L, warning = "The default behavior of week() is changing", options = list(datatable.week = NULL))
39 changes: 36 additions & 3 deletions src/idatetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,11 @@ void convertSingleDate(int x, datetype type, void *out)

int leap = !years1 && (years4 || !years100);

if (type == YDAY || type == WEEK) {
if (type == YDAY) {
int yday = days + 31 + 28 + leap;
if (yday >= YEARS1 + leap)
yday -= YEARS1 + leap;
*(int *)out = ++yday;
if (type == WEEK)
*(int *)out = (*(int *)out / 7) + 1;
return;
}

Expand Down Expand Up @@ -143,6 +141,41 @@ SEXP convertDate(SEXP x, SEXP type)
else if (!strcmp(ctype_str, "yearqtr")) { ctype = YEARQTR; ansint = false; }
else internal_error(__func__, "invalid type, should have been caught before"); // # nocov

if (ctype == WEEK) {
SEXP ans = PROTECT(allocVector(INTSXP, n));
int *ansp = INTEGER(ans);

SEXP opt = GetOption(install("datatable.week"), R_NilValue);
const char *mode = isString(opt) && length(opt) == 1 ? CHAR(STRING_ELT(opt, 0)) : "default";

bool use_sequential = !strcmp(mode, "sequential");
bool use_legacy = !strcmp(mode, "legacy");
bool can_warn = !use_sequential && !use_legacy;

for (int i = 0; i < n; i++) {
if (ix[i] == NA_INTEGER) {
ansp[i] = NA_INTEGER;
continue;
}
int yday;
convertSingleDate(ix[i], YDAY, &yday);
int new_week = ((yday - 1) / 7) + 1;

if (use_sequential) {
ansp[i] = new_week;
} else {
int old_week = (yday / 7) + 1;
ansp[i] = old_week;
if (can_warn && new_week != old_week) {
warning(_("The default behavior of week() is changing. Previously ('legacy' mode), week numbers advanced every 7th day of the year. The new 'sequential' mode ensures the first week always has 7 days. For example, as.IDate('2023-01-07') returns week 2 in legacy mode but week 1 in sequential mode (week 2 starts on '2023-01-08'). To adopt the new behavior now, set options(datatable.week = 'sequential'). To keep the old results and silence this warning, set options(datatable.week = 'legacy'). See https://github.com/Rdatatable/data.table/issues/2611"));
can_warn = false;
}
}
}
UNPROTECT(1);
return ans;
}

if (ansint) {
SEXP ans = PROTECT(allocVector(INTSXP, n));
int *ansp = INTEGER(ans);
Expand Down
Loading