Skip to content
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@

1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`. This is consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector; for example, `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname = FALSE)`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`.

2. The behavior of `week()` will be changed in a future release to calculate weeks sequentially (days 1-7 of the year as week 1, days 8-14 as week 2, and so on), which is a potentially breaking change. For now, the current "legacy" behavior remains the default, and a one-time deprecation warning will be issued the first time the legacy and sequential results differ. Users can control this behavior with the temporary option `options(datatable.week = "...")`:
    * `"sequential"`: Opt in to the new, sequential behavior (no warning).
    * `"legacy"`: Continue using the legacy behavior but suppress the deprecation warning.

    See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. Thanks @MichaelChirico for the report and @venom12_04 for the PR.

### NEW FEATURES

1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also matches `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
Expand Down
10 changes: 9 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -18384,7 +18384,7 @@ x = c("1111-11-11", "2019-01-01", "2019-02-28", "2019-03-01", "2019-12-31", "202
test(2236.1, yday(x), c(315L, 1L, 59L, 60L, 365L, 60L, 61L, 366L, 1L, 366L, 60L, NA))
test(2236.2, mday(x), c(11L, 1L, 28L, 1L, 31L, 29L, 1L, 31L, 1L, 31L, 1L, NA))
test(2236.3, wday(x), c(7L, 3L, 5L, 6L, 3L, 7L, 1L, 5L, 1L, 2L, 2L, NA))
test(2236.4, week(x), c(46L, 1L, 9L, 9L, 53L, 9L, 9L, 53L, 1L, 53L, 9L, NA))
test(2236.4, week(x), c(46L, 1L, 9L, 9L, 53L, 9L, 9L, 53L, 1L, 53L, 9L, NA), warning = "The default behavior of data.table::week() is deprecated", options = list(datatable.week = NULL))
test(2236.5, month(x), c(11L, 1L, 2L, 3L, 12L, 2L, 3L, 12L, 1L, 12L, 3L, NA))
test(2236.6, quarter(x), c(4L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 4L, 1L, NA))
test(2236.7, year(x), c(1111L, 2019L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2040L, 2040L, 2100L, NA))
Expand Down Expand Up @@ -21815,3 +21815,11 @@ test(2341.24, fread('a
# leading cmnt
b
', comment.char = '#', strip.white = FALSE, sep = ","), data.table(a=c(" ", "b")))

# week() sequential numbering fix tests #2611
# All tests below run with options(datatable.week = "sequential"), where the expected
# week number is ((day-of-year - 1) %/% 7) + 1, i.e. days 1-7 of the year are week 1.
# 2342.1: the first seven days of a year are week 1; the eighth day starts week 2
test(2342.1, week(as.IDate("1970-01-01") + 0:7), c(1L,1L,1L,1L,1L,1L,1L,2L), options = list(datatable.week = "sequential"))
# 2342.2: leap year, days 59-61 of the year (around Feb 29) all fall in week 9
test(2342.2, week(as.IDate(c("2012-02-28","2012-02-29","2012-03-01"))), c(9L,9L,9L), options = list(datatable.week = "sequential"))
# 2342.3: day 365 of a non-leap year is week 53, and Jan 1 of the next year resets to week 1
test(2342.3, week(as.IDate(c("2019-12-31","2020-01-01"))), c(53L,1L), options = list(datatable.week = "sequential"))
# 2342.4: day 366 of a leap year is still week 53, and Jan 1 of the next year resets to week 1
test(2342.4, week(as.IDate(c("2020-12-31","2021-01-01"))), c(53L,1L), options = list(datatable.week = "sequential"))
# 2342.5: boundary between week 1 and week 2 falls between day 7 (Jan 7) and day 8 (Jan 8)
test(2342.5, week(as.IDate("2021-01-06") + 0:6), c(1L,1L,2L,2L,2L,2L,2L), options = list(datatable.week = "sequential"))
# 2342.6: leap year, days 58-62 of the year all lie inside week 9 (days 57-63)
test(2342.6, week(as.IDate(c("2016-02-27","2016-02-28","2016-02-29","2016-03-01","2016-03-02"))), c(9L,9L,9L,9L,9L), options = list(datatable.week = "sequential"))
43 changes: 40 additions & 3 deletions src/idatetime.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "data.table.h"

// Set to 1 once the week() deprecation warning has been emitted, so the warning
// is not repeated for as long as this file-scope flag keeps its value.
static int week_deprecation_warning_issued = 0;

// Day counts of 400-, 100- and 4-year spans: the values equal
// 400*365+97, 100*365+24 and 4*365+1 respectively.
static const int YEARS400 = 146097;
static const int YEARS100 = 36524;
static const int YEARS4 = 1461;
Expand Down Expand Up @@ -58,13 +60,11 @@ void convertSingleDate(int x, datetype type, void *out)

int leap = !years1 && (years4 || !years100);

if (type == YDAY || type == WEEK) {
if (type == YDAY) {
int yday = days + 31 + 28 + leap;
if (yday >= YEARS1 + leap)
yday -= YEARS1 + leap;
*(int *)out = ++yday;
if (type == WEEK)
*(int *)out = (*(int *)out / 7) + 1;
return;
}

Expand Down Expand Up @@ -143,6 +143,43 @@ SEXP convertDate(SEXP x, SEXP type)
else if (!strcmp(ctype_str, "yearqtr")) { ctype = YEARQTR; ansint = false; }
else internal_error(__func__, "invalid type, should have been caught before"); // # nocov

if (ctype == WEEK) {
  // week() is computed here from the day of the year. Two numberings exist:
  //   sequential: ((yday - 1) / 7) + 1, so days 1-7 are week 1
  //   legacy:     (yday / 7) + 1,       so day 7 already counts as week 2
  // options(datatable.week) selects between them; any value other than the
  // single strings "sequential" or "legacy" means legacy results plus a
  // deprecation warning, raised at most once while the file-scope flag is unset.
  SEXP ans = PROTECT(allocVector(INTSXP, n));
  int *out = INTEGER(ans);

  SEXP week_opt = GetOption(install("datatable.week"), R_NilValue);
  const char *mode = "default";
  if (isString(week_opt) && length(week_opt) == 1)
    mode = CHAR(STRING_ELT(week_opt, 0));

  const bool sequential = strcmp(mode, "sequential") == 0;
  const bool legacy = strcmp(mode, "legacy") == 0;
  // Warning is only possible in default mode and only if it has not fired before.
  bool may_warn = !(sequential || legacy || week_deprecation_warning_issued);

  for (int i = 0; i < n; i++) {
    const int date = ix[i];
    if (date == NA_INTEGER) {
      // NA input yields NA output
      out[i] = NA_INTEGER;
      continue;
    }

    int yday;
    convertSingleDate(date, YDAY, &yday);
    const int seq_week = ((yday - 1) / 7) + 1;
    const int legacy_week = (yday / 7) + 1;

    if (sequential) {
      out[i] = seq_week;
      continue;
    }

    out[i] = legacy_week;
    // Only warn when the two numberings actually disagree for this date.
    if (may_warn && seq_week != legacy_week) {
      warning(_("The default behavior of data.table::week() is deprecated. It will be changed to 'sequential' in a future version. To opt-in to the new behavior now, run: options(datatable.week = 'sequential'). To suppress this warning and continue using the legacy behavior, run: options(datatable.week = 'legacy')."));
      week_deprecation_warning_issued = 1;
      may_warn = false;
    }
  }

  UNPROTECT(1);
  return ans;
}

if (ansint) {
SEXP ans = PROTECT(allocVector(INTSXP, n));
int *ansp = INTEGER(ans);
Expand Down
Loading