Merge branch 'master' into dot-dot-1

MichaelChirico · MichaelChirico · commit 30b307961499 · 2025-07-10T17:59:43.000Z
diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R
@@ -277,5 +277,14 @@ test.list <- atime::atime_test_list(
     Slow = "73d79edf8ff8c55163e90631072192301056e336",   # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc)
     Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"),  # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency
 
+  "isoweek improved in #7144" = atime::atime_test(
+    setup = {
+      set.seed(349)
+      x = sample(Sys.Date() - 0:5000, N, replace=TRUE)
+    },
+    expr = data.table::isoweek(x),
+    Slow = "548410d23dd74b625e8ea9aeb1a5d2e9dddd2927",   # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/548410d23dd74b625e8ea9aeb1a5d2e9dddd2927)
+    Fast = "c0b32a60466bed0e63420ec105bc75c34590865e"),  # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7144/commits) that uses a much faster implementation
+
     tests=extra.test.list)
 # nolint end: undesirable_operator_linter.
diff --git a/.ci/linters/md/news_github_link_mismatch_linter.R b/.ci/linters/md/news_github_link_mismatch_linter.R
@@ -1,32 +1,6 @@
-# ensure that numbered list in each section is in sequence
-check_section_numbering = function(news) {
-  if (!grepl("NEWS", news)) return(invisible())
-  news = readLines(news)
-  # plain '#' catches some examples; 'd' for 'data.table'
-  sections = grep("^#+ [A-Zd]", news)
-  entries = grep("^[0-9]+[.]", news)
-  entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries]))
-  section_id = findInterval(entries, sections)
-
-  any_mismatch = FALSE
-  for (id in unique(section_id)) {
-    section_entries = entry_value[section_id == id]
-    intended_value = seq_along(section_entries)
-    matched = section_entries == intended_value
-    if (all(matched)) next
-    any_mismatch = TRUE
-    section_header = news[sections[id]]
-    cat(sprintf(
-      "In section '%s' (line %d), bad numbering:\n%s\n",
-      section_header, sections[id],
-      paste0("  [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n")
-    ))
-  }
-  stopifnot("Please fix the NEWS issues above" = !any_mismatch)
-}
-
 # ensure that GitHub link text & URL actually agree
-check_gh_links = function(news) {
+news_github_link_mismatch_linter = function(news) {
+  if (!grepl("NEWS", news)) return(invisible())
   news = readLines(news)
   gh_links_info = gregexpr(
     "\\[#(?<md_number>[0-9]+)\\]\\(https://github.com/Rdatatable/data.table/(?<link_type>[^/]+)/(?<link_number>[0-9]+)\\)",
diff --git a/.ci/linters/md/news_section_numbering_linter.R b/.ci/linters/md/news_section_numbering_linter.R
@@ -0,0 +1,26 @@
+# ensure that numbered list in each section is in sequence
+news_section_numbering_linter = function(news) {
+  if (!grepl("NEWS", news)) return(invisible())
+  news = readLines(news)
+  # plain '#' catches some examples; 'd' for 'data.table'
+  sections = grep("^#+ [A-Zd]", news)
+  entries = grep("^[0-9]+[.]", news)
+  entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries]))
+  section_id = findInterval(entries, sections)
+
+  any_mismatch = FALSE
+  for (id in unique(section_id)) {
+    section_entries = entry_value[section_id == id]
+    intended_value = seq_along(section_entries)
+    matched = section_entries == intended_value
+    if (all(matched)) next
+    any_mismatch = TRUE
+    section_header = news[sections[id]]
+    cat(sprintf(
+      "In section '%s' (line %d), bad numbering:\n%s\n",
+      section_header, sections[id],
+      paste0("  [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n")
+    ))
+  }
+  stopifnot("Please fix the NEWS issues above" = !any_mismatch)
+}
diff --git a/.ci/linters/md/vignette_heading_id_linter.R b/.ci/linters/md/vignette_heading_id_linter.R
@@ -1,6 +1,6 @@
 # ensure that ids are limited to alphanumerics and dashes
 # (in particular, dots and underscores break the links)
-check_header_ids = function(md) {
+vignette_heading_id_linter = function(md) {
   if (!grepl('[.]Rmd$', md)) return(invisible())
   md = readLines(md)
   # A bit surprisingly, some headings don't start with a letter.
diff --git a/GOVERNANCE.md b/GOVERNANCE.md
@@ -63,7 +63,7 @@ Functionality that is out of current scope:
 ## Committer
 
 * Definition: permission to commit to, and merge PRs into, master branch. 
-* How to obtain this role: after a Reviewer has a consistent history of careful reviews of others' PRs, then a current Committer should ask all other current Committers if they approve promoting the Reviewer to Committer, and it should be done if there is Consensus among active Committers. 
+* How to obtain this role: after a Reviewer has a consistent history of careful reviews of others' substantial PRs, then a current Committer should ask all other current Committers if they approve promoting the Reviewer to Committer, and it should be done if there is Consensus among active Committers. 
 * How this role is recognized: credited via role="aut" in DESCRIPTION (so they appear in Author list on CRAN), and added to https://github.com/orgs/Rdatatable/teams/committers which gives permission to merge PRs into master branch. 
 
 ## CRAN Maintainer
@@ -139,6 +139,8 @@ data.table Version line in DESCRIPTION typically has the following meanings
 
 # Governance history
 
+July 2025: require that the review history considered when promoting a Reviewer to Committer consist of reviews of "substantial" PRs.
+
 May 2025: update Finance and CoC language for NumFOCUS incorporation.
 
 Feb 2025: add Finances and Funding section, update Code of Conduct section to be a brief summary and reference the broader CoC document.
diff --git a/NEWS.md b/NEWS.md
@@ -40,7 +40,11 @@
     # 2:     2     6     4     5
     ```
 
-8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`. When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`.
+8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`, [#5560](https://github.com/Rdatatable/data.table/issues/5560). When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`. Thanks @sindribaldur for the report and @aitap for the fix.
+
+9. `isoweek()` is much faster (e.g. 20x) by re-using an implementation from {base}, [#5111](https://github.com/Rdatatable/data.table/issues/5111). Thanks @MichaelChirico for the report and PR.
+
+10. `data.table()` and `as.data.table()` with `keep.rownames=TRUE` now extract row names from named vectors, matching `data.frame()` behavior. Names from the first named vector in the input are used to create the row names column (default name `"rn"` or custom name via `keep.rownames="column_name"`), [#1916](https://github.com/Rdatatable/data.table/issues/1916). Thanks to @richierocks for the feature request and @Mukulyadav2004 for the implementation.
 
 ### BUG FIXES
 
@@ -70,7 +74,7 @@
 
 13. In rare cases, `data.table` failed to expand ALTREP columns when assigning a full column by reference. This could result in the target column getting modified unintentionally if the next call to the data.table was a modification by reference of the source column. E.g. in `DT[, b := as.character(a)]` the string conversion gets deferred and subsequent modification of column `a` would also modify column `b`, [#5400](https://github.com/Rdatatable/data.table/issues/5400). Thanks to @aquasync for the report and Václav Tlapák for the PR.
 
-14. `data.table()` function is now more aligned with `data.frame()` with respect to the names of the output when one of its inputs is a single-column matrix object, [#4124](https://github.com/Rdatatable/data.table/issues/4124). Thanks @PavoDive for the report and @jangorecki for the PR.
+14. `data.table()` function is now more aligned with `data.frame()` with respect to the names of the output when one of its inputs is a single-column matrix object, [#4124](https://github.com/Rdatatable/data.table/issues/4124). Thanks @PavoDive for the report, @jangorecki for the PR, and @MichaelChirico for a follow-up for back-compatibility.
 
 15. Including an `ITime` object as a named input to `data.frame()` respects the provided name, i.e. `data.frame(a = as.ITime(...))` will have column `a`, [#4673](https://github.com/Rdatatable/data.table/issues/4673). Thanks @shrektan for the report and @MichaelChirico for the fix.
 
@@ -84,7 +88,9 @@
 
 20. `droplevels()` works on 0-row data.tables, [#7043](https://github.com/Rdatatable/data.table/issues/7043). The result will have factor columns `factor(character())`, consistent with the data.frame method. Thanks @advieser for the report and @MichaelChirico for the fix.
 
-21. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.
+21. `print(..., col.names = 'none')` now correctly adapts column widths to the data content, ignoring the original column names and producing a more compact output, [#6882](https://github.com/Rdatatable/data.table/issues/6882). Thanks to @brooksambrose for the report and @venom1204 for the PR.
+
+22. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.
 
 ### NOTES
 
diff --git a/R/IDateTime.R b/R/IDateTime.R
@@ -342,19 +342,20 @@ yday    = function(x) convertDate(as.IDate(x), "yday")
 wday    = function(x) convertDate(as.IDate(x), "wday")
 mday    = function(x) convertDate(as.IDate(x), "mday")
 week    = function(x) convertDate(as.IDate(x), "week")
-isoweek = function(x) {
+# TODO(#3279): Investigate whether an improved as.IDate() would make the commented-out implementation below faster than this one
+isoweek = function(x) as.integer(format(as.IDate(x), "%V"))
   # ISO 8601-conformant week, as described at
   #   https://en.wikipedia.org/wiki/ISO_week_date
   # Approach:
   # * Find nearest Thursday to each element of x
   # * Find the number of weeks having passed between
   #   January 1st of the year of the nearest Thursdays and x
 
-  x = as.IDate(x)   # number of days since 1 Jan 1970 (a Thurs)
-  nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L))
-  year_start = as.IDate(format(nearest_thurs, '%Y-01-01'))
-  1L + (nearest_thurs - year_start) %/% 7L
-}
+#  x = as.IDate(x)   # number of days since 1 Jan 1970 (a Thurs)
+#  nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L))
+#  year_start = as.IDate(format(nearest_thurs, '%Y-01-01'))
+#  1L + (nearest_thurs - year_start) %/% 7L
+
 
 month   = function(x) convertDate(as.IDate(x), "month")
 quarter = function(x) convertDate(as.IDate(x), "quarter")
diff --git a/R/as.data.table.R b/R/as.data.table.R
@@ -48,6 +48,9 @@ as.data.table.matrix = function(x, keep.rownames=FALSE, key=NULL, ...) {
   if (!identical(keep.rownames, FALSE)) {
     # can specify col name to keep.rownames, #575
     ans = data.table(rn=rownames(x), x, keep.rownames=FALSE)
+    # auto-inferred name 'x' is not back-compatible & inconsistent, #7145
+    if (ncol(x) == 1L && is.null(colnames(x)))
+      setnames(ans, 'x', 'V1')
     if (is.character(keep.rownames))
       setnames(ans, 'rn', keep.rownames[1L])
     return(ans)
@@ -133,9 +136,26 @@ as.data.table.list = function(x,
   missing.check.names = missing(check.names)
   origListNames = if (missing(.named)) names(x) else NULL  # as.data.table called directly, not from inside data.table() which provides .named, #3854
   empty_atomic = FALSE
+
+  # Handle keep.rownames for vectors (mimicking data.frame behavior)
+  rownames_ = NULL
+  check_rownames = !isFALSE(keep.rownames)
+
   for (i in seq_len(n)) {
     xi = x[[i]]
     if (is.null(xi)) next    # eachncol already initialized to 0 by integer() above
+    if (check_rownames && is.null(rownames_)) {
+      if (is.null(dim(xi))) {
+        if (!is.null(nm <- names(xi))) {
+          rownames_ = nm
+          x[[i]] = unname(xi)
+        }
+      } else {
+        if (!is.null(nm <- rownames(xi))) {
+          rownames_ = nm
+        }
+      }
+    }
     if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE
     if ("POSIXlt" %chin% class(xi)) {
       warningf("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
@@ -200,6 +220,18 @@ as.data.table.list = function(x,
   }
   if (any(vnames==".SD")) stopf("A column may not be called .SD. That has special meaning.")
   if (check.names) vnames = make.names(vnames, unique=TRUE)
+
+  # Add rownames column when vector names were found
+  if (!is.null(rownames_)) {
+    rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn"
+    if (!is.na(idx <- chmatch(rn_name, vnames)[1L])) {
+      ans = c(list(ans[[idx]]), ans[-idx])
+      vnames = c(vnames[idx], vnames[-idx])
+    } else {
+      ans = c(list(recycle(rownames_, nrow)), ans)
+      vnames = c(rn_name, vnames)
+    }
+  }
   setattr(ans, "names", vnames)
   setDT(ans, key=key) # copy ensured above; also, setDT handles naming
   if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames)  # PR 3854 and tests 2058.15-17
diff --git a/R/print.data.table.R b/R/print.data.table.R
@@ -141,6 +141,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
     print_default(toprint)
     return(invisible(x))
   }
+  if (col.names == "none")
+    colnames(toprint) = rep.int("", ncol(toprint))
   if (nrow(toprint)>20L && col.names == "auto")
     # repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them
     #   option to shut this off per request of Oleg Bondar on SO, #1482
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
diff --git a/man/as.data.table.Rd b/man/as.data.table.Rd
diff --git a/src/fwrite.c b/src/fwrite.c
diff --git a/src/fwriteR.c b/src/fwriteR.c

Original file line number	Diff line number	Diff line change
`@@ -141,6 +141,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),`
`141`	`141`	`print_default(toprint)`
`142`	`142`	`return(invisible(x))`
`143`	`143`	`}`
	`144`	`+ if (col.names == "none")`
	`145`	`+ colnames(toprint) = rep.int("", ncol(toprint))`
`144`	`146`	`if (nrow(toprint)>20L && col.names == "auto")`
`145`	`147`	`# repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them`
`146`	`148`	`# option to shut this off per request of Oleg Bondar on SO, #1482`