Skip to content

Commit 30b3079

Browse files
Merge branch 'master' into dot-dot-1
2 parents 5eaacb7 + bfa049c commit 30b3079

File tree

13 files changed

+386
-272
lines changed

13 files changed

+386
-272
lines changed

.ci/atime/tests.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,5 +277,14 @@ test.list <- atime::atime_test_list(
277277
Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc)
278278
Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency
279279

280+
"isoweek improved in #7144" = atime::atime_test(
281+
setup = {
282+
set.seed(349)
283+
x = sample(Sys.Date() - 0:5000, N, replace=TRUE)
284+
},
285+
expr = data.table::isoweek(x),
286+
Slow = "548410d23dd74b625e8ea9aeb1a5d2e9dddd2927", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/548410d23dd74b625e8ea9aeb1a5d2e9dddd2927)
287+
Fast = "c0b32a60466bed0e63420ec105bc75c34590865e"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7144/commits) that uses a much faster implementation
288+
280289
tests=extra.test.list)
281290
# nolint end: undesirable_operator_linter.

.ci/linters/md/news_linter.R renamed to .ci/linters/md/news_github_link_mismatch_linter.R

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,6 @@
1-
# ensure that numbered list in each section is in sequence
2-
check_section_numbering = function(news) {
3-
if (!grepl("NEWS", news)) return(invisible())
4-
news = readLines(news)
5-
# plain '#' catches some examples; 'd' for 'data.table'
6-
sections = grep("^#+ [A-Zd]", news)
7-
entries = grep("^[0-9]+[.]", news)
8-
entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries]))
9-
section_id = findInterval(entries, sections)
10-
11-
any_mismatch = FALSE
12-
for (id in unique(section_id)) {
13-
section_entries = entry_value[section_id == id]
14-
intended_value = seq_along(section_entries)
15-
matched = section_entries == intended_value
16-
if (all(matched)) next
17-
any_mismatch = TRUE
18-
section_header = news[sections[id]]
19-
cat(sprintf(
20-
"In section '%s' (line %d), bad numbering:\n%s\n",
21-
section_header, sections[id],
22-
paste0(" [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n")
23-
))
24-
}
25-
stopifnot("Please fix the NEWS issues above" = !any_mismatch)
26-
}
27-
281
# ensure that GitHub link text & URL actually agree
29-
check_gh_links = function(news) {
2+
news_github_link_mismatch_linter = function(news) {
3+
if (!grepl("NEWS", news)) return(invisible())
304
news = readLines(news)
315
gh_links_info = gregexpr(
326
"\\[#(?<md_number>[0-9]+)\\]\\(https://github.com/Rdatatable/data.table/(?<link_type>[^/]+)/(?<link_number>[0-9]+)\\)",
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# ensure that the numbered list in each section is in sequence (1, 2, 3, ...)
2+
news_section_numbering_linter = function(news) {
3+
if (!grepl("NEWS", news)) return(invisible())
4+
news = readLines(news)
5+
# plain '#' catches some examples; 'd' for 'data.table'
6+
sections = grep("^#+ [A-Zd]", news)
7+
entries = grep("^[0-9]+[.]", news)
8+
entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries]))
9+
section_id = findInterval(entries, sections)
10+
11+
any_mismatch = FALSE
12+
for (id in unique(section_id)) {
13+
section_entries = entry_value[section_id == id]
14+
intended_value = seq_along(section_entries)
15+
matched = section_entries == intended_value
16+
if (all(matched)) next
17+
any_mismatch = TRUE
18+
section_header = news[sections[id]]
19+
cat(sprintf(
20+
"In section '%s' (line %d), bad numbering:\n%s\n",
21+
section_header, sections[id],
22+
paste0(" [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n")
23+
))
24+
}
25+
stopifnot("Please fix the NEWS issues above" = !any_mismatch)
26+
}

.ci/linters/md/heading_id_linter.R renamed to .ci/linters/md/vignette_heading_id_linter.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# ensure that ids are limited to alphanumerics and dashes
22
# (in particular, dots and underscores break the links)
3-
check_header_ids = function(md) {
3+
vignette_heading_id_linter = function(md) {
44
if (!grepl('[.]Rmd$', md)) return(invisible())
55
md = readLines(md)
66
# A bit surprisingly, some headings don't start with a letter.

GOVERNANCE.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Functionality that is out of current scope:
6363
## Committer
6464

6565
* Definition: permission to commit to, and merge PRs into, master branch.
66-
* How to obtain this role: after a Reviewer has a consistent history of careful reviews of others' PRs, then a current Committer should ask all other current Committers if they approve promoting the Reviewer to Committer, and it should be done if there is Consensus among active Committers.
66+
* How to obtain this role: after a Reviewer has a consistent history of careful reviews of others' substantial PRs, then a current Committer should ask all other current Committers if they approve promoting the Reviewer to Committer, and it should be done if there is Consensus among active Committers.
6767
* How this role is recognized: credited via role="aut" in DESCRIPTION (so they appear in Author list on CRAN), and added to https://github.com/orgs/Rdatatable/teams/committers which gives permission to merge PRs into master branch.
6868

6969
## CRAN Maintainer
@@ -139,6 +139,8 @@ data.table Version line in DESCRIPTION typically has the following meanings
139139

140140
# Governance history
141141

142+
July 2025: require that the review history considered for potential new Committers be of "substantial" PRs.
143+
142144
May 2025: update Finance and CoC language for NumFOCUS incorporation.
143145

144146
Feb 2025: add Finances and Funding section, update Code of Conduct section to be a brief summary and reference the broader CoC document.

NEWS.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@
4040
# 2: 2 6 4 5
4141
```
4242

43-
8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`. When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`.
43+
8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`, [#5560](https://github.com/Rdatatable/data.table/issues/5560). When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`. Thanks @sindribaldur for the report and @aitap for the fix.
44+
45+
9. `isoweek()` is much faster (e.g. 20x) by re-using an implementation from {base}, [#5111](https://github.com/Rdatatable/data.table/issues/5111). Thanks @MichaelChirico for the report and PR.
46+
47+
10. `data.table()` and `as.data.table()` with `keep.rownames=TRUE` now extract row names from named vectors, matching `data.frame()` behavior. Names from the first named vector in the input are used to create the row names column (default name `"rn"` or custom name via `keep.rownames="column_name"`), [#1916](https://github.com/Rdatatable/data.table/issues/1916). Thanks to @richierocks for the feature request and @Mukulyadav2004 for the implementation.
4448

4549
### BUG FIXES
4650

@@ -70,7 +74,7 @@
7074
7175
13. In rare cases, `data.table` failed to expand ALTREP columns when assigning a full column by reference. This could result in the target column getting modified unintentionally if the next call to the data.table was a modification by reference of the source column. E.g. in `DT[, b := as.character(a)]` the string conversion gets deferred and subsequent modification of column `a` would also modify column `b`, [#5400](https://github.com/Rdatatable/data.table/issues/5400). Thanks to @aquasync for the report and Václav Tlapák for the PR.
7276
73-
14. `data.table()` function is now more aligned with `data.frame()` with respect to the names of the output when one of its inputs is a single-column matrix object, [#4124](https://github.com/Rdatatable/data.table/issues/4124). Thanks @PavoDive for the report and @jangorecki for the PR.
77+
14. `data.table()` function is now more aligned with `data.frame()` with respect to the names of the output when one of its inputs is a single-column matrix object, [#4124](https://github.com/Rdatatable/data.table/issues/4124). Thanks @PavoDive for the report, @jangorecki for the PR, and @MichaelChirico for a follow-up for back-compatibility.
7478
7579
15. Including an `ITime` object as a named input to `data.frame()` respects the provided name, i.e. `data.frame(a = as.ITime(...))` will have column `a`, [#4673](https://github.com/Rdatatable/data.table/issues/4673). Thanks @shrektan for the report and @MichaelChirico for the fix.
7680
@@ -84,7 +88,9 @@
8488
8589
20. `droplevels()` works on 0-row data.tables, [#7043](https://github.com/Rdatatable/data.table/issues/7043). The result will have factor columns `factor(character())`, consistent with the data.frame method. Thanks @advieser for the report and @MichaelChirico for the fix.
8690
87-
21. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.
91+
21. `print(..., col.names = 'none')` now correctly adapts column widths to the data content, ignoring the original column names and producing a more compact output, [#6882](https://github.com/Rdatatable/data.table/issues/6882). Thanks to @brooksambrose for the report and @venom1204 for the PR.
92+
93+
22. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.
8894
8995
### NOTES
9096

R/IDateTime.R

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -342,19 +342,20 @@ yday = function(x) convertDate(as.IDate(x), "yday")
342342
wday = function(x) convertDate(as.IDate(x), "wday")
343343
mday = function(x) convertDate(as.IDate(x), "mday")
344344
week = function(x) convertDate(as.IDate(x), "week")
345-
isoweek = function(x) {
345+
# TODO(#3279): Investigate whether an improved as.IDate() would make the commented-out implementation below faster than this format()-based one
346+
isoweek = function(x) as.integer(format(as.IDate(x), "%V"))
346347
# ISO 8601-conformant week, as described at
347348
# https://en.wikipedia.org/wiki/ISO_week_date
348349
# Approach:
349350
# * Find nearest Thursday to each element of x
350351
# * Find the number of weeks having passed between
351352
# January 1st of the year of the nearest Thursdays and x
352353

353-
x = as.IDate(x) # number of days since 1 Jan 1970 (a Thurs)
354-
nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L))
355-
year_start = as.IDate(format(nearest_thurs, '%Y-01-01'))
356-
1L + (nearest_thurs - year_start) %/% 7L
357-
}
354+
# x = as.IDate(x) # number of days since 1 Jan 1970 (a Thurs)
355+
# nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L))
356+
# year_start = as.IDate(format(nearest_thurs, '%Y-01-01'))
357+
# 1L + (nearest_thurs - year_start) %/% 7L
358+
358359

359360
month = function(x) convertDate(as.IDate(x), "month")
360361
quarter = function(x) convertDate(as.IDate(x), "quarter")

R/as.data.table.R

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ as.data.table.matrix = function(x, keep.rownames=FALSE, key=NULL, ...) {
4848
if (!identical(keep.rownames, FALSE)) {
4949
# can specify col name to keep.rownames, #575
5050
ans = data.table(rn=rownames(x), x, keep.rownames=FALSE)
51+
# auto-inferred name 'x' is not back-compatible & inconsistent, #7145
52+
if (ncol(x) == 1L && is.null(colnames(x)))
53+
setnames(ans, 'x', 'V1')
5154
if (is.character(keep.rownames))
5255
setnames(ans, 'rn', keep.rownames[1L])
5356
return(ans)
@@ -133,9 +136,26 @@ as.data.table.list = function(x,
133136
missing.check.names = missing(check.names)
134137
origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854
135138
empty_atomic = FALSE
139+
140+
# Handle keep.rownames for vectors (mimicking data.frame behavior)
141+
rownames_ = NULL
142+
check_rownames = !isFALSE(keep.rownames)
143+
136144
for (i in seq_len(n)) {
137145
xi = x[[i]]
138146
if (is.null(xi)) next # eachncol already initialized to 0 by integer() above
147+
if (check_rownames && is.null(rownames_)) {
148+
if (is.null(dim(xi))) {
149+
if (!is.null(nm <- names(xi))) {
150+
rownames_ = nm
151+
x[[i]] = unname(xi)
152+
}
153+
} else {
154+
if (!is.null(nm <- rownames(xi))) {
155+
rownames_ = nm
156+
}
157+
}
158+
}
139159
if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE
140160
if ("POSIXlt" %chin% class(xi)) {
141161
warningf("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
@@ -200,6 +220,18 @@ as.data.table.list = function(x,
200220
}
201221
if (any(vnames==".SD")) stopf("A column may not be called .SD. That has special meaning.")
202222
if (check.names) vnames = make.names(vnames, unique=TRUE)
223+
224+
# Add rownames column when vector names were found
225+
if (!is.null(rownames_)) {
226+
rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn"
227+
if (!is.na(idx <- chmatch(rn_name, vnames)[1L])) {
228+
ans = c(list(ans[[idx]]), ans[-idx])
229+
vnames = c(vnames[idx], vnames[-idx])
230+
} else {
231+
ans = c(list(recycle(rownames_, nrow)), ans)
232+
vnames = c(rn_name, vnames)
233+
}
234+
}
203235
setattr(ans, "names", vnames)
204236
setDT(ans, key=key) # copy ensured above; also, setDT handles naming
205237
if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames) # PR 3854 and tests 2058.15-17

R/print.data.table.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
141141
print_default(toprint)
142142
return(invisible(x))
143143
}
144+
if (col.names == "none")
145+
colnames(toprint) = rep.int("", ncol(toprint))
144146
if (nrow(toprint)>20L && col.names == "auto")
145147
# repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them
146148
# option to shut this off per request of Oleg Bondar on SO, #1482

0 commit comments

Comments
 (0)