Skip to content

Commit 8da66f2

Browse files
Merge branch 'master' into dot-dot-1
2 parents d9386fa + 8f5ffa8 commit 8da66f2

24 files changed

+1460
-407
lines changed

.ci/linters/r/eval_parse_linter.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
eval_parse_linter = make_linter_from_xpath(
2+
"//SYMBOL_FUNCTION_CALL[text() = 'parse']
3+
/ancestor::expr
4+
/preceding-sibling::expr[SYMBOL_FUNCTION_CALL[text() = 'eval']]
5+
/parent::expr
6+
",
7+
"Avoid eval(parse()); build the language directly, possibly using substitute2()."
8+
)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Ensure that data.table condition classes in code match documentation
2+
condition_classes_documentation_linter = function(rd_file) {
3+
if (!grepl("\\name{data.table-condition-classes}", readChar(rd_file, 100L), fixed = TRUE)) return(invisible())
4+
5+
# Find condition classes in R code
6+
walk_r_ast_for_classes = function(expr) {
7+
if (is.call(expr) && is.name(e <- expr[[1L]]) && as.character(e) %in% c("stopf", "warningf", "messagef", "packageStartupMessagef") && is.character(class_arg <- expr[["class"]]) && startsWith(class_arg, "dt_")) {
8+
class_arg
9+
} else if (is.recursive(expr)) {
10+
unlist(lapply(expr, walk_r_ast_for_classes))
11+
}
12+
}
13+
14+
# Find condition classes in documentation
15+
walk_rd_ast_for_classes = function(rd_element) {
16+
if (!is.list(rd_element)) return(character())
17+
18+
result = character()
19+
if (isTRUE(attr(rd_element, "Rd_tag") == "\\code") && length(rd_element) >= 1L) {
20+
content = rd_element[[1L]]
21+
if (is.character(content) && startsWith(content, "dt_")) {
22+
result = content
23+
}
24+
}
25+
c(result, unlist(lapply(rd_element, walk_rd_ast_for_classes)))
26+
}
27+
28+
code_classes = list.files("R", pattern = "\\.R$", full.names = TRUE) |>
29+
lapply(\(f) lapply(parse(f), walk_r_ast_for_classes)) |>
30+
unlist() |>
31+
unique()
32+
33+
doc_classes = rd_file |>
34+
tools::parse_Rd() |>
35+
walk_rd_ast_for_classes() |>
36+
unique()
37+
38+
miss_in_doc = setdiff(code_classes, doc_classes)
39+
miss_in_code = setdiff(doc_classes, code_classes)
40+
41+
if (length(miss_in_doc) > 0L || length(miss_in_code) > 0L) {
42+
if (length(miss_in_doc) > 0L) {
43+
cat(sprintf("Condition classes in code but missing from docs: %s\n", toString(miss_in_doc)))
44+
}
45+
if (length(miss_in_code) > 0L) {
46+
cat(sprintf("Condition classes in docs but not in code: %s\n", toString(miss_in_code)))
47+
}
48+
stop("Please sync man/datatable-condition-classes.Rd with code condition classes")
49+
}
50+
}
51+
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
on:
2+
workflow_dispatch:
3+
schedule:
4+
- cron: '0 6 * * 1,3,5' # Runs at 06:00 on Mon/Wed/Fri
5+
6+
name: check-cran-status
7+
8+
jobs:
9+
fetch-deadlines:
10+
runs-on: ubuntu-latest
11+
env:
12+
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
13+
permissions:
14+
issues: write
15+
steps:
16+
- uses: actions/checkout@v4
17+
18+
- uses: r-lib/actions/setup-r@v2
19+
with:
20+
use-public-rspm: true
21+
22+
- uses: r-lib/actions/setup-r-dependencies@v2
23+
with:
24+
packages: |
25+
gh
26+
glue
27+
28+
- name: Check for existing CRAN issues
29+
id: check-issues
30+
env:
31+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
32+
run: |
33+
# Count open issues labeled "cran-deadline"
34+
ISSUE_COUNT=$(gh issue list --label "cran-deadline" --state open --json number | jq length)
35+
if [ $ISSUE_COUNT -eq 0 ]; then
36+
echo "should-run=true" >> $GITHUB_OUTPUT
37+
echo "✅ Will run CRAN check"
38+
else
39+
echo "should-run=false" >> $GITHUB_OUTPUT
40+
echo "⏭️ Skipping CRAN check - existing issues found"
41+
fi
42+
43+
- name: Fetch deadline for this package
44+
if: steps.check-issues.outputs.should-run == 'true'
45+
shell: Rscript {0}
46+
run: |
47+
pkgname <- drop(read.dcf("DESCRIPTION", "Package"))
48+
49+
deadline <- subset(tools::CRAN_package_db(), Package == pkgname, "Deadline", drop=TRUE)
50+
51+
if (is.na(deadline)) {
52+
quit()
53+
}
54+
55+
gh::gh(
56+
"POST /repos/{owner_repo}/issues",
57+
owner_repo = Sys.getenv("GITHUB_REPOSITORY"),
58+
title = paste("Fix CRAN R CMD check issues by", deadline),
59+
body = glue::glue(
60+
"This package is failing CRAN checks and is at risk of archival.",
61+
"https://cran.r-project.org/web/checks/check_results_{pkgname}.html",
62+
"This issue was opened by https://github.com/{Sys.getenv('GITHUB_REPOSITORY')}/actions/runs/{Sys.getenv('GITHUB_RUN_ID')}.",
63+
.sep = "\n\n"
64+
),
65+
labels = list("cran-deadline")
66+
)

NEWS.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
## data.table [v1.17.99](https://github.com/Rdatatable/data.table/milestone/35) (in development)
66

7+
### NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES
8+
9+
1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector. For example, `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname=FALSE)`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`.
10+
711
### NEW FEATURES
812

913
1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also match `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
@@ -62,7 +66,7 @@
6266
6367
4. In rare cases, `data.table` failed to expand ALTREP columns when assigning a full column by reference. This could result in the target column getting modified unintentionally if the next call to the data.table was a modification by reference of the source column. E.g. in `DT[, b := as.character(a)]` the string conversion gets deferred and subsequent modification of column `a` would also modify column `b`, [#5400](https://github.com/Rdatatable/data.table/issues/5400). Thanks to @aquasync for the report and Václav Tlapák for the PR.
6468
65-
5. `data.table()` function is now more aligned with `data.frame()` with respect to the names of the output when one of its inputs is a single-column matrix object, [#4124](https://github.com/Rdatatable/data.table/issues/4124). Thanks @PavoDive for the report, @jangorecki for the PR, and @MichaelChirico for a follow-up for back-compatibility.
69+
5. `data.table()` function is now more aligned with `data.frame()` with respect to the names of the output when one of its inputs is a single-column matrix object, [#4124](https://github.com/Rdatatable/data.table/issues/4124), [#3193](https://github.com/Rdatatable/data.table/issues/3193), and [#5367](https://github.com/Rdatatable/data.table/issues/5367). Thanks @PavoDive for the report, @jangorecki for the PR, and @MichaelChirico for a follow-up for back-compatibility.
6670
6771
6. Including an `ITime` object as a named input to `data.frame()` respects the provided name, i.e. `data.frame(a = as.ITime(...))` will have column `a`, [#4673](https://github.com/Rdatatable/data.table/issues/4673). Thanks @shrektan for the report and @MichaelChirico for the fix.
6872
@@ -80,7 +84,9 @@
8084
8185
13. Reference to `.SD` in `...` arguments to `lapply()`, e.g. ``lapply(list_of_tables, `[`, j=.SD[1L])`` is evaluated correctly, [#2982](https://github.com/Rdatatable/data.table/issues/2982). Thanks @franknarf1 for the report and @MichaelChirico for the fix.
8286
83-
14. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.
87+
14. Filling columns of class Date with POSIXct (and vice versa) using `shift()` now yields a clear, informative error message specifying the class mismatch, [#5218](https://github.com/Rdatatable/data.table/issues/5218). Thanks @ashbaldry for the report and @ben-schwen for the fix.
88+
89+
15. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.
8490
8591
### NOTES
8692
@@ -99,6 +105,9 @@
99105
+ On non-Windows systems, `fread()` now prints the reason why the file couldn't be opened, which could also be due to it being too large to map.
100106
+ With `verbose=TRUE`, file sizes are now printed using correct binary SI prefixes (the sizes have always been reported as bytes denominated in powers of `2^10`, so e.g. `1024*1024` bytes was reported as `1 MB` where `1 MiB` or `1.05 MB` is correct).
101107
108+
4. The default `format_list_item()` method (and hence `print.data.table()`) annotates truncated list items with their length, [#605](https://github.com/Rdatatable/data.table/issues/605). Thanks Matt Dowle for the original report (2012!) and @MichaelChirico for the fix.
109+
110+
5. A GitHub Actions workflow is now in place to warn the entire maintainer team, as well as any contributor following the GitHub repository, when the package is at risk of archival on CRAN, [#7008](https://github.com/Rdatatable/data.table/issues/7008). Thanks @tdhock for the original report and @Bisaloo and @TysonStanley for the fix.
102111
103112
# data.table [v1.17.8](https://github.com/Rdatatable/data.table/milestone/41) (6 July 2025)
104113

R/as.data.table.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ as.data.table.matrix = function(x, keep.rownames=FALSE, key=NULL, ...) {
5050
ans = data.table(rn=rownames(x), x, keep.rownames=FALSE)
5151
# auto-inferred name 'x' is not back-compatible & inconsistent, #7145
5252
if (ncol(x) == 1L && is.null(colnames(x)))
53-
setnames(ans, 'x', 'V1')
53+
setnames(ans, 'x', 'V1', skip_absent=TRUE)
5454
if (is.character(keep.rownames))
5555
setnames(ans, 'rn', keep.rownames[1L])
5656
return(ans)
@@ -162,7 +162,7 @@ as.data.table.list = function(x,
162162
xi = x[[i]] = as.POSIXct(xi)
163163
} else if (is.matrix(xi) || is.data.frame(xi)) {
164164
if (!is.data.table(xi)) {
165-
if (is.matrix(xi) && NCOL(xi)<=1L && is.null(colnames(xi))) { # 1 column matrix naming #4124
165+
if (is.matrix(xi) && NCOL(xi)==1L && is.null(colnames(xi)) && isFALSE(getOption('datatable.old.matrix.autoname'))) { # 1 column matrix naming #4124
166166
xi = x[[i]] = c(xi)
167167
} else {
168168
xi = x[[i]] = as.data.table(xi, keep.rownames=keep.rownames) # we will never allow a matrix to be a column; always unpack the columns

R/bmerge.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
9090
next
9191
}
9292
}
93-
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
93+
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type, class="dt_join_type_mismatch_error")
9494
}
9595
if (x_merge_type == i_merge_type) {
9696
if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)
@@ -106,15 +106,15 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
106106
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, from_detail=gettext(" (all-NA)"), verbose=verbose)
107107
next
108108
}
109-
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
109+
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type, class="dt_join_type_mismatch_error")
110110
}
111111
if (x_merge_type=="integer64" || i_merge_type=="integer64") {
112112
nm = c(iname, xname)
113113
if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; setfrev(nm) } # w is which to coerce
114114
if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
115115
from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
116116
coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)
117-
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
117+
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L], class="dt_join_type_mismatch_error")
118118
} else {
119119
# just integer and double left
120120
ic_idx = which(icol == icols) # check if on is joined on multiple conditions, #6602

R/data.table.R

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -97,34 +97,32 @@ replace_dot_alias = function(e) {
9797
}
9898

9999
.checkTypos = function(err, ref) {
100+
err_str <- conditionMessage(err)
100101
# a slightly wonky workaround so that this still works in non-English sessions, #4989
101102
# generate this at run time (as opposed to e.g. onAttach) since session language is
102103
# technically OK to update (though this should be rare), and since it's low-cost
103104
# to do so here because we're about to error anyway.
104-
missing_obj_fmt = gsub(
105-
"'missing_datatable_variable____'",
105+
missing_obj_regex = gsub(
106+
"'____missing_datatable_variable____'",
106107
"'(?<obj_name>[^']+)'",
107-
tryCatch(eval(parse(text="missing_datatable_variable____")), error=identity)$message
108-
# eval(parse()) to avoid "no visible binding for global variable" note from R CMD check
109-
# names starting with _ don't parse, so no leading _ in the name
108+
# eval(quote()) of a backticked symbol to avoid "no visible binding for global variable" note from R CMD check
109+
conditionMessage(tryCatch(eval(quote(`____missing_datatable_variable____`)), error=identity)),
110+
fixed=TRUE
110111
)
111-
idx = regexpr(missing_obj_fmt, err$message, perl=TRUE)
112-
if (idx > 0L) {
113-
start = attr(idx, "capture.start", exact=TRUE)[ , "obj_name"]
114-
used = substr(
115-
err$message,
116-
start,
117-
start + attr(idx, "capture.length", exact=TRUE)[ , "obj_name"] - 1L
118-
)
119-
found = agrep(used, ref, value=TRUE, ignore.case=TRUE, fixed=TRUE)
120-
if (length(found)) {
121-
stopf("Object '%s' not found. Perhaps you intended %s", used, brackify(found))
122-
} else {
123-
stopf("Object '%s' not found amongst %s", used, brackify(ref))
124-
}
112+
idx = regexpr(missing_obj_regex, err_str, perl=TRUE)
113+
if (idx == -1L)
114+
stopf("%s", err_str, domain=NA) # Don't use stopf() directly, since err_str might have '%', #6588
115+
start = attr(idx, "capture.start", exact=TRUE)[ , "obj_name"]
116+
used = substr(
117+
err_str,
118+
start,
119+
start + attr(idx, "capture.length", exact=TRUE)[ , "obj_name"] - 1L
120+
)
121+
found = agrep(used, ref, value=TRUE, ignore.case=TRUE, fixed=TRUE)
122+
if (length(found)) {
123+
stopf("Object '%s' not found. Perhaps you intended %s", used, brackify(found))
125124
} else {
126-
# Don't use stopf() directly, since err$message might have '%', #6588
127-
stopf("%s", err$message, domain=NA)
125+
stopf("Object '%s' not found amongst %s", used, brackify(ref))
128126
}
129127
}
130128

R/groupingsets.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ rollup = function(x, ...) {
44
rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
55
# input data type basic validation
66
if (!is.data.table(x))
7-
stopf("Argument 'x' must be a data.table object")
7+
stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error")
88
if (!is.character(by))
99
stopf("Argument 'by' must be a character vector of column names used in grouping.")
1010
if (!is.logical(id))
@@ -22,7 +22,7 @@ cube = function(x, ...) {
2222
cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
2323
# input data type basic validation
2424
if (!is.data.table(x))
25-
stopf("Argument 'x' must be a data.table object")
25+
stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error")
2626
if (!is.character(by))
2727
stopf("Argument 'by' must be a character vector of column names used in grouping.")
2828
if (!is.logical(id))

R/merge.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
3434
warningf("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.")
3535
if (!is.null(by.x)) {
3636
if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y))
37-
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
37+
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.", class="dt_invalid_input_error")
3838
if (!all(idx <- by.x %chin% nm_x)) {
3939
stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx]))
4040
}

R/onLoad.R

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -73,28 +73,29 @@
7373
# In fread and fwrite we have moved back to using getOption's default argument since it is unlikely fread and fwrite will be called in a loop many times, plus they
7474
# are relatively heavy functions where the overhead in getOption() would not be noticed. It's only really in [.data.table that the overhead of getOption's default argument bit.
7575
# Improvement to base::getOption() now submitted (100x; 5s down to 0.05s): https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17394
76-
opts = c("datatable.verbose"="FALSE", # datatable.<argument name>
77-
"datatable.optimize"="Inf", # datatable.<argument name>
78-
"datatable.print.nrows"="100L", # datatable.<argument name>
79-
"datatable.print.topn"="5L", # datatable.<argument name>
80-
"datatable.print.class"="TRUE", # for print.data.table
81-
"datatable.print.rownames"="TRUE", # for print.data.table
82-
"datatable.print.colnames"="'auto'", # for print.data.table
83-
"datatable.print.keys"="TRUE", # for print.data.table
84-
"datatable.print.trunc.cols"="FALSE", # for print.data.table
85-
"datatable.show.indices"="FALSE", # for print.data.table
86-
"datatable.allow.cartesian"="FALSE", # datatable.<argument name>
87-
"datatable.join.many"="TRUE", # mergelist, [.data.table #4383 #914
88-
"datatable.dfdispatchwarn"="TRUE", # not a function argument
89-
"datatable.warnredundantby"="TRUE", # not a function argument
90-
"datatable.alloccol"="1024L", # argument 'n' of alloc.col. Over-allocate 1024 spare column slots
91-
"datatable.auto.index"="TRUE", # DT[col=="val"] to auto add index so 2nd time faster
92-
"datatable.use.index"="TRUE", # global switch to address #1422
93-
"datatable.prettyprint.char" = NULL # FR #1091
94-
)
95-
for (i in setdiff(names(opts),names(options()))) {
96-
eval(parse(text=paste0("options(",i,"=",opts[i],")")))
97-
}
76+
opts = list(
77+
datatable.verbose=FALSE, # datatable.<argument name>
78+
datatable.optimize=Inf, # datatable.<argument name>
79+
datatable.print.nrows=100L, # datatable.<argument name>
80+
datatable.print.topn=5L, # datatable.<argument name>
81+
datatable.print.class=TRUE, # for print.data.table
82+
datatable.print.rownames=TRUE, # for print.data.table
83+
datatable.print.colnames='auto', # for print.data.table
84+
datatable.print.keys=TRUE, # for print.data.table
85+
datatable.print.trunc.cols=FALSE, # for print.data.table
86+
datatable.show.indices=FALSE, # for print.data.table
87+
datatable.allow.cartesian=FALSE, # datatable.<argument name>
88+
datatable.join.many=TRUE, # mergelist, [.data.table #4383 #914
89+
datatable.dfdispatchwarn=TRUE, # not a function argument
90+
datatable.warnredundantby=TRUE, # not a function argument
91+
datatable.alloccol=1024L, # argument 'n' of alloc.col. Over-allocate 1024 spare column slots
92+
datatable.auto.index=TRUE, # DT[col=="val"] to auto add index so 2nd time faster
93+
datatable.use.index=TRUE, # global switch to address #1422
94+
datatable.prettyprint.char=NULL, # FR #1091
95+
datatable.old.matrix.autoname=TRUE # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
96+
)
97+
opts = opts[!names(opts) %chin% names(options())]
98+
options(opts)
9899

99100
# Test R behaviour that changed in v3.1 and is now depended on
100101
x = 1L:3L

0 commit comments

Comments
 (0)