Skip to content

Commit a8ddaf7

Browse files
authored
Merge branch 'master' into issue7171
2 parents 20be6f3 + 0a11ffa commit a8ddaf7

File tree

18 files changed

+1197
-205
lines changed

18 files changed

+1197
-205
lines changed

.ci/linters/r/eval_parse_linter.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
eval_parse_linter = make_linter_from_xpath(
2+
"//SYMBOL_FUNCTION_CALL[text() = 'parse']
3+
/ancestor::expr
4+
/preceding-sibling::expr[SYMBOL_FUNCTION_CALL[text() = 'eval']]
5+
/parent::expr
6+
",
7+
"Avoid eval(parse()); build the language directly, possibly using substitute2()."
8+
)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Ensure that data.table condition classes in code match documentation
2+
condition_classes_documentation_linter = function(rd_file) {
3+
if (!grepl("\\name{data.table-condition-classes}", readChar(rd_file, 100L), fixed = TRUE)) return(invisible())
4+
5+
# Find condition classes in R code
6+
walk_r_ast_for_classes = function(expr) {
7+
if (is.call(expr) && is.name(e <- expr[[1L]]) && as.character(e) %in% c("stopf", "warningf", "messagef", "packageStartupMessagef") && is.character(class_arg <- expr[["class"]]) && startsWith(class_arg, "dt_")) {
8+
class_arg
9+
} else if (is.recursive(expr)) {
10+
unlist(lapply(expr, walk_r_ast_for_classes))
11+
}
12+
}
13+
14+
# Find condition classes in documentation
15+
walk_rd_ast_for_classes = function(rd_element) {
16+
if (!is.list(rd_element)) return(character())
17+
18+
result = character()
19+
if (isTRUE(attr(rd_element, "Rd_tag") == "\\code") && length(rd_element) >= 1L) {
20+
content = rd_element[[1L]]
21+
if (is.character(content) && startsWith(content, "dt_")) {
22+
result = content
23+
}
24+
}
25+
c(result, unlist(lapply(rd_element, walk_rd_ast_for_classes)))
26+
}
27+
28+
code_classes = list.files("R", pattern = "\\.R$", full.names = TRUE) |>
29+
lapply(\(f) lapply(parse(f), walk_r_ast_for_classes)) |>
30+
unlist() |>
31+
unique()
32+
33+
doc_classes = rd_file |>
34+
tools::parse_Rd() |>
35+
walk_rd_ast_for_classes() |>
36+
unique()
37+
38+
miss_in_doc = setdiff(code_classes, doc_classes)
39+
miss_in_code = setdiff(doc_classes, code_classes)
40+
41+
if (length(miss_in_doc) > 0L || length(miss_in_code) > 0L) {
42+
if (length(miss_in_doc) > 0L) {
43+
cat(sprintf("Condition classes in code but missing from docs: %s\n", toString(miss_in_doc)))
44+
}
45+
if (length(miss_in_code) > 0L) {
46+
cat(sprintf("Condition classes in docs but not in code: %s\n", toString(miss_in_code)))
47+
}
48+
stop("Please sync man/datatable-condition-classes.Rd with code condition classes")
49+
}
50+
}
51+
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
on:
2+
workflow_dispatch:
3+
schedule:
4+
- cron: '0 6 * * 1,3,5' # Runs at 06:00 on Mon/Wed/Fri
5+
6+
name: check-cran-status
7+
8+
jobs:
9+
fetch-deadlines:
10+
runs-on: ubuntu-latest
11+
env:
12+
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
13+
permissions:
14+
issues: write
15+
steps:
16+
- uses: actions/checkout@v4
17+
18+
- uses: r-lib/actions/setup-r@v2
19+
with:
20+
use-public-rspm: true
21+
22+
- uses: r-lib/actions/setup-r-dependencies@v2
23+
with:
24+
packages: |
25+
gh
26+
glue
27+
28+
- name: Check for existing CRAN issues
29+
id: check-issues
30+
env:
31+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
32+
run: |
33+
# Count open issues carrying the "cran-deadline" label
34+
ISSUE_COUNT=$(gh issue list --label "cran-deadline" --state open --json number | jq length)
35+
if [ $ISSUE_COUNT -eq 0 ]; then
36+
echo "should-run=true" >> $GITHUB_OUTPUT
37+
echo "✅ Will run CRAN check"
38+
else
39+
echo "should-run=false" >> $GITHUB_OUTPUT
40+
echo "⏭️ Skipping CRAN check - existing issues found"
41+
fi
42+
43+
- name: Fetch deadline for this package
44+
if: steps.check-issues.outputs.should-run == 'true'
45+
shell: Rscript {0}
46+
run: |
47+
pkgname <- drop(read.dcf("DESCRIPTION", "Package"))
48+
49+
deadline <- subset(tools::CRAN_package_db(), Package == pkgname, "Deadline", drop=TRUE)
50+
51+
if (is.na(deadline)) {
52+
quit()
53+
}
54+
55+
gh::gh(
56+
"POST /repos/{owner_repo}/issues",
57+
owner_repo = Sys.getenv("GITHUB_REPOSITORY"),
58+
title = paste("Fix CRAN R CMD check issues by", deadline),
59+
body = glue::glue(
60+
"This package is failing CRAN checks and is at risk of archival.",
61+
"https://cran.r-project.org/web/checks/check_results_{pkgname}.html",
62+
"This issue was opened by https://github.com/{Sys.getenv('GITHUB_REPOSITORY')}/actions/runs/{Sys.getenv('GITHUB_RUN_ID')}.",
63+
.sep = "\n\n"
64+
),
65+
labels = list("cran-deadline")
66+
)

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@
116116
117117
13. Reference to `.SD` in `...` arguments to `lapply()`, e.g. ``lapply(list_of_tables, `[`, j=.SD[1L])`` is evaluated correctly, [#2982](https://github.com/Rdatatable/data.table/issues/2982). Thanks @franknarf1 for the report and @MichaelChirico for the fix.
118118
119+
14. Filling columns of class Date with POSIXct (and vice versa) using `shift()` now yields a clear, informative error message specifying the class mismatch, [#5218](https://github.com/Rdatatable/data.table/issues/5218). Thanks @ashbaldry for the report and @ben-schwen for the fix.
120+
119121
### NOTES
120122
121123
1. The following in-progress deprecations have proceeded:
@@ -135,6 +137,8 @@
135137
136138
4. The default `format_list_item()` method (and hence `print.data.table()`) annotates truncated list items with their length, [#605](https://github.com/Rdatatable/data.table/issues/605). Thanks Matt Dowle for the original report (2012!) and @MichaelChirico for the fix.
137139
140+
5. A GitHub Actions workflow is now in place to warn the entire maintainer team, as well as any contributor following the GitHub repository, when the package is at risk of archival on CRAN, [#7008](https://github.com/Rdatatable/data.table/issues/7008). Thanks @tdhock for the original report and @Bisaloo and @TysonStanley for the fix.
141+
138142
# data.table [v1.17.8](https://github.com/Rdatatable/data.table/milestone/41) (6 July 2025)
139143
140144
1. Internal functions used to signal errors are now marked as non-returning, silencing a compiler warning about potentially unchecked allocation failure. Thanks to Prof. Brian D. Ripley for the report and @aitap for the fix, [#7070](https://github.com/Rdatatable/data.table/pull/7070).

R/bmerge.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
9090
next
9191
}
9292
}
93-
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
93+
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type, class="dt_join_type_mismatch_error")
9494
}
9595
if (x_merge_type == i_merge_type) {
9696
if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)
@@ -106,15 +106,15 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
106106
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, from_detail=gettext(" (all-NA)"), verbose=verbose)
107107
next
108108
}
109-
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
109+
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type, class="dt_join_type_mismatch_error")
110110
}
111111
if (x_merge_type=="integer64" || i_merge_type=="integer64") {
112112
nm = c(iname, xname)
113113
if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; setfrev(nm) } # w is which to coerce
114114
if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
115115
from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
116116
coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)
117-
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
117+
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L], class="dt_join_type_mismatch_error")
118118
} else {
119119
# just integer and double left
120120
ic_idx = which(icol == icols) # check if on is joined on multiple conditions, #6602

R/data.table.R

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -97,34 +97,32 @@ replace_dot_alias = function(e) {
9797
}
9898

9999
.checkTypos = function(err, ref) {
100+
err_str <- conditionMessage(err)
100101
# a slightly wonky workaround so that this still works in non-English sessions, #4989
101102
# generate this at run time (as opposed to e.g. onAttach) since session language is
102103
# technically OK to update (though this should be rare), and since it's low-cost
103104
# to do so here because we're about to error anyway.
104-
missing_obj_fmt = gsub(
105-
"'missing_datatable_variable____'",
105+
missing_obj_regex = gsub(
106+
"'____missing_datatable_variable____'",
106107
"'(?<obj_name>[^']+)'",
107-
tryCatch(eval(parse(text="missing_datatable_variable____")), error=identity)$message
108-
# eval(parse()) to avoid "no visible binding for global variable" note from R CMD check
109-
# names starting with _ don't parse, so no leading _ in the name
108+
# quote() to avoid "no visible binding for global variable" note from R CMD check
109+
conditionMessage(tryCatch(eval(quote(`____missing_datatable_variable____`)), error=identity)),
110+
fixed=TRUE
110111
)
111-
idx = regexpr(missing_obj_fmt, err$message, perl=TRUE)
112-
if (idx > 0L) {
113-
start = attr(idx, "capture.start", exact=TRUE)[ , "obj_name"]
114-
used = substr(
115-
err$message,
116-
start,
117-
start + attr(idx, "capture.length", exact=TRUE)[ , "obj_name"] - 1L
118-
)
119-
found = agrep(used, ref, value=TRUE, ignore.case=TRUE, fixed=TRUE)
120-
if (length(found)) {
121-
stopf("Object '%s' not found. Perhaps you intended %s", used, brackify(found))
122-
} else {
123-
stopf("Object '%s' not found amongst %s", used, brackify(ref))
124-
}
112+
idx = regexpr(missing_obj_regex, err_str, perl=TRUE)
113+
if (idx == -1L)
114+
stopf("%s", err_str, domain=NA) # Don't use stopf() directly, since err_str might have '%', #6588
115+
start = attr(idx, "capture.start", exact=TRUE)[ , "obj_name"]
116+
used = substr(
117+
err_str,
118+
start,
119+
start + attr(idx, "capture.length", exact=TRUE)[ , "obj_name"] - 1L
120+
)
121+
found = agrep(used, ref, value=TRUE, ignore.case=TRUE, fixed=TRUE)
122+
if (length(found)) {
123+
stopf("Object '%s' not found. Perhaps you intended %s", used, brackify(found))
125124
} else {
126-
# Don't use stopf() directly, since err$message might have '%', #6588
127-
stopf("%s", err$message, domain=NA)
125+
stopf("Object '%s' not found amongst %s", used, brackify(ref))
128126
}
129127
}
130128

R/groupingsets.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ rollup = function(x, ...) {
44
rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
55
# input data type basic validation
66
if (!is.data.table(x))
7-
stopf("Argument 'x' must be a data.table object")
7+
stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error")
88
if (!is.character(by))
99
stopf("Argument 'by' must be a character vector of column names used in grouping.")
1010
if (!is.logical(id))
@@ -22,7 +22,7 @@ cube = function(x, ...) {
2222
cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
2323
# input data type basic validation
2424
if (!is.data.table(x))
25-
stopf("Argument 'x' must be a data.table object")
25+
stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error")
2626
if (!is.character(by))
2727
stopf("Argument 'by' must be a character vector of column names used in grouping.")
2828
if (!is.logical(id))

R/merge.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
3434
warningf("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.")
3535
if (!is.null(by.x)) {
3636
if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y))
37-
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
37+
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.", class="dt_invalid_input_error")
3838
if (!all(idx <- by.x %chin% nm_x)) {
3939
stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx]))
4040
}

R/onLoad.R

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -73,31 +73,29 @@
7373
# In fread and fwrite we have moved back to using getOption's default argument since it is unlikely fread and fwrite will be called in a loop many times, plus they
7474
# are relatively heavy functions where the overhead in getOption() would not be noticed. It's only really in [.data.table that the overhead of getOption's default argument was noticeable.
7575
# Improvement to base::getOption() now submitted (100x; 5s down to 0.05s): https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17394
76-
opts = c(
77-
"datatable.verbose"="FALSE", # datatable.<argument name>
78-
"datatable.optimize"="Inf", # datatable.<argument name>
79-
"datatable.print.nrows"="100L", # datatable.<argument name>
80-
"datatable.print.topn"="5L", # datatable.<argument name>
81-
"datatable.print.class"="TRUE", # for print.data.table
82-
"datatable.print.rownames"="TRUE", # for print.data.table
83-
"datatable.print.colnames"="'auto'", # for print.data.table
84-
"datatable.print.keys"="TRUE", # for print.data.table
85-
"datatable.print.trunc.cols"="FALSE", # for print.data.table
86-
"datatable.show.indices"="FALSE", # for print.data.table
87-
"datatable.allow.cartesian"="FALSE", # datatable.<argument name>
88-
"datatable.join.many"="TRUE", # mergelist, [.data.table #4383 #914
89-
"datatable.dfdispatchwarn"="TRUE", # not a function argument
90-
"datatable.warnredundantby"="TRUE", # not a function argument
91-
"datatable.alloccol"="1024L", # argument 'n' of alloc.col. Over-allocate 1024 spare column slots
92-
"datatable.auto.index"="TRUE", # DT[col=="val"] to auto add index so 2nd time faster
93-
"datatable.use.index"="TRUE", # global switch to address #1422
94-
"datatable.prettyprint.char" = NULL, # FR #1091
95-
"datatable.old.matrix.autoname"="TRUE", # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
96-
NULL
97-
)
98-
for (i in setdiff(names(opts),names(options()))) {
99-
eval(parse(text=paste0("options(",i,"=",opts[i],")")))
100-
}
76+
opts = list(
77+
datatable.verbose=FALSE, # datatable.<argument name>
78+
datatable.optimize=Inf, # datatable.<argument name>
79+
datatable.print.nrows=100L, # datatable.<argument name>
80+
datatable.print.topn=5L, # datatable.<argument name>
81+
datatable.print.class=TRUE, # for print.data.table
82+
datatable.print.rownames=TRUE, # for print.data.table
83+
datatable.print.colnames='auto', # for print.data.table
84+
datatable.print.keys=TRUE, # for print.data.table
85+
datatable.print.trunc.cols=FALSE, # for print.data.table
86+
datatable.show.indices=FALSE, # for print.data.table
87+
datatable.allow.cartesian=FALSE, # datatable.<argument name>
88+
datatable.join.many=TRUE, # mergelist, [.data.table #4383 #914
89+
datatable.dfdispatchwarn=TRUE, # not a function argument
90+
datatable.warnredundantby=TRUE, # not a function argument
91+
datatable.alloccol=1024L, # argument 'n' of alloc.col. Over-allocate 1024 spare column slots
92+
datatable.auto.index=TRUE, # DT[col=="val"] to auto add index so 2nd time faster
93+
datatable.use.index=TRUE, # global switch to address #1422
94+
datatable.prettyprint.char=NULL, # FR #1091
95+
datatable.old.matrix.autoname=TRUE # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
96+
)
97+
opts = opts[!names(opts) %chin% names(options())]
98+
options(opts)
10199

102100
# Test R behaviour that changed in v3.1 and is now depended on
103101
x = 1L:3L

R/setkey.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
4343
if (!all(nzchar(cols))) stopf("cols contains some blanks.")
4444
cols = gsub("`", "", cols, fixed = TRUE)
4545
miss = !(cols %chin% colnames(x))
46-
if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]))
46+
if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_column_error")
4747

4848
if (physical && identical(head(key(x), length(cols)), cols)){ ## for !physical we need to compute groups as well #4387
4949
## key is present but x has a longer key. No sorting needed, only attribute is changed to shorter key.
@@ -54,7 +54,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
5454
if (".xi" %chin% names(x)) stopf("x contains a column called '.xi'. Conflicts with internal use by data.table.")
5555
for (i in cols) {
5656
.xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column
57-
if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi))
57+
if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi), class="dt_unsortable_type_error")
5858
}
5959
if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov
6060

@@ -266,11 +266,11 @@ setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE)
266266
# remove backticks from cols
267267
cols = gsub("`", "", cols, fixed = TRUE)
268268
miss = !(cols %chin% colnames(x))
269-
if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]))
269+
if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_column_error")
270270
if (".xi" %chin% colnames(x)) stopf("x contains a column called '.xi'. Conflicts with internal use by data.table.")
271271
for (i in cols) {
272272
.xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column
273-
if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi))
273+
if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi), class="dt_unsortable_type_error")
274274
}
275275
if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov
276276

0 commit comments

Comments
 (0)