Skip to content

Commit d79b465

Browse files
Merge branch 'master' into issue_2606
2 parents f39df47 + 48ece3b commit d79b465

File tree

12 files changed

+390
-158
lines changed

12 files changed

+390
-158
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Lint check: the "dt_"-prefixed condition classes used in the package's R code
# must be exactly the ones listed in the data.table-condition-classes Rd page.
#
# rd_file: path to an Rd file. A file whose first 100 characters do not contain
#   \name{data.table-condition-classes} is skipped.
# Returns NULL invisibly when the file is skipped or when code and docs agree.
#   Otherwise each one-sided difference is printed with cat() and an error is raised.
# R sources are looked up in the "R" directory relative to the working directory.
condition_classes_documentation_linter = function(rd_file) {
  rd_head = readChar(rd_file, 100L)
  if (!grepl("\\name{data.table-condition-classes}", rd_head, fixed = TRUE)) return(invisible())

  # Signalling helpers whose literal `class=` argument is collected
  signal_funs = c("stopf", "warningf", "messagef", "packageStartupMessagef")

  # Recursively gather literal "dt_*" class arguments from a parsed R expression
  collect_code_classes = function(node) {
    if (is.call(node)) {
      callee = node[[1L]]
      if (is.name(callee) && as.character(callee) %in% signal_funs) {
        cls = node[["class"]]
        if (is.character(cls) && startsWith(cls, "dt_")) return(cls)
      }
    }
    if (!is.recursive(node)) return(NULL)
    unlist(lapply(node, collect_code_classes))
  }

  # Recursively gather the leading text of every \code{} element starting with "dt_"
  collect_doc_classes = function(rd_node) {
    if (!is.list(rd_node)) return(character())

    found_here = character()
    is_code_tag = isTRUE(attr(rd_node, "Rd_tag") == "\\code")
    if (is_code_tag && length(rd_node) >= 1L) {
      first_child = rd_node[[1L]]
      if (is.character(first_child) && startsWith(first_child, "dt_")) found_here = first_child
    }
    nested = unlist(lapply(rd_node, collect_doc_classes))
    c(found_here, nested)
  }

  r_sources = list.files("R", pattern = "\\.R$", full.names = TRUE)
  per_file = lapply(r_sources, function(path) lapply(parse(path), collect_code_classes))
  code_classes = unique(unlist(per_file))

  doc_classes = unique(collect_doc_classes(tools::parse_Rd(rd_file)))

  undocumented = setdiff(code_classes, doc_classes)
  unused = setdiff(doc_classes, code_classes)

  # Nothing to report: code and documentation agree
  if (length(undocumented) == 0L && length(unused) == 0L) return(invisible())

  if (length(undocumented) > 0L) {
    cat(sprintf("Condition classes in code but missing from docs: %s\n", toString(undocumented)))
  }
  if (length(unused) > 0L) {
    cat(sprintf("Condition classes in docs but not in code: %s\n", toString(unused)))
  }
  stop("Please sync man/datatable-condition-classes.Rd with code condition classes")
}
51+
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Opens a GitHub issue labelled "cran-deadline" when the CRAN package database
# lists a fix-by deadline for the package described in DESCRIPTION.
on:
  workflow_dispatch:
  schedule:
    - cron: '0 6 * * 1,3,5' # Runs at 06:00 UTC on Mon/Wed/Fri

name: check-cran-status

jobs:
  fetch-deadlines:
    runs-on: ubuntu-latest
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    permissions:
      # needed to create the issue in the last step
      issues: write
    steps:
      - uses: actions/checkout@v4

      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          packages: |
            gh
            glue

      # Avoid opening a duplicate: only continue when no open issue carries the label
      - name: Check for existing CRAN issues
        id: check-issues
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Count open issues with CRAN-related labels
          ISSUE_COUNT=$(gh issue list --label "cran-deadline" --state open --json number | jq length)
          # Quote expansions so an empty or unexpected value cannot be word-split
          if [ "$ISSUE_COUNT" -eq 0 ]; then
            echo "should-run=true" >> "$GITHUB_OUTPUT"
            echo "✅ Will run CRAN check"
          else
            echo "should-run=false" >> "$GITHUB_OUTPUT"
            echo "⏭️ Skipping CRAN check - existing issues found"
          fi

      - name: Fetch deadline for this package
        if: steps.check-issues.outputs.should-run == 'true'
        shell: Rscript {0}
        run: |
          pkgname <- drop(read.dcf("DESCRIPTION", "Package"))

          deadline <- subset(tools::CRAN_package_db(), Package == pkgname, "Deadline", drop=TRUE)

          # The lookup may match zero rows (package absent from the CRAN db) or
          # several; `if (is.na(x))` is an error unless x has length one, so drop
          # missing values first and test the length instead.
          deadline <- deadline[!is.na(deadline)]

          if (length(deadline) == 0L) {
            quit()
          }

          deadline <- deadline[[1L]]

          gh::gh(
            "POST /repos/{owner_repo}/issues",
            owner_repo = Sys.getenv("GITHUB_REPOSITORY"),
            title = paste("Fix CRAN R CMD check issues by", deadline),
            body = glue::glue(
              "This package is failing CRAN checks and is at risk of archival.",
              "https://cran.r-project.org/web/checks/check_results_{pkgname}.html",
              "This issue was opened by https://github.com/{Sys.getenv('GITHUB_REPOSITORY')}/actions/runs/{Sys.getenv('GITHUB_RUN_ID')}.",
              .sep = "\n\n"
            ),
            labels = list("cran-deadline")
          )

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@
105105
106106
4. The default `format_list_item()` method (and hence `print.data.table()`) annotates truncated list items with their length, [#605](https://github.com/Rdatatable/data.table/issues/605). Thanks Matt Dowle for the original report (2012!) and @MichaelChirico for the fix.
107107
108+
5. A GitHub Actions workflow is now in place to warn the entire maintainer team, as well as any contributor following the GitHub repository, when the package is at risk of archival on CRAN, [#7008](https://github.com/Rdatatable/data.table/issues/7008). Thanks @tdhock for the original report and @Bisaloo and @TysonStanley for the fix.
109+
108110
# data.table [v1.17.8](https://github.com/Rdatatable/data.table/milestone/41) (6 July 2025)
109111
110112
1. Internal functions used to signal errors are now marked as non-returning, silencing a compiler warning about potentially unchecked allocation failure. Thanks to Prof. Brian D. Ripley for the report and @aitap for the fix, [#7070](https://github.com/Rdatatable/data.table/pull/7070).

R/bmerge.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
9090
next
9191
}
9292
}
93-
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
93+
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type, class="dt_join_type_mismatch_error")
9494
}
9595
if (x_merge_type == i_merge_type) {
9696
if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)
@@ -106,15 +106,15 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
106106
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, from_detail=gettext(" (all-NA)"), verbose=verbose)
107107
next
108108
}
109-
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
109+
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type, class="dt_join_type_mismatch_error")
110110
}
111111
if (x_merge_type=="integer64" || i_merge_type=="integer64") {
112112
nm = c(iname, xname)
113113
if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; setfrev(nm) } # w is which to coerce
114114
if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
115115
from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
116116
coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)
117-
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
117+
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L], class="dt_join_type_mismatch_error")
118118
} else {
119119
# just integer and double left
120120
ic_idx = which(icol == icols) # check if on is joined on multiple conditions, #6602

R/groupingsets.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ rollup = function(x, ...) {
44
rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
55
# input data type basic validation
66
if (!is.data.table(x))
7-
stopf("Argument 'x' must be a data.table object")
7+
stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error")
88
if (!is.character(by))
99
stopf("Argument 'by' must be a character vector of column names used in grouping.")
1010
if (!is.logical(id))
@@ -22,7 +22,7 @@ cube = function(x, ...) {
2222
cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
2323
# input data type basic validation
2424
if (!is.data.table(x))
25-
stopf("Argument 'x' must be a data.table object")
25+
stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error")
2626
if (!is.character(by))
2727
stopf("Argument 'by' must be a character vector of column names used in grouping.")
2828
if (!is.logical(id))

R/merge.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
3434
warningf("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.")
3535
if (!is.null(by.x)) {
3636
if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y))
37-
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
37+
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.", class="dt_invalid_input_error")
3838
if (!all(idx <- by.x %chin% nm_x)) {
3939
stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx]))
4040
}

R/setkey.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
4343
if (!all(nzchar(cols))) stopf("cols contains some blanks.")
4444
cols = gsub("`", "", cols, fixed = TRUE)
4545
miss = !(cols %chin% colnames(x))
46-
if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]))
46+
if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_column_error")
4747

4848
if (physical && identical(head(key(x), length(cols)), cols)){ ## for !physical we need to compute groups as well #4387
4949
## key is present but x has a longer key. No sorting needed, only attribute is changed to shorter key.
@@ -54,7 +54,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
5454
if (".xi" %chin% names(x)) stopf("x contains a column called '.xi'. Conflicts with internal use by data.table.")
5555
for (i in cols) {
5656
.xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column
57-
if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi))
57+
if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi), class="dt_unsortable_type_error")
5858
}
5959
if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov
6060

@@ -266,11 +266,11 @@ setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE)
266266
# remove backticks from cols
267267
cols = gsub("`", "", cols, fixed = TRUE)
268268
miss = !(cols %chin% colnames(x))
269-
if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]))
269+
if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_column_error")
270270
if (".xi" %chin% colnames(x)) stopf("x contains a column called '.xi'. Conflicts with internal use by data.table.")
271271
for (i in cols) {
272272
.xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column
273-
if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi))
273+
if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi), class="dt_unsortable_type_error")
274274
}
275275
if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov
276276

R/setops.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ setdiff_ = function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE)
1414
icnam = names(y)[lc]
1515
xcnam = names(x)[rc]
1616
if ( is.character(x[[rc]]) && !(is.character(y[[lc]]) || is.factor(y[[lc]])) ) {
17-
stopf("When x's column ('%s') is character, the corresponding column in y ('%s') should be factor or character, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]))
17+
stopf("When x's column ('%s') is character, the corresponding column in y ('%s') should be factor or character, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_join_type_mismatch_error")
1818
} else if ( is.factor(x[[rc]]) && !(is.character(y[[lc]]) || is.factor(y[[lc]])) ) {
19-
stopf("When x's column ('%s') is factor, the corresponding column in y ('%s') should be character or factor, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]))
19+
stopf("When x's column ('%s') is factor, the corresponding column in y ('%s') should be character or factor, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_join_type_mismatch_error")
2020
} else if ( (is.integer(x[[rc]]) || is.double(x[[rc]])) && (is.logical(y[[lc]]) || is.character(y[[lc]])) ) {
21-
stopf("When x's column ('%s') is integer or numeric, the corresponding column in y ('%s') can not be character or logical types, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]))
21+
stopf("When x's column ('%s') is integer or numeric, the corresponding column in y ('%s') can not be character or logical types, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_join_type_mismatch_error")
2222
}
2323
}
2424
ux = unique(shallow(x, by.x))
@@ -52,7 +52,7 @@ funique = function(x) {
5252
}
5353
if (!identical(sx<-sapply(x, super), sy<-sapply(y, super))) {
5454
w = which.first(sx!=sy)
55-
stopf("Item %d of x is '%s' but the corresponding item of y is '%s'.", w, class1(x[[w]]), class1(y[[w]]))
55+
stopf("Item %d of x is '%s' but the corresponding item of y is '%s'.", w, class1(x[[w]]), class1(y[[w]]), class="dt_join_type_mismatch_error")
5656
}
5757
if (.seqn && ".seqn" %chin% names(x)) stopf("None of the datasets should contain a column named '.seqn'")
5858
}

inst/tests/tests.Rraw

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21524,6 +21524,12 @@ test(2332.81, {M1[]=frev(M1); M1}, {M2[]=rev(M2); M2})
2152421524
# regression test of edge case report #4964
2152521525
test(2333, as.expression(data.table(a = 1))[["a"]], 1)
2152621526

21527+
# regression test for hexdigits subscript overrun (uint8_t wraps around past 255; unsigned overflow is well defined in C)
21528+
f = tempfile()
21529+
writeLines(c('a', rep('0x1.ffffp0', 10000L), '0x1.ff\x9fp0', rep('0x1.ffffp0', 20000L)), f)
21530+
test(2334, names(fread(f)), "a")
21531+
unlink(f)
21532+
2152721533
#2606 Recursive tables() naming convention
2152821534
local({
2152921535
lst_named <- list(inner = data.table(a = 1))
@@ -21536,15 +21542,15 @@ local({
2153621542
"lst_named$inner", "lst_unnamed[[1]]", "nested$l1$l2",
2153721543
"mixed[[1]]", "mixed$y",
2153821544
"mixed_nested$A[[1]]", "mixed_nested$A$q")
21539-
test(2334.1, identical(out, sort(expected)))
21545+
test(2335.1, identical(out, sort(expected)))
2154021546
})
2154121547
local({
2154221548
dt <- data.table(val = 42)
2154321549
e <- new.env()
2154421550
e$dt <- dt
2154521551
e$self <- e
2154621552
out <- tables(recursive = TRUE, env = e)$NAME
21547-
test(2334.2, identical(out, "dt"))
21553+
test(2335.2, identical(out, "dt"))
2154821554
})
2154921555
local({
2155021556
test_obj <- local({
@@ -21555,7 +21561,7 @@ local({
2155521561
outer_list$second <- outer_list$first
2155621562
outer_list})
2155721563
out <- tables(recursive = TRUE)$NAME
21558-
test(2334.3,
21564+
test(2335.3,
2155921565
(length(out) == 2) &&
2156021566
("test_obj$unique" %in% out) &&
2156121567
(sum(grepl("\\$dt_inner$", out)) == 1))
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
\name{data.table-condition-classes}
2+
\alias{data.table-condition-classes}
3+
\title{Condition Handling with Classed Conditions}
4+
\description{
5+
\code{data.table} provides specific condition classes for common operations, making it easier to handle conditions programmatically. This is particularly useful when writing robust code or packages that use \code{data.table}. Relying on the exact text of condition messages is fragile (it is not uncommon to change the wording slightly, or for the user's session not to be in English); prefer using the signal class where possible.
6+
}
7+
\details{
8+
\subsection{Available Condition Classes}{
9+
\code{data.table} provides the following specific condition classes:
10+
11+
Error Classes:
12+
\itemize{
13+
\item \code{dt_missing_column_error}: When referencing columns that don't exist
14+
\item \code{dt_invalid_input_error}: When providing invalid input types or empty required arguments
15+
\item \code{dt_unsortable_type_error}: When trying to sort/key unsupported types
16+
\item \code{dt_join_type_mismatch_error}: When column types are incompatible in joins/set operations
17+
\item \code{dt_invalid_let_error}: When using assignment operators incorrectly
18+
}
19+
20+
Warning Classes:
21+
\itemize{
22+
\item \code{dt_missing_fun_aggregate_warning}: When an aggregation function is missing in operations that require it
23+
}
24+
}
25+
26+
\subsection{Backward Compatibility}{
27+
All condition classes inherit from base R's condition system, so existing \code{tryCatch(..., error = ...)} code continues to work unchanged. The new classes simply provide more specific handling options when needed.
28+
}
29+
}
30+
\examples{
31+
32+
# Handle missing column errors specifically
33+
DT <- data.table(a = 1:3, b = 4:6)
34+
tryCatch({
35+
setkey(DT, nonexistent_col)
36+
}, dt_missing_column_error = function(e) {
37+
cat("Missing column detected:", conditionMessage(e), "\n")
38+
}, error = function(e) {
39+
cat("Other error:", conditionMessage(e), "\n")
40+
})
41+
}
42+
\seealso{
43+
\code{\link{tryCatch}}, \code{\link{test}}, \url{https://adv-r.hadley.nz/conditions.html}
44+
}

0 commit comments

Comments
 (0)