Skip to content

Commit 57effbc

Browse files
committed
Merge branch 'master' into froll2025max
2 parents b89aec7 + 053d905 commit 57effbc

File tree

146 files changed

+8830
-2276
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

146 files changed

+8830
-2276
lines changed

.ci/.lintr.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ linters = c(dt_linters, all_linters(
2121
message = "Use messagef to avoid fragmented translations.",
2222
warning = "Use warningf to avoid fragmented translations.",
2323
stop = "Use stopf to avoid fragmented translations.",
24+
rev = "Use frev internally, or setfrev if by-reference is safe.",
2425
NULL
2526
)),
2627
# undesirable_function_linter(modify_defaults(

.ci/README.md

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# data.table continuous integration and deployment
22

3-
On each Pull Request opened in GitHub we run GitHub Actions test jobs to provide prompt feedback about the status of PR. Our more thorough main CI pipeline runs nightly on GitLab CI. GitLab repository automatically mirrors our GitHub repository and runs pipeline on `master` branch every night. It tests more environments and different configurations. It publishes a variety of artifacts such as our [homepage](https://rdatatable.gitlab.io/data.table/) and [CRAN-like website for dev version](https://rdatatable.gitlab.io/data.table/web/packages/data.table/index.html), including windows binaries for the dev version.
3+
On each Pull Request opened in GitHub we run GitHub Actions test jobs to provide prompt feedback about the status of the PR. Our more thorough main CI pipeline runs nightly on GitLab CI. In addition to branches pushed directly, the GitLab repository automatically mirrors our GitHub repository and runs the pipeline on the `master` branch every night. It tests more environments and different configurations. It publishes a variety of artifacts such as our [homepage](https://rdatatable.gitlab.io/data.table/) and [CRAN-like website for dev version](https://rdatatable.gitlab.io/data.table/web/packages/data.table/index.html), including Windows binaries for the dev version.
44

55
## Environments
66

@@ -50,6 +50,23 @@ Base R implemented helper script, [originally proposed to base R](https://svn.r-
5050

5151
Base R implemented helper script to orchestrate generation of most artifacts and to arrange them nicely. It is being used only in [_integration_ stage in GitLab CI pipeline](./../.gitlab-ci.yml).
5252

53+
### [`lint.R`](./lint.R)
54+
55+
Base R runner for the manual (non-`lintr`) lint checks to be run from GitHub Actions during the code quality check. The command line arguments are as follows:
56+
1. Path to the directory containing files defining the linters. A linter is a function that accepts one argument (typically the path to the file) and signals an error if it fails the lint check.
57+
2. Path to the directory containing files to check.
58+
3. A regular expression matching the files to check.
59+
60+
One of the files in the linter directory may define the `.preprocess` function, which must accept one file path and return a value that other linter functions will understand. The function may also return `NULL` to indicate that the file must be skipped.
61+
62+
Example command lines:
63+
64+
```sh
65+
Rscript .ci/lint.R .ci/linters/c src '[.][ch]$'
66+
Rscript .ci/lint.R .ci/linters/po po '[.]po$'
67+
Rscript .ci/lint.R .ci/linters/md . '[.]R?md$'
68+
```
69+
5370
## GitLab Open Source Program
5471

5572
We are currently part of the [GitLab for Open Source Program](https://about.gitlab.com/solutions/open-source/). This gives us 50,000 compute minutes per month for our GitLab CI. Our license needs to be renewed yearly (around July) and is currently managed by @ben-schwen.

.ci/atime/tests.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,5 +277,14 @@ test.list <- atime::atime_test_list(
277277
Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc)
278278
Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency
279279

280+
"isoweek improved in #7144" = atime::atime_test(
281+
setup = {
282+
set.seed(349)
283+
x = sample(Sys.Date() - 0:5000, N, replace=TRUE)
284+
},
285+
expr = data.table::isoweek(x),
286+
Slow = "548410d23dd74b625e8ea9aeb1a5d2e9dddd2927", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/548410d23dd74b625e8ea9aeb1a5d2e9dddd2927)
287+
Fast = "c0b32a60466bed0e63420ec105bc75c34590865e"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7144/commits) that uses a much faster implementation
288+
280289
tests=extra.test.list)
281290
# nolint end: undesirable_operator_linter.

.ci/ci.R

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,11 @@ function(pkgs,
155155
db <- utils::available.packages(repos.url, type = type)
156156
allpkgs <- c(pkgs, unlist(tools::package_dependencies(unique(pkgs), db, which, recursive = TRUE), use.names = FALSE))
157157
except <- c("R", unlist(tools:::.get_standard_package_names()[except.priority], use.names = FALSE))
158-
## do not re-download existing packages, ignore version
158+
## do not re-download existing packages with the right version
159159
if (length(except.repodir) && file.exists(file.path(contrib.url(except.repodir, type = type, ver = binary.ver), "PACKAGES"))) {
160160
except.curl <- contrib.url(file.path("file:", normalizePath(except.repodir)), type = type, ver = binary.ver)
161-
except <- c(except, rownames(utils::available.packages(except.curl, type = type, fields = "Package")))
161+
except.db <- utils::available.packages(except.curl, type = type, fields = "Package")
162+
except <- c(except, merge(db, except.db, by = c("Package", "Version", "MD5sum"))[,"Package"])
162163
}
163164
newpkgs <- setdiff(allpkgs, except)
164165
if (!all(availpkgs<-newpkgs %in% rownames(db))) {
@@ -174,6 +175,13 @@ function(pkgs,
174175
"source" = "tar.gz",
175176
"mac.binary" = "tgz",
176177
"win.binary" = "zip")
178+
## clean up stale package files for which new versions will be downloaded
179+
if (file.exists(file.path(destdir, "PACKAGES"))) {
180+
repo.db <- utils::available.packages(file.path("file:", normalizePath(destdir)), type = type)
181+
oldver <- repo.db[repo.db[, "Package"] %in% newpkgs, c("Package", "Version"), drop=FALSE]
182+
oldfiles <- file.path(destdir, sprintf("%s_%s.%s", oldver[,"Package"], oldver[,"Version"], pkgsext))
183+
unlink(oldfiles[file.exists(oldfiles)])
184+
}
177185
pkgsver <- db[db[, "Package"] %in% newpkgs, c("Package", "Version"), drop=FALSE]
178186
dlfiles <- file.path(destdir, sprintf("%s_%s.%s", pkgsver[,"Package"], pkgsver[,"Version"], pkgsext))
179187
unlink(dlfiles[file.exists(dlfiles)])

.ci/lint.R

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/Rscript
# Runner for the manual lint checks in .ci/linters
#
# Command line: Rscript .ci/lint.R <LINTER_DIR> <WHERE> <WHAT>
#   LINTER_DIR  directory whose files are sourced to define the linter functions
#   WHERE       directory searched (recursively) for the files to lint
#   WHAT        regular expression selecting the files to lint
# Every linter is called on every selected file; a linter reports a problem by
# signalling an error. All failures are reported before the script itself fails.
args = commandArgs(TRUE)
if (identical(args, '--help')) {
  # NB: no trailing comma after the last element. c() rejects an empty argument,
  # which would make --help fail instead of printing the usage.
  writeLines(c(
    'Usage: Rscript .ci/lint.R .ci/linters/<KIND> <WHERE> <WHAT>',
    'KIND must name the directory containing the *.R files defining the linter functions.',
    'WHERE must name the directory containing the files to lint, e.g. "po", or "src".',
    "WHAT must contain the regular expression matching the files to lint, e.g., '[.]po$', or '[.][ch]$'."
  ))
  q('no')
}
stopifnot(`Invalid arguments, see .ci/lint.R --help` = length(args) == 3)

# Linters are sourced into a dedicated environment. .preprocess defaults to identity,
# so linters receive the plain file path unless one of the sourced files overrides it.
# Its leading dot keeps it out of ls(), so it is never counted or called as a linter.
linter_env = list2env(list(.preprocess = identity))
for (f in list.files(args[[1]], full.names=TRUE)) sys.source(f, linter_env)
if (!length(ls(linter_env))) stop(
  "No linters found after sourcing files in ", dQuote(args[[1]])
)

sources = list.files(args[[2]], pattern = args[[3]], full.names = TRUE, recursive = TRUE)
if (!length(sources)) stop(
  "No files to lint found in directory ", dQuote(args[[2]]), " for mask ", dQuote(args[[3]])
)
# A NULL returned by .preprocess means "skip this file".
sources = Filter(Negate(is.null), lapply(setNames(nm = sources), linter_env$.preprocess))

okay = TRUE
for (src in names(sources))
  for (linter in ls(linter_env)) tryCatch(
    linter_env[[linter]](sources[[src]]),
    error = function(e) {
      message('Source file ', dQuote(src), ' failed lint check ', dQuote(linter), ': ', conditionMessage(e))
      # keep going so that every failure is reported, but remember to fail at the end
      okay <<- FALSE
    }
  )
stopifnot(`Please fix the issues above.` = okay)

.ci/linters/c/00preprocess.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Preprocessing step for the C linters: gather what the checks need about one C file.
#   f: path to the C source or header file.
# Returns a list with
#   c_obj:        the path itself,
#   lines:        the raw lines of the file,
#   preprocessed: the lines printed by 'gcc -fpreprocessed -E' for the file
#                 (per the GCC manual this removes comments without expanding macros).
# gcc's stderr is discarded.
.preprocess = function (f) {
  gcc_args = shQuote(c("-fpreprocessed", "-E", f))
  list(
    c_obj = f,
    lines = readLines(f),
    preprocessed = system2("gcc", gcc_args, stdout = TRUE, stderr = FALSE)
  )
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Check that the text and the target of GitHub links agree, i.e. that every
#   [#N](https://github.com/Rdatatable/data.table/<type>/M)
# has N equal to M.
#   news: path of the file to check; paths not containing "NEWS" are skipped
#     (invisible NULL) without being read.
# Returns FALSE when every link is consistent. Otherwise prints one line per
#   mismatching link and signals an error.
news_github_link_mismatch_linter = function(news) {
  if (!grepl("NEWS", news)) return(invisible())
  news = readLines(news)
  link_pattern = "\\[#(?<md_number>[0-9]+)\\]\\(https://github.com/Rdatatable/data.table/(?<link_type>[^/]+)/(?<link_number>[0-9]+)\\)"
  # perl=TRUE is what provides the per-group capture.start/capture.length attributes
  hits = gregexpr(link_pattern, news, perl=TRUE)

  # one data.frame per line that has links (a row per link); NULL for the other lines
  per_line = vector("list", length(hits))
  for (line_idx in seq_along(hits)) {
    hit = hits[[line_idx]]
    if (hit[1L] <= 0L) next
    starts = attr(hit, "capture.start")
    stops = starts + attr(hit, "capture.length") - 1L
    # reuse the matrix of start positions for its dimensions and group names
    captured = starts
    captured[] = substring(news[line_idx], starts, stops)
    line_links = data.frame(captured)
    line_links$line_number = line_idx
    per_line[[line_idx]] = line_links
  }
  link_info = do.call(rbind, per_line)

  numbers_agree = link_info$md_number == link_info$link_number
  if (!all(numbers_agree)) {
    bad = link_info[!numbers_agree, ]
    cat(sep = "", sprintf(
      "In line %d, link pointing to %s %s is written #%s\n",
      bad$line_number, bad$link_type, bad$link_number, bad$md_number
    ))
    stop("Please fix the NEWS issues above.")
  }
  FALSE
}

.ci/linters/md/news_linter.R

Lines changed: 0 additions & 61 deletions
This file was deleted.
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# ensure that numbered list in each section is in sequence
#   news: path of the file to check; paths not containing "NEWS" are skipped
#     (invisible NULL) without being read.
# Prints one report per badly numbered section and then signals an error;
#   returns invisibly when every section is numbered 1, 2, 3, ...
news_section_numbering_linter = function(news) {
  if (!grepl("NEWS", news)) return(invisible())
  news = readLines(news)
  # plain '#' catches some examples; 'd' for 'data.table'
  sections = grep("^#+ [A-Zd]", news)
  entries = grep("^[0-9]+[.]", news)
  entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries]))
  # index of the closest preceding header for each entry; 0 when no header precedes it
  section_id = findInterval(entries, sections)

  any_mismatch = FALSE
  for (id in unique(section_id)) {
    section_entries = entry_value[section_id == id]
    intended_value = seq_along(section_entries)
    matched = section_entries == intended_value
    if (all(matched)) next
    any_mismatch = TRUE
    # id 0 has no header to quote: sections[0] is empty, and sprintf() on an empty
    # argument returns nothing, which would hide the reason for the failure.
    where = if (id == 0L) {
      "Before the first section header"
    } else {
      sprintf("In section '%s' (line %d)", news[sections[id]], sections[id])
    }
    cat(sprintf(
      "%s, bad numbering:\n%s\n",
      where,
      paste0(" [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n")
    ))
  }
  stopifnot("Please fix the NEWS issues above" = !any_mismatch)
}
Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
any_mismatch = FALSE
2-
31
# ensure that ids are limited to alphanumerics and dashes
42
# (in particular, dots and underscores break the links)
5-
check_header_ids = function(md) {
3+
vignette_heading_id_linter = function(md) {
4+
if (!grepl('[.]Rmd$', md)) return(invisible())
5+
md = readLines(md)
66
# A bit surprisingly, some headings don't start with a letter.
77
# We're interested in those that set an id to link to, i.e., end with {#id}.
88
heading_captures = regmatches(md, regexec("^#+ \\S.*[{]#([^}]*)[}]$", md))
@@ -14,13 +14,5 @@ check_header_ids = function(md) {
1414
"On line %d, bad heading id '%s':\n%s\n",
1515
line, heading_captures[[line]][2], heading_captures[[line]][1]
1616
))
17-
!all(good_ids)
17+
stopifnot('Please fix the vignette issues above' = all(good_ids))
1818
}
19-
20-
any_error = FALSE
21-
for (vignette in list.files('vignettes', pattern = "[.]Rmd$", recursive = TRUE, full.name = TRUE)) {
22-
cat(sprintf("Checking vignette file %s...\n", vignette))
23-
rmd_lines = readLines(vignette)
24-
any_error = check_header_ids(rmd_lines) || any_error
25-
}
26-
if (any_error) stop("Please fix the vignette issues above.")

0 commit comments

Comments
 (0)