Skip to content

Commit 6b6da26

Browse files
committed
Merge branch 'master' into frev
2 parents ccd9ee6 + c8c35f3 commit 6b6da26

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+2203
-1126
lines changed

.Rbuildignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
.dir-locals.el
22
^\.Rprofile$
33
^data\.table_.*\.tar\.gz$
4+
^config\.log$
45
^vignettes/plots/figures$
56
^\.Renviron$
67
^[^/]+\.R$
@@ -16,7 +17,6 @@
1617
^\.graphics$
1718
^\.github$
1819

19-
^\.appveyor\.yml$
2020
^\.gitlab-ci\.yml$
2121

2222
^Makefile$

.appveyor.yml

Lines changed: 0 additions & 71 deletions
This file was deleted.

.ci/.lintr.R

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
dt_linters = new.env()
2+
for (f in list.files('.ci/linters', full.names=TRUE)) sys.source(f, dt_linters)
3+
rm(f)
4+
5+
# NB: Could do this inside the linter definition, this separation makes those files more standardized
6+
dt_linters <- eapply(dt_linters, function(linter_factory) linter_factory())
7+
8+
linters = c(dt_linters, all_linters(
9+
packages = "lintr", # TODO(lintr->3.2.0): Remove this.
10+
# eq_assignment_linter(),
11+
brace_linter(allow_single_line = TRUE),
12+
# TODO(michaelchirico): Activate these incrementally. These are the
13+
# parameterizations that match our style guide.
14+
# implicit_assignment_linter(allow_lazy = TRUE, allow_scoped = TRUE),
15+
# implicit_integer_linter(allow_colon = TRUE),
16+
# system_time_linter = undesirable_function_linter(c(
17+
# system.time = "Only run timings in benchmark.Rraw"
18+
# )),
19+
# undesirable_function_linter(modify_defaults(
20+
# default_undesirable_functions,
21+
# ifelse = "Use fifelse instead.",
22+
# Sys.setenv = NULL,
23+
# library = NULL,
24+
# options = NULL,
25+
# par = NULL,
26+
# setwd = NULL
27+
# )),
28+
undesirable_operator_linter(),
29+
# TODO(lintr#2441): Use upstream implementation.
30+
assignment_linter = NULL,
31+
absolute_path_linter = NULL, # too many false positives
32+
# TODO(lintr#2442): Use this once x[ , j, by] is supported.
33+
commas_linter = NULL,
34+
commented_code_linter = NULL,
35+
# TODO(linter->3.2.0): Activate this.
36+
consecutive_assertion_linter = NULL,
37+
cyclocomp_linter = NULL,
38+
function_argument_linter = NULL,
39+
indentation_linter = NULL,
40+
infix_spaces_linter = NULL,
41+
line_length_linter = NULL,
42+
missing_package_linter = NULL,
43+
namespace_linter = NULL,
44+
nonportable_path_linter = NULL,
45+
object_name_linter = NULL,
46+
object_usage_linter = NULL,
47+
quotes_linter = NULL,
48+
semicolon_linter = NULL,
49+
spaces_inside_linter = NULL,
50+
spaces_left_parentheses_linter = NULL,
51+
# TODO(michaelchirico): Only exclude from vignettes, not sure what's wrong.
52+
strings_as_factors_linter = NULL,
53+
# TODO(lintr->3.2.0): Fix on a valid TODO style, enforce it, and re-activate.
54+
todo_comment_linter = NULL,
55+
# TODO(michaelchirico): Enforce these and re-activate them one-by-one.
56+
brace_linter = NULL,
57+
fixed_regex_linter = NULL,
58+
if_not_else_linter = NULL,
59+
implicit_assignment_linter = NULL,
60+
implicit_integer_linter = NULL,
61+
keyword_quote_linter = NULL,
62+
object_overwrite_linter = NULL,
63+
paren_body_linter = NULL,
64+
redundant_equals_linter = NULL,
65+
undesirable_function_linter = NULL,
66+
unnecessary_concatenation_linter = NULL,
67+
unnecessary_nesting_linter = NULL,
68+
unreachable_code_linter = NULL,
69+
unused_import_linter = NULL
70+
))
71+
rm(dt_linters)
72+
73+
# TODO(lintr#2172): Glob with lintr itself.
74+
exclusions = c(local({
75+
exclusion_for_dir <- function(dir, exclusions) {
76+
files = file.path("..", list.files(dir, pattern = "\\.(R|Rmd|Rraw)$", full.names=TRUE))
77+
stats::setNames(rep(list(exclusions), length(files)), files)
78+
}
79+
c(
80+
exclusion_for_dir("tests", list(
81+
quotes_linter = Inf,
82+
# TODO(michaelchirico): Enforce these and re-activate them one-by-one.
83+
implicit_integer_linter = Inf,
84+
infix_spaces_linter = Inf,
85+
undesirable_function_linter = Inf
86+
)),
87+
exclusion_for_dir("vignettes", list(
88+
quotes_linter = Inf,
89+
sample_int_linter = Inf
90+
# strings_as_factors_linter = Inf
91+
# system_time_linter = Inf
92+
)),
93+
exclusion_for_dir("inst/tests", list(
94+
library_call_linter = Inf,
95+
numeric_leading_zero_linter = Inf,
96+
undesirable_operator_linter = Inf, # For ':::', possibly we could be more careful to only exclude ':::'.
97+
# TODO(michaelchirico): Enforce these and re-activate them one-by-one.
98+
comparison_negation_linter = Inf,
99+
condition_call_linter = Inf,
100+
duplicate_argument_linter = Inf,
101+
equals_na_linter = Inf,
102+
missing_argument_linter = Inf,
103+
paste_linter = Inf,
104+
rep_len_linter = Inf,
105+
sample_int_linter = Inf,
106+
seq_linter = Inf,
107+
unnecessary_lambda_linter = Inf
108+
))
109+
)
110+
}),
111+
list(`../inst/tests/froll.Rraw` = list(dt_test_literal_linter = Inf)) # TODO(michaelchirico): Fix these once #5898, #5692, #5682, #5576, #5575, #5441 are merged.
112+
)

.ci/README.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,6 @@ Artifacts:
3535

3636
TODO document
3737

38-
### [Appveyor](./../.appveyor.yml)
39-
40-
TODO document
41-
4238
## CI tools
4339

4440
### [`ci.R`](./ci.R)

.ci/atime/tests.R

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# A list of performance tests.
2+
#
3+
# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments:
4+
# - N: A numeric sequence of data sizes to vary.
5+
# - setup: An expression evaluated for every data size before measuring time/memory.
6+
# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions.
7+
# This must call a function from data.table using a syntax with double or triple colon prefix.
8+
# The package name before the colons will be replaced by a new package name that uses the commit SHA hash.
9+
# (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table)
10+
#
11+
# Optional parameters that may be useful to configure tests:
12+
# - times: Number of times each expression is evaluated (default is 10).
13+
# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. Default of 0.01 seconds should be sufficient for most tests.
14+
# - Historical references (short version name = commit SHA1).
15+
# For historical regressions, use version names 'Before', 'Regression', and 'Fixed'
16+
# When there was no regression, use 'Slow' and 'Fast'
17+
# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information.
18+
# nolint start: undesirable_operator_linter. ':::' needed+appropriate here.
19+
test.list <- atime::atime_test_list(
20+
# A function to customize R package metadata and source files to facilitate version-specific installation and testing.
21+
#
22+
# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R)
23+
# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package.
24+
# It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately.
25+
#
26+
# @param old.Package Current name of the package.
27+
# @param new.Package New name of the package, including a SHA hash.
28+
# @param sha SHA1 hash used for differentiating versions.
29+
# @param new.pkg.path Path to the package files.
30+
#
31+
# @details
32+
# The function modifies:
33+
# - DESCRIPTION, updating the package name.
34+
# - Makevars, customizing the shared object file name and adjusting the build settings.
35+
# - R/onLoad.R, adapting custom version checking for package loading operations.
36+
# - NAMESPACE, changing namespace settings for dynamic linking.
37+
#
38+
# @examples
39+
# pkg.edit.fun("data.table", "data.table.some_SHA1_hash", "some_SHA1_hash", "/path/to/data.table")
40+
#
41+
# @return None (performs in-place file modifications)
42+
# @note This setup is typically unnecessary for most packages but essential for data.table due to its unique configuration.
43+
pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
44+
pkg_find_replace <- function(glob, FIND, REPLACE) {
45+
atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE)
46+
}
47+
Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE)
48+
Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
49+
new.Package_ <- paste0(Package_, "_", sha)
50+
pkg_find_replace(
51+
"DESCRIPTION",
52+
paste0("Package:\\s+", old.Package),
53+
paste("Package:", new.Package))
54+
pkg_find_replace(
55+
file.path("src", "Makevars.*in"),
56+
Package_regex,
57+
new.Package_)
58+
pkg_find_replace(
59+
file.path("R", "onLoad.R"),
60+
Package_regex,
61+
new.Package_)
62+
pkg_find_replace(
63+
file.path("R", "onLoad.R"),
64+
sprintf('packageVersion\\("%s"\\)', old.Package),
65+
sprintf('packageVersion\\("%s"\\)', new.Package))
66+
pkg_find_replace(
67+
file.path("src", "init.c"),
68+
paste0("R_init_", Package_regex),
69+
paste0("R_init_", gsub("[.]", "_", new.Package_)))
70+
pkg_find_replace(
71+
"NAMESPACE",
72+
sprintf('useDynLib\\("?%s"?', Package_regex),
73+
paste0('useDynLib(', new.Package_))
74+
},
75+
76+
# Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311
77+
# Fixed in: https://github.com/Rdatatable/data.table/pull/4440
78+
"shallow regression fixed in #4440" = atime::atime_test(
79+
N = 10^seq(3,8),
80+
setup = {
81+
set.seed(1L)
82+
dt <- data.table(a = sample.int(N))
83+
setindexv(dt, "a")
84+
},
85+
expr = data.table:::shallow(dt),
86+
# Before = "", This needs to be updated later as there are two issues here: A) The source of regression (or the particular commit that led to it) is not clear; B) Older versions of data.table are having problems when being installed in this manner (This includes commits from before March 20 2020, when the issue that discovered or first mentioned the regression was created)
87+
Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression
88+
Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440)
89+
90+
# Test based on: https://github.com/Rdatatable/data.table/issues/5424
91+
# Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491
92+
# Fixed in: https://github.com/Rdatatable/data.table/pull/5463
93+
"memrecycle regression fixed in #5463" = atime::atime_test(
94+
N = 10^seq(3, 8),
95+
setup = {
96+
n <- N/100
97+
set.seed(2L)
98+
dt <- data.table(
99+
g = sample(seq_len(n), N, TRUE),
100+
x = runif(N),
101+
key = "g")
102+
dt_mod <- copy(dt)
103+
},
104+
expr = data.table:::`[.data.table`(dt_mod, , N := .N, by = g),
105+
Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
106+
Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
107+
Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits)
108+
109+
# Issue reported in: https://github.com/Rdatatable/data.table/issues/5426
110+
# To be fixed in: https://github.com/Rdatatable/data.table/pull/5427
111+
"setDT improved in #5427" = atime::atime_test(
112+
N = 10^seq(1, 7),
113+
setup = {
114+
L <- replicate(N, 1, simplify = FALSE)
115+
setDT(L)
116+
},
117+
expr = {
118+
data.table:::setattr(L, "class", NULL)
119+
data.table:::setDT(L)
120+
},
121+
Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801)
122+
Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits)
123+
)
124+
# nolint end: undesirable_operator_linter.

.ci/linters/dt_lengths_linter.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# This is lintr::lengths_linter(), but including 'vapply_1i()' which is more common internally in data.table
2+
# alternatively, this could be defined with something like
3+
# dt_lengths_linter <- lintr::lengths_linter()
4+
# local({ # to remove 'e'
5+
# e <- environment(dt_lengths_linter)
6+
# e$function_names <- c(evalq(function_names, e), "vapply_1i")
7+
# })
8+
# but that feels more fragile than this.
9+
dt_lengths_linter <- lintr::make_linter_from_function_xpath(
10+
function_names = c("sapply", "vapply", "vapply_1i", "map_int", "map_dbl"),
11+
xpath = "parent::expr/parent::expr[expr/SYMBOL[text() = 'length']]",
12+
lint_message = "Use lengths() to find the length of each element in a list."
13+
)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Avoid test(SYMBOL) and test(SYMBOL+...) except in rare cases.
2+
# in the latter case, prefer test(NUMBER+expr).
3+
dt_test_literal_linter <- lintr::make_linter_from_xpath(
4+
"
5+
//SYMBOL_FUNCTION_CALL[text() = 'test']
6+
/parent::expr
7+
/following-sibling::expr[1][SYMBOL or expr[1]/SYMBOL]
8+
",
9+
"Prefer test's 'num' argument to start with a numeric literal except in rare cases, e.g. test(123, ...) or test(123+x/6, ...)"
10+
)

.dev/cc.R

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,28 @@ sourceDir = function(path=getwd(), trace = TRUE, ...) {
3131
if(trace) cat("\n")
3232
}
3333

34-
cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.getenv("PROJ_PATH"), CC="gcc", quiet=FALSE) {
34+
# NB: since we only import from default packages, this is rarely needed, but useful for truly minimal dev environments (#6056)
35+
sourceImports = function(path=getwd(), quiet=FALSE) {
36+
nsFile = file.path(path, "NAMESPACE")
37+
if (!file.exists(nsFile)) {
38+
if (!quiet) warning("No NAMESPACE file found, required to guarantee imports resolve correctly")
39+
return(invisible())
40+
}
41+
suppressWarnings(rm("getRversion", envir=.GlobalEnv)) # clean up from previous cc() because parseNamespaceFile() run getRversion() in NAMESPACE in .GlobalEnv
42+
nsParsedImports = parseNamespaceFile(basename(path), "..")$imports # weird signature to this function
43+
if (!quiet && length(nsParsedImports)) cat(sprintf("Ensuring objects from %d import entries in NAMESPACE resolve correctly\n", length(nsParsedImports)))
44+
for (ii in seq_along(nsParsedImports)) {
45+
entry = nsParsedImports[[ii]]
46+
# getNamespaceExports includes weird objects but that's intentional, consider evalq(.__C__VIRTUAL, asNamespace("Rcpp")) due to import(methods) in that NAMESPACE
47+
imported = if (length(entry) == 1L) getNamespaceExports(entry) else entry[[2L]]
48+
# Assign directly to better imitate actual namespace imports. Earlier tried to require(include.only=) these objects, but R doesn't allow multiple such require, meaning we can't add more objects later in tests, see:
49+
# https://stat.ethz.ch/pipermail/r-devel/2024-April/083319.html
50+
for (import in imported) assign(import, getExportedValue(entry[[1L]], import), .GlobalEnv)
51+
}
52+
return(invisible())
53+
}
54+
55+
cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.getenv("PROJ_PATH", unset="."), CC="gcc", quiet=FALSE) {
3556
if (!missing(cc_dir)) {
3657
warning("'cc_dir' arg is deprecated, use 'path' argument or 'PROJ_PATH' env var instead")
3758
path = cc_dir
@@ -81,6 +102,7 @@ cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys
81102
.GlobalEnv[[Call$name]] = Call$address
82103
for (Extern in xx$.External)
83104
.GlobalEnv[[Extern$name]] = Extern$address
105+
sourceImports(path, quiet=quiet)
84106
sourceDir(file.path(path, "R"), trace=!quiet)
85107
if (base::getRversion()<"4.0.0") rm(list=c("rbind.data.table", "cbind.data.table"), envir=.GlobalEnv) # 3968 follow up
86108
.GlobalEnv$testDir = function(x) file.path(path,"inst/tests",x)

0 commit comments

Comments
 (0)