Skip to content
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
ebd152d
modular optimization paths - init
ben-schwen Oct 28, 2025
71b21ab
make linter happy
ben-schwen Oct 29, 2025
8a9e727
move tests
ben-schwen Oct 30, 2025
04e5782
add lapply(list(col1, col2, ...), fun) pattern
ben-schwen Oct 30, 2025
a8dde19
turn on optimization
ben-schwen Oct 31, 2025
67f2874
add type conversion support to GForce
ben-schwen Nov 1, 2025
2876ebe
remove stale branch
ben-schwen Nov 1, 2025
c445c38
add tests
ben-schwen Nov 2, 2025
5410e31
update man
ben-schwen Nov 2, 2025
dece1c6
merge tests
ben-schwen Nov 2, 2025
5e1789d
polish test fun
ben-schwen Nov 2, 2025
62f1c48
add arithmetic
ben-schwen Nov 2, 2025
c47ec27
add AST walker and update tests
ben-schwen Nov 2, 2025
1d324d6
add tests
ben-schwen Nov 2, 2025
6b54c1e
Merge branch 'master' into modular_gforce
ben-schwen Nov 2, 2025
22cf35e
add NEWS
ben-schwen Nov 2, 2025
25a7e2e
make function name in massageSD more expressive
ben-schwen Nov 3, 2025
eb8056c
rename levels argument to optimization
ben-schwen Nov 3, 2025
4544398
update docs
ben-schwen Nov 3, 2025
d40edb8
restore test nums
ben-schwen Nov 3, 2025
5e7efb7
remove double tests
ben-schwen Nov 3, 2025
3826927
simplify tests
ben-schwen Nov 3, 2025
982343f
phrasing
ben-schwen Nov 4, 2025
996b28c
Merge remote-tracking branch 'refs/remotes/origin/modular_gforce' int…
ben-schwen Nov 4, 2025
1e6ad03
use mget for all vector params
ben-schwen Nov 4, 2025
9e1297e
rename optimization parameter
ben-schwen Nov 4, 2025
f6981d6
rename optimization parameter also in test
ben-schwen Nov 4, 2025
9fc4734
add optimize param checks
ben-schwen Nov 4, 2025
6aaea51
Merge branch 'master' into modular_gforce
ben-schwen Nov 4, 2025
c07999a
remove trailing ws
ben-schwen Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
702 changes: 436 additions & 266 deletions R/data.table.R

Large diffs are not rendered by default.

29 changes: 28 additions & 1 deletion R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,34 @@ gc_mem = function() {
# nocov end
}

test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,notOutput=NULL,ignore.warning=NULL,options=NULL,env=NULL) {
test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,notOutput=NULL,ignore.warning=NULL,options=NULL,env=NULL,levels=NULL) {
# if levels is provided, test across multiple optimization levels
if (!is.null(levels)) {
cl = match.call()
cl$levels = NULL # Remove levels from the recursive call

vector_params = c("error", "warning", "message", "output", "notOutput", "ignore.warning")
# Check if y was explicitly provided (not just the default)
y_provided = !missing(y)
compare = !y_provided && length(levels)>1L && !any(vapply_1b(vector_params, function(p) length(get(p, envir=environment())) > 0L))

for (i in seq_along(levels)) {
cl$num = num + (i - 1L) * 1e-6
opt_level = list(datatable.optimize = levels[i])
cl$options = if (!is.null(options)) c(as.list(options), opt_level) else opt_level
for (p in vector_params) {
val = get(p, envir=environment())
if (length(val) > 0L) {
cl[[p]] = val[((i - 1L) %% length(val)) + 1L] # cycle through values if fewer than levels
}
}

if (compare && i == 1L) cl$y = eval(cl$x, parent.frame())
eval(cl, parent.frame()) # actual test call
}
return(invisible())
}

if (!is.null(env)) {
old = Sys.getenv(names(env), names=TRUE, unset=NA)
to_unset = !lengths(env)
Expand Down
25 changes: 7 additions & 18 deletions inst/tests/benchmark.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -190,24 +190,13 @@ DT = data.table(A=1:10,B=rnorm(10),C=paste("a",1:100010,sep=""))
test(301.1, nrow(DT[,sum(B),by=C])==100010)

# Test := by key, and that := to the key by key unsets the key. Make it non-trivial in size too.
local({
old = options(datatable.optimize=0L); on.exit(options(old))
set.seed(1)
DT = data.table(a=sample(1:100, 1e6, replace=TRUE), b=sample(1:1000, 1e6, replace=TRUE), key="a")
test(637.1, DT[, m:=sum(b), by=a][1:3], data.table(a=1L, b=c(156L, 808L, 848L), m=DT[J(1), sum(b)], key="a"))
test(637.2, key(DT[J(43L), a:=99L]), NULL)
setkey(DT, a)
test(637.3, key(DT[, a:=99L, by=a]), NULL)
})
local({
options(datatable.optimize=2L); on.exit(options(old))
set.seed(1)
DT = data.table(a=sample(1:100, 1e6, replace=TRUE), b=sample(1:1000, 1e6, replace=TRUE), key="a")
test(638.1, DT[, m:=sum(b), by=a][1:3], data.table(a=1L, b=c(156L, 808L, 848L), m=DT[J(1), sum(b)], key="a"))
test(638.2, key(DT[J(43L), a:=99L]), NULL)
setkey(DT,a)
test(638.3, key(DT[, a:=99L, by=a]), NULL)
})
set.seed(1)
DT = data.table(a=sample(1:100, 1e6, replace=TRUE), b=sample(1:1000, 1e6, replace=TRUE), key="a")
opt = c(0L,2L)
test(637.1, levels=opt, copy(DT)[, m:=sum(b), by=a][1:3], data.table(a=1L, b=c(156L, 808L, 848L), m=DT[J(1), sum(b)], key="a"))
test(637.2, levels=opt, key(copy(DT)[J(43L), a:=99L]), NULL)
setkey(DT, a)
test(637.3, levels=opt, key(copy(DT)[, a:=99L, by=a]), NULL)

# Test X[Y] slowdown, #2216
# Many minutes in 1.8.2! Now well under 1s, but 10s for very wide tolerance for CRAN. We'd like CRAN to tell us if any changes
Expand Down
447 changes: 447 additions & 0 deletions inst/tests/optimize.Rraw

Large diffs are not rendered by default.

923 changes: 154 additions & 769 deletions inst/tests/tests.Rraw

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion man/test.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
test(num, x, y = TRUE,
error = NULL, warning = NULL, message = NULL,
output = NULL, notOutput = NULL, ignore.warning = NULL,
options = NULL, env = NULL)
options = NULL, env = NULL, levels = NULL)
}
\arguments{
\item{num}{ A unique identifier for a test, helpful in identifying the source of failure when testing is not working. Currently, we use a manually-incremented system with tests formatted as \code{n.m}, where essentially \code{n} indexes an issue and \code{m} indexes aspects of that issue. For the most part, your new PR should only have one value of \code{n} (scroll to the end of \code{inst/tests/tests.Rraw} to see the next available ID) and then index the tests within your PR by increasing \code{m}. Note -- \code{n.m} is interpreted as a number, so \code{123.4} and \code{123.40} are actually the same -- please \code{0}-pad as appropriate. Test identifiers are checked to be in increasing order at runtime to prevent duplicates being possible. }
Expand All @@ -22,6 +22,7 @@ test(num, x, y = TRUE,
\item{ignore.warning}{ A single character string. Any warnings emitted by \code{x} that contain this string are dropped. Remaining warnings are compared to the expected \code{warning} as normal. }
\item{options}{ A named list of options to set for the duration of the test. Any code evaluated during this call to \code{test()} (usually, \code{x}, or maybe \code{y}) will run with the named options set, and the original options will be restored on return. This is a named list since different options can have different types in general, but in typical usage, only one option is set at a time, in which case a named vector is also accepted. }
\item{env}{ A named list of environment variables to set for the duration of the test, much like \code{options}. A list entry set to \code{NULL} will unset (i.e., \code{\link{Sys.unsetenv}}) the corresponding variable. }
\item{levels}{ A vector of optimization levels to test. The code in \code{x} is run once for each level, with \code{options(datatable.optimize=level)} set accordingly, and every level must pass for the overall test to pass. If \code{y} is not supplied, the results from the different levels are compared to each other for equality. If \code{y} is supplied, the result from each level is compared to \code{y}. }
}
\note{
\code{NA_real_} and \code{NaN} are treated as equal, use \code{identical} if distinction is needed. See examples below.
Expand Down
2 changes: 1 addition & 1 deletion src/gsumm.c
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ SEXP gsum(SEXP x, SEXP narmArg)
//Rprintf(_("gsum int took %.3f\n"), wallclock()-started);
if (overflow) {
UNPROTECT(1); // discard the result with overflow
warning(_("The sum of an integer column for a group was more than type 'integer' can hold so the result has been coerced to 'numeric' automatically for convenience."));
warning(_("The sum of an integer column for a group was more than type 'integer' can hold so the result has been coerced to 'numeric' automatically for convenience. Consider using 'as.numeric' on the column beforehand to avoid this warning."));
ans = PROTECT(allocVector(REALSXP, ngrp));
double *restrict ansp = REAL(ans);
memset(ansp, 0, ngrp*sizeof(double));
Expand Down
2 changes: 2 additions & 0 deletions tests/optimize.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
require(data.table)
test.data.table(script="optimize.Rraw")
Loading