Rdatatable · ben-schwen · Oct 28, 2025 · Oct 29, 2025 · Oct 30, 2025 · Oct 30, 2025
@@ -361,7 +361,34 @@ gc_mem = function() {
   # nocov end
 }
 
-test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,notOutput=NULL,ignore.warning=NULL,options=NULL,env=NULL) {
+test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,notOutput=NULL,ignore.warning=NULL,options=NULL,env=NULL,levels=NULL) {
+  # if levels is provided, test across multiple optimization levels
+  if (!is.null(levels)) {
+    cl = match.call()
+    cl$levels = NULL  # Remove levels from the recursive call
+
+    vector_params = c("error", "warning", "message", "output", "notOutput", "ignore.warning")
+    # Check if y was explicitly provided (not just the default)
+    y_provided = !missing(y)
+    compare = !y_provided && length(levels)>1L && !any(vapply_1b(vector_params, function(p) length(get(p, envir=environment())) > 0L))
+
+    for (i in seq_along(levels)) {
+      cl$num = num + (i - 1L) * 1e-6
+      opt_level = list(datatable.optimize = levels[i])
+      cl$options = if (!is.null(options)) c(as.list(options), opt_level) else opt_level
+      for (p in vector_params) {
+        val = get(p, envir=environment())
+        if (length(val) > 0L) {
+          cl[[p]] = val[((i - 1L) %% length(val)) + 1L] # cycle through values if fewer than levels
+        }
+      }
+
+      if (compare && i == 1L) cl$y = eval(cl$x, parent.frame())
+      eval(cl, parent.frame()) # actual test call
+    }
+    return(invisible())
+  }
+
   if (!is.null(env)) {
     old = Sys.getenv(names(env), names=TRUE, unset=NA)
     to_unset = !lengths(env)

@@ -190,24 +190,13 @@ DT = data.table(A=1:10,B=rnorm(10),C=paste("a",1:100010,sep=""))
 test(301.1, nrow(DT[,sum(B),by=C])==100010)
 
 # Test := by key, and that := to the key by key unsets the key. Make it non-trivial in size too.
-local({
-  old = options(datatable.optimize=0L); on.exit(options(old))
-  set.seed(1)
-  DT = data.table(a=sample(1:100, 1e6, replace=TRUE), b=sample(1:1000, 1e6, replace=TRUE), key="a")
-  test(637.1, DT[, m:=sum(b), by=a][1:3], data.table(a=1L, b=c(156L, 808L, 848L), m=DT[J(1), sum(b)], key="a"))
-  test(637.2, key(DT[J(43L), a:=99L]), NULL)
-  setkey(DT, a)
-  test(637.3, key(DT[, a:=99L, by=a]), NULL)
-})
-local({
-  options(datatable.optimize=2L); on.exit(options(old))
-  set.seed(1)
-  DT = data.table(a=sample(1:100, 1e6, replace=TRUE), b=sample(1:1000, 1e6, replace=TRUE), key="a")
-  test(638.1, DT[, m:=sum(b), by=a][1:3], data.table(a=1L, b=c(156L, 808L, 848L), m=DT[J(1), sum(b)], key="a"))
-  test(638.2, key(DT[J(43L), a:=99L]), NULL)
-  setkey(DT,a)
-  test(638.3, key(DT[, a:=99L, by=a]), NULL)
-})
+set.seed(1)
+DT = data.table(a=sample(1:100, 1e6, replace=TRUE), b=sample(1:1000, 1e6, replace=TRUE), key="a")
+opt = c(0L,2L)
+test(637.1, levels=opt, copy(DT)[, m:=sum(b), by=a][1:3], data.table(a=1L, b=c(156L, 808L, 848L), m=DT[J(1), sum(b)], key="a"))
+test(637.2, levels=opt, key(copy(DT)[J(43L), a:=99L]), NULL)
+setkey(DT, a)
+test(637.3, levels=opt, key(copy(DT)[, a:=99L, by=a]), NULL)
 
 # Test X[Y] slowdown, #2216
 # Many minutes in 1.8.2!  Now well under 1s, but 10s for very wide tolerance for CRAN. We'd like CRAN to tell us if any changes

@@ -8,7 +8,7 @@
 test(num, x, y = TRUE,
      error = NULL, warning = NULL, message = NULL,
      output = NULL, notOutput = NULL, ignore.warning = NULL,
-     options = NULL, env = NULL)
+     options = NULL, env = NULL, levels = NULL)
 }
 \arguments{
 \item{num}{ A unique identifier for a test, helpful in identifying the source of failure when testing is not working. Currently, we use a manually-incremented system with tests formatted as \code{n.m}, where essentially \code{n} indexes an issue and \code{m} indexes aspects of that issue. For the most part, your new PR should only have one value of \code{n} (scroll to the end of \code{inst/tests/tests.Rraw} to see the next available ID) and then index the tests within your PR by increasing \code{m}. Note -- \code{n.m} is interpreted as a number, so \code{123.4} and \code{123.40} are actually the same -- please \code{0}-pad as appropriate. Test identifiers are checked to be in increasing order at runtime to prevent duplicates being possible. }
@@ -22,6 +22,7 @@ test(num, x, y = TRUE,
 \item{ignore.warning}{ A single character string. Any warnings emitted by \code{x} that contain this string are dropped. Remaining warnings are compared to the expected \code{warning} as normal. }
 \item{options}{ A named list of options to set for the duration of the test. Any code evaluated during this call to \code{test()} (usually, \code{x}, or maybe \code{y}) will run with the named options set, and the original options will be restored on return. This is a named list since different options can have different types in general, but in typical usage, only one option is set at a time, in which case a named vector is also accepted. }
 \item{env}{ A named list of environment variables to set for the duration of the test, much like \code{options}. A list entry set to \code{NULL} will unset (i.e., \code{\link{Sys.unsetenv}}) the corresponding variable. }
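+\item{levels}{ A vector of optimization levels to test. The code in \code{x} is run once for each level, with \code{options(datatable.optimize=level)} set for that run, and every level must pass for the overall test to pass. If \code{y} is supplied, the result from each level is compared to \code{y}. If \code{y} is not supplied, more than one level is given, and none of \code{error}, \code{warning}, \code{message}, \code{output}, \code{notOutput} or \code{ignore.warning} is supplied, the results from the different levels are compared to each other for equality. Any of those arguments that is supplied is applied per level, and its values are recycled if there are fewer values than levels. }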
 }
 \note{
    \code{NA_real_} and \code{NaN} are treated as equal, use \code{identical} if distinction is needed. See examples below.

@@ -410,7 +410,7 @@ SEXP gsum(SEXP x, SEXP narmArg)
     //Rprintf(_("gsum int took %.3f\n"), wallclock()-started);
     if (overflow) {
       UNPROTECT(1); // discard the result with overflow
-      warning(_("The sum of an integer column for a group was more than type 'integer' can hold so the result has been coerced to 'numeric' automatically for convenience."));
+      warning(_("The sum of an integer column for a group was more than type 'integer' can hold so the result has been coerced to 'numeric' automatically for convenience. Consider using 'as.numeric' on the column beforehand to avoid this warning."));
       ans = PROTECT(allocVector(REALSXP, ngrp));
       double *restrict ansp = REAL(ans);
       memset(ansp, 0, ngrp*sizeof(double));

@@ -0,0 +1,2 @@
+require(data.table)
+test.data.table(script="optimize.Rraw")
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		require(data.table)
		test.data.table(script="optimize.Rraw")