Rdatatable · ben-schwen · Oct 28, 2025 · Oct 29, 2025 · Oct 30, 2025 · Oct 30, 2025
@@ -296,7 +296,16 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T
     #   user  system elapsed
     #  0.028   0.000   0.005
     ```
-    20. `fread()` now supports the `comment.char` argument to skip trailing comments or comment-only lines, consistent with `read.table()`, [#856](https://github.com/Rdatatable/data.table/issues/856). The default remains `comment.char = ""` (no comment parsing) for backward compatibility and performance, in contrast to `read.table(comment.char = "#")`. Thanks to @arunsrinivasan and many others for the suggestion and @ben-schwen for the implementation.
+
+20. `fread()` now supports the `comment.char` argument to skip trailing comments or comment-only lines, consistent with `read.table()`, [#856](https://github.com/Rdatatable/data.table/issues/856). The default remains `comment.char = ""` (no comment parsing) for backward compatibility and performance, in contrast to `read.table(comment.char = "#")`. Thanks to @arunsrinivasan and many others for the suggestion and @ben-schwen for the implementation.
+
+21. GForce and lapply optimization detection has been refactored to use modular optimization paths and an AST (Abstract Syntax Tree) walker for improved maintainability and extensibility. The new architecture separates optimization detection into distinct, composable phases. This makes future optimization enhancements a lot easier. Thanks to @grantmcdermott, @jangorecki, @MichaelChirico, and @HughParsonage for the suggestions and @ben-schwen for the implementation.
+
+    This rewrite also introduces several new optimizations:
+      - Enables Map instead of lapply optimizations (e.g., `Map(fun, .SD)` -> `list(fun(col1), fun(col2), ...)`) [#5336](https://github.com/Rdatatable/data.table/issues/5336)
+      - lapply optimization works without .SD (e.g., `lapply(list(col1, col2), fun)` -> `list(fun(col1), fun(col2))` [#5032](https://github.com/Rdatatable/data.table/issues/5032)
+      - Type conversion support in GForce expressions (e.g., `sum(as.numeric(x))`) [#2934](https://github.com/Rdatatable/data.table/issues/2934)
+      - Arithmetic operation support in GForce (e.g., `max(x) - min(x)`) [#3815](https://github.com/Rdatatable/data.table/issues/3815)
 
 ### BUG FIXES
 

@@ -361,7 +361,34 @@ gc_mem = function() {
   # nocov end
 }
 
-test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,notOutput=NULL,ignore.warning=NULL,options=NULL,env=NULL) {
+test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,notOutput=NULL,ignore.warning=NULL,options=NULL,env=NULL,levels=NULL) {
+  # if levels is provided, test across multiple optimization levels
+  if (!is.null(levels)) {
+    cl = match.call()
+    cl$levels = NULL  # Remove levels from the recursive call
+
+    vector_params = c("error", "warning", "message", "output", "notOutput", "ignore.warning")
+    # Check if y was explicitly provided (not just the default)
+    y_provided = !missing(y)
+    compare = !y_provided && length(levels)>1L && !any(vapply_1b(vector_params, function(p) length(get(p, envir=environment())) > 0L))
+
+    for (i in seq_along(levels)) {
+      cl$num = num + (i - 1L) * 1e-6
+      opt_level = list(datatable.optimize = levels[i])
+      cl$options = if (!is.null(options)) c(as.list(options), opt_level) else opt_level
+      for (p in vector_params) {
+        val = get(p, envir=environment())
+        if (length(val) > 0L) {
+          cl[[p]] = val[((i - 1L) %% length(val)) + 1L] # cycle through values if fewer than levels
+        }
+      }
+
+      if (compare && i == 1L) cl$y = eval(cl$x, parent.frame())
+      eval(cl, parent.frame()) # actual test call
+    }
+    return(invisible())
+  }
+
   if (!is.null(env)) {
     old = Sys.getenv(names(env), names=TRUE, unset=NA)
     to_unset = !lengths(env)

@@ -190,24 +190,13 @@ DT = data.table(A=1:10,B=rnorm(10),C=paste("a",1:100010,sep=""))
 test(301.1, nrow(DT[,sum(B),by=C])==100010)
 
 # Test := by key, and that := to the key by key unsets the key. Make it non-trivial in size too.
-local({
-  old = options(datatable.optimize=0L); on.exit(options(old))
-  set.seed(1)
-  DT = data.table(a=sample(1:100, 1e6, replace=TRUE), b=sample(1:1000, 1e6, replace=TRUE), key="a")
-  test(637.1, DT[, m:=sum(b), by=a][1:3], data.table(a=1L, b=c(156L, 808L, 848L), m=DT[J(1), sum(b)], key="a"))
-  test(637.2, key(DT[J(43L), a:=99L]), NULL)
-  setkey(DT, a)
-  test(637.3, key(DT[, a:=99L, by=a]), NULL)
-})
-local({
-  options(datatable.optimize=2L); on.exit(options(old))
-  set.seed(1)
-  DT = data.table(a=sample(1:100, 1e6, replace=TRUE), b=sample(1:1000, 1e6, replace=TRUE), key="a")
-  test(638.1, DT[, m:=sum(b), by=a][1:3], data.table(a=1L, b=c(156L, 808L, 848L), m=DT[J(1), sum(b)], key="a"))
-  test(638.2, key(DT[J(43L), a:=99L]), NULL)
-  setkey(DT,a)
-  test(638.3, key(DT[, a:=99L, by=a]), NULL)
-})
+set.seed(1)
+DT = data.table(a=sample(1:100, 1e6, replace=TRUE), b=sample(1:1000, 1e6, replace=TRUE), key="a")
+opt = c(0L,2L)
+test(637.1, levels=opt, copy(DT)[, m:=sum(b), by=a][1:3], data.table(a=1L, b=c(156L, 808L, 848L), m=DT[J(1), sum(b)], key="a"))
+test(637.2, levels=opt, key(copy(DT)[J(43L), a:=99L]), NULL)
+setkey(DT, a)
+test(637.3, levels=opt, key(copy(DT)[, a:=99L, by=a]), NULL)
 
 # Test X[Y] slowdown, #2216
 # Many minutes in 1.8.2!  Now well under 1s, but 10s for very wide tolerance for CRAN. We'd like CRAN to tell us if any changes