Rdatatable
diff --git a/.ci/.lintr.R b/.ci/.lintr.R
Lines changed: 1 addition & 0 deletions
diff --git a/NAMESPACE b/NAMESPACE
Lines changed: 2 additions & 0 deletions
diff --git a/NEWS.md b/NEWS.md
Lines changed: 7 additions & 1 deletion
diff --git a/R/as.data.table.R b/R/as.data.table.R
Lines changed: 7 additions & 7 deletions
diff --git a/R/bmerge.R b/R/bmerge.R
Lines changed: 1 addition & 1 deletion
diff --git a/R/cedta.R b/R/cedta.R
Lines changed: 0 additions & 3 deletions
diff --git a/R/data.table.R b/R/data.table.R
Lines changed: 2 additions & 1 deletion
diff --git a/R/mergelist.R b/R/mergelist.R
Lines changed: 102 additions & 4 deletions
diff --git a/R/utils.R b/R/utils.R
Lines changed: 2 additions & 2 deletions
diff --git a/R/wrappers.R b/R/wrappers.R
Lines changed: 3 additions & 0 deletions
@@ -21,6 +21,7 @@ linters = c(dt_linters, all_linters(
     message = "Use messagef to avoid fragmented translations.",
     warning = "Use warningf to avoid fragmented translations.",
     stop = "Use stopf to avoid fragmented translations.",
+    rev = "Use frev internally, or setfrev if by-reference is safe.",
     NULL
   )),
   # undesirable_function_linter(modify_defaults(
 
@@ -59,6 +59,7 @@ export(nafill)
 export(setnafill)
 export(.Last.updated)
 export(fcoalesce)
+export(mergelist, setmergelist)
 export(cbindlist, setcbindlist)
 export(substitute2)
 #export(DT)  # mtcars |> DT(i,j,by)  #4872 #5472
@@ -208,6 +209,7 @@ S3method(format_list_item, data.frame)
 
 export(fdroplevels, setdroplevels)
 S3method(droplevels, data.table)
+export(frev)
 
 # sort_by added in R 4.4.0, #6662, https://stat.ethz.ch/pipermail/r-announce/2024/000701.html
 if (getRversion() >= "4.4.0") S3method(sort_by, data.table)  
@@ -46,7 +46,13 @@
 
 10. `data.table()` and `as.data.table()` with `keep.rownames=TRUE` now extract row names from named vectors, matching `data.frame()` behavior. Names from the first named vector in the input are used to create the row names column (default name `"rn"` or custom name via `keep.rownames="column_name"`), [#1916](https://github.com/Rdatatable/data.table/issues/1916). Thanks to @richierocks for the feature request and @Mukulyadav2004 for the implementation.
 
-11. `tables()` now supports a `recursive=TRUE` argument to detect `data.table` objects nested within plain lists, such as those produced by `split()` or manual list construction, [#2606](https://github.com/Rdatatable/data.table/issues/2606). The recursive search skips data.frame and data.table objects to avoid descending into list-columns. Nested data.tables are reported with intuitive R-like names using $ and [[ ]] notation. Thanks to @MichaelChirico for the suggestion and @venom1204 for the implementation.
+11. New `frev(x)` as a faster analogue to `base::rev()` for atomic vectors/lists, [#5885](https://github.com/Rdatatable/data.table/issues/5885). Twice as fast as `base::rev()` on large inputs, and faster with more threads. Thanks to Benjamin Schwendinger for suggesting and implementing.
+
+12. New `cbindlist()` and `setcbindlist()` for concatenating a `list` of data.tables column-wise, evocative of the analogous `do.call(rbind, l)` <-> `rbindlist(l)`, [#2576](https://github.com/Rdatatable/data.table/issues/2576). `setcbindlist()` does so without making any copies. Thanks @MichaelChirico for the FR, @jangorecki for the PR, and @MichaelChirico for extensive reviews and fine-tuning.
+
+13. New `mergelist()` and `setmergelist()` similarly work _a la_ `Reduce()` to recursively merge a `list` of data.tables, [#599](https://github.com/Rdatatable/data.table/issues/599). Different join modes (_left_, _inner_, _full_, _right_, _semi_, _anti_, and _cross_) are supported through the `how` argument; duplicate handling goes through the `mult` argument. `setmergelist()` carefully avoids copies where one is not needed, e.g. in a 1:1 left join. Thanks Patrick Nicholson for the FR (in 2013!), @jangorecki for the PR, and @MichaelChirico for extensive reviews and fine-tuning.
+
+14. `tables()` now supports a `recursive=TRUE` argument to detect `data.table` objects nested within plain lists, such as those produced by `split()` or manual list construction, [#2606](https://github.com/Rdatatable/data.table/issues/2606). The recursive search skips data.frame and data.table objects to avoid descending into list-columns. Nested data.tables are reported with intuitive R-like names using $ and [[ ]] notation. Thanks to @MichaelChirico for the suggestion and @venom1204 for the implementation.
 
 ### BUG FIXES
 
 
@@ -36,11 +36,11 @@ as.data.table.table = function(x, keep.rownames=FALSE, key=NULL, ...) {
   # prevent #4179 & just cut out here
   if (any(dim(x) == 0L)) return(null.data.table())
   # Fix for bug #43 - order of columns are different when doing as.data.table(with(DT, table(x, y)))
-  val = rev(dimnames(provideDimnames(x)))
+  val = frev(dimnames(provideDimnames(x)))
   if (is.null(names(val)) || !any(nzchar(names(val))))
-    setattr(val, 'names', paste0("V", rev(seq_along(val))))
+    setattr(val, 'names', paste0("V", frev(seq_along(val))))
   ans = data.table(do.call(CJ, c(val, sorted=FALSE)), N = as.vector(x), key=key)
-  setcolorder(ans, c(rev(head(names(ans), -1L)), "N"))
+  setcolorder(ans, c(frev(head(names(ans), -1L)), "N"))
   ans
 }
 
@@ -104,18 +104,18 @@ as.data.table.array = function(x, keep.rownames=FALSE, key=NULL, sorted=TRUE, va
     dnx[nulldnx] = lapply(dx[nulldnx], seq_len) #3636
     dnx
   } else dnx
-  val = rev(val)
+  setfrev(val)
   if (is.null(names(val)) || !any(nzchar(names(val))))
-    setattr(val, 'names', paste0("V", rev(seq_along(val))))
+    setattr(val, 'names', paste0("V", frev(seq_along(val))))
   if (value.name %chin% names(val))
-    stopf("Argument 'value.name' should not overlap with column names in result: %s", brackify(rev(names(val))))
+    stopf("Argument 'value.name' should not overlap with column names in result: %s", brackify(frev(names(val))))
   N = NULL
   ans = do.call(CJ, c(val, sorted=FALSE))
   set(ans, j="N", value=as.vector(x))
   if (isTRUE(na.rm))
     ans = ans[!is.na(N)]
   setnames(ans, "N", value.name)
-  dims = rev(head(names(ans), -1L))
+  dims = frev(head(names(ans), -1L))
   setcolorder(ans, c(dims, value.name))
   if (isTRUE(sorted) && is.null(key)) key = dims
   setkeyv(ans, key)
 
@@ -110,7 +110,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
     }
     if (x_merge_type=="integer64" || i_merge_type=="integer64") {
       nm = c(iname, xname)
-      if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) }  # w is which to coerce
+      if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; setfrev(nm) }  # w is which to coerce
       if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
         from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
         coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)
 
@@ -1,6 +1,4 @@
 
-cedta.override = NULL  # If no need arises, will deprecate.
-
 cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdown","knitr","ezknitr","IRkernel", "rtvs")
 # These packages run user code in their own environment and thus do not
 # themselves Depend or Import data.table. knitr's eval is passed envir=globalenv() so doesn't
@@ -72,7 +70,6 @@ cedta = function(n=2L) {
       (all(c("FUN", "X") %chin% ls(parent.frame(n))) ||
       .any_sd_queries_in_stack(sc))) ||
     (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) ||
-    nsname %chin% cedta.override ||
     isTRUE(ns$.datatable.aware) ||  # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist
     tryCatch("data.table" %chin% get(".Depends",paste("package",nsname,sep=":"),inherits=FALSE),error=function(e)FALSE)  # both ns$.Depends and get(.Depends,ns) are not sufficient
   if (!ans && getOption("datatable.verbose")) {
 
@@ -221,7 +221,7 @@ replace_dot_alias = function(e) {
     }
     return(x)
   }
-  if (!mult %chin% c("first", "last", "all")) stopf("mult argument can only be 'first', 'last' or 'all'")
+  if (!mult %chin% c("first", "last", "all", "error")) stopf("mult argument can only be 'first', 'last', 'all' or 'error'")
   missingroll = missing(roll)
   if (length(roll)!=1L || is.na(roll)) stopf("roll must be a single TRUE, FALSE, positive/negative integer/double including +Inf and -Inf or 'nearest'")
   if (is.character(roll)) {
@@ -520,6 +520,7 @@ replace_dot_alias = function(e) {
       }
       i = .shallow(i, retain.key = TRUE)
       ans = bmerge(i, x, leftcols, rightcols, roll, rollends, nomatch, mult, ops, verbose=verbose)
+      if (mult == "error") mult = "all" ## error should have been raised inside bmerge() call above already, if it wasn't continue as mult="all"
       xo = ans$xo ## to make it available for further use.
       # temp fix for issue spotted by Jan, test #1653.1. TODO: avoid this
       # 'setorder', as there's another 'setorder' in generating 'irows' below...
 
@@ -9,7 +9,7 @@ cbindlist_impl_ = function(l, copy) {
 }
 
 cbindlist = function(l) cbindlist_impl_(l, copy=TRUE)
-setcbindlist = function(l) cbindlist_impl_(l, copy=FALSE)
+setcbindlist = function(l) invisible(cbindlist_impl_(l, copy=FALSE))
 
 # when 'on' is missing then use keys, used only for inner and full join
 onkeys = function(x, y) {
@@ -157,9 +157,9 @@ mergepair = function(lhs, rhs, on, how, mult, lhs.cols=names(lhs), rhs.cols=name
         stopf("'on' is missing and necessary key is not present")
     }
     if (any(bad.on <- !on %chin% names(lhs)))
-      stopf("'on' argument specifies columns to join [%s] that are not present in %s table [%s]", brackify(on[bad.on]), "LHS", brackify(names(lhs)))
+      stopf("'on' argument specifies columns to join %s that are not present in %s table %s", brackify(on[bad.on]), "LHS", brackify(names(lhs)))
     if (any(bad.on <- !on %chin% names(rhs)))
-      stopf("'on' argument specifies columns to join [%s] that are not present in %s table [%s]", brackify(on[bad.on]), "RHS", brackify(names(rhs)))
+      stopf("'on' argument specifies columns to join %s that are not present in %s table %s", brackify(on[bad.on]), "RHS", brackify(names(rhs)))
   } else if (is.null(on)) {
     on = character() ## cross join only
   }
@@ -203,7 +203,7 @@ mergepair = function(lhs, rhs, on, how, mult, lhs.cols=names(lhs), rhs.cols=name
     copy_x = TRUE
     ## ensure no duplicated column names in merge results
     if (any(dup.i <- names(out.i) %chin% names(out.x)))
-      stopf("merge result has duplicated column names [%s], use 'cols' argument or rename columns in 'l' tables", brackify(names(out.i)[dup.i]))
+      stopf("merge result has duplicated column names %s, use 'cols' argument or rename columns in 'l' tables", brackify(names(out.i)[dup.i]))
   }
 
   ## stack i and x
@@ -257,6 +257,104 @@ mergepair = function(lhs, rhs, on, how, mult, lhs.cols=names(lhs), rhs.cols=name
   setDT(out)
 }
 
+mergelist_impl_ = function(l, on, cols, how, mult, join.many, copy) {  # shared worker: validates the arguments, then folds mergepair() over 'l' from left to right
+  verbose = getOption("datatable.verbose")
+  if (verbose)
+    p = proc.time()[[3L]]  # elapsed-time start; 'p' is only read again inside 'if (verbose)' branches
+
+  if (!is.list(l) || is.data.frame(l))  # a data.frame is itself a list, so it is rejected explicitly
+    stopf("'%s' must be a list", "l")
+  if (!all(vapply_1b(l, is.data.table)))
+    stopf("Every element of 'l' list must be data.table objects")
+  if (!all(idx <- lengths(l) > 0L))  # lengths() of a table is its number of columns
+    stopf("Tables in 'l' must all have columns, but these entries have 0: %s", brackify(which(!idx)))
+  if (any(idx <- vapply_1i(l, function(x) anyDuplicated(names(x))) > 0L))  # 'idx' is reused: now TRUE for tables with duplicated names
+    stopf("Column names in individual 'l' entries must be unique, but these have some duplicates: %s", brackify(which(idx)))
+
+  n = length(l)
+  if (n < 2L) {  # zero or one table: nothing to merge, return early
+    out = if (n) l[[1L]] else as.data.table(l)  # n == 0: convert the empty list itself
+    if (copy) out = copy(out)
+    if (verbose)
+      catf("mergelist: merging %d table(s), took %.3fs\n", n, proc.time()[[3L]]-p)
+    return(out)
+  }
+
+  if (!is.list(join.many))
+    join.many = rep(list(join.many), n - 1L)  # recycle a single setting to one entry per pairwise join (n-1 joins)
+  if (length(join.many) != n - 1L || !all(vapply_1b(join.many, isTRUEorFALSE)))
+    stopf("'join.many' must be TRUE or FALSE, or a list of such whose length must be length(l)-1L")
+
+  if (missing(mult))
+    mult = NULL  # NULL entries pass validation below and are handed to mergepair() unchanged
+  if (!is.list(mult))
+    mult = rep(list(mult), n - 1L)  # rep(list(NULL), k) yields a list of k NULLs
+  if (length(mult) != n - 1L || !all(vapply_1b(mult, function(x) is.null(x) || (is.character(x) && length(x) == 1L && !anyNA(x) && x %chin% c("error", "all", "first", "last")))))
+    stopf("'mult' must be one of [error, all, first, last] or NULL, or a list of such whose length must be length(l)-1L")
+
+  if (!is.list(how))
+    how = rep(list(how), n-1L)  # same recycling as for 'join.many' and 'mult'
+  if (length(how)!=n-1L || !all(vapply_1b(how, function(x) is.character(x) && length(x)==1L && !anyNA(x) && x %chin% c("left", "inner", "full", "right", "semi", "anti", "cross"))))
+    stopf("'how' must be one of [left, inner, full, right, semi, anti, cross], or a list of such whose length must be length(l)-1L")
+
+  if (is.null(cols)) {
+    cols = vector("list", n)  # list of n NULLs, one per table (note: n entries, not n-1)
+  } else {
+    if (!is.list(cols))
+      stopf("'%s' must be a list", "cols")
+    if (length(cols) != n)
+      stopf("'cols' must be same length as 'l' (%d != %d)", length(cols), n)
+    skip = vapply_1b(cols, is.null)  # NULL entries are excluded from the two checks below
+    if (!all(vapply_1b(cols[!skip], function(x) is.character(x) && !anyNA(x) && !anyDuplicated(x))))
+      stopf("'cols' must be a list of non-zero length, non-NA, non-duplicated, character vectors, or eventually NULLs (all columns)")
+    if (any(mapply(function(x, icols) !all(icols %chin% names(x)), l[!skip], cols[!skip])))  # each non-NULL entry is checked against the names of its own table
+      stopf("'cols' specify columns not present in corresponding table")
+  }
+
+  if (missing(on) || is.null(on)) {
+    on = vector("list", n - 1L)  # list of n-1 NULLs, one per pairwise join
+  } else {
+    if (!is.list(on))
+      on = rep(list(on), n - 1L)
+    if (length(on) != n-1L || !all(vapply_1b(on, function(x) is.character(x) && !anyNA(x) && !anyDuplicated(x)))) ## length checked in dtmerge
+      stopf("'on' must be non-NA, non-duplicated, character vector, or a list of such which length must be length(l)-1L")
+  }
+
+  l.mem = lapply(l, vapply, address, "")  # address() of every column of every input table, compared with the result's columns after the loop
+  out = l[[1L]]  # running result: starts as the first table
+  out.cols = cols[[1L]]
+  for (join.i in seq_len(n - 1L)) {
+    rhs.i = join.i + 1L  # join number i merges the running result with table i+1
+    out = mergepair(
+      lhs = out, rhs = l[[rhs.i]],
+      on = on[[join.i]],
+      how = how[[join.i]], mult = mult[[join.i]],
+      lhs.cols = out.cols, rhs.cols = cols[[rhs.i]],
+      copy = FALSE, ## avoid any copies inside, will copy once below
+      join.many = join.many[[join.i]],
+      verbose = verbose
+    )
+    out.cols = copy(names(out))  # the next join uses every column of the running result; NOTE(review): copy() presumably detaches the names from 'out' - confirm
+  }
+  out.mem = vapply_1c(out, address)  # address() of every column of the merged result
+  if (copy)
+    .Call(CcopyCols, out, colnamesInt(out, names(out.mem)[out.mem %chin% unique(unlist(l.mem, recursive=FALSE))]))  # selects only result columns whose address equals an input column's; CcopyCols is not visible here, presumably copies them in place - confirm
+  if (verbose)
+    catf("mergelist: merging %d tables, took %.3fs\n", n, proc.time()[[3L]] - p)
+  out
+}
+
+mergelist = function(l, on, cols=NULL, how=c("left", "inner", "full", "right", "semi", "anti", "cross"), mult, join.many=getOption("datatable.join.many")) {  # merge the data.tables in list 'l'; delegates to mergelist_impl_() with copy=TRUE
+  if (missing(how) || is.null(how))
+    how = match.arg(how)  # 'how' missing or NULL: match.arg() returns the first choice, "left"
+  mergelist_impl_(l, on, cols, how, mult, join.many, copy=TRUE)  # 'on' and 'mult' may be missing here; mergelist_impl_() tests them with missing()
+}
+setmergelist = function(l, on, cols=NULL, how=c("left", "inner", "full", "right", "semi", "anti", "cross"), mult, join.many=getOption("datatable.join.many")) {  # merge the data.tables in list 'l'; delegates to mergelist_impl_() with copy=FALSE
+  if (missing(how) || is.null(how))
+    how = match.arg(how)  # 'how' missing or NULL: match.arg() returns the first choice, "left"
+  invisible(mergelist_impl_(l, on, cols, how, mult, join.many, copy=FALSE))  # result is returned invisibly, so it does not auto-print at top level
+}
+
 # Previously, we had a custom C implementation here, which is ~2x faster,
 #   but this is fast enough we don't bother maintaining a new routine.
 #   Hopefully in the future rep() can recognize the ALTREP and use that, too.
 
@@ -86,7 +86,7 @@ which.last = function(x)
   if (!is.logical(x)) {
     stopf("x not boolean")
   }
-  length(x) - match(TRUE, rev(x)) + 1L
+  length(x) - match(TRUE, frev(x)) + 1L
 }
 
 require_bit64_if_needed = function(DT) {
@@ -226,7 +226,7 @@ fctr = function(x, levels=unique(x), ..., sort=FALSE, rev=FALSE) {
   if (!isTRUEorFALSE(rev))
     stopf("argument 'rev' must be TRUE or FALSE")
   if (sort) levels = sort(levels)
-  if (rev) levels = rev(levels)
+  if (rev) levels = frev(levels)
   factor(x, levels=levels, ...)
 }
 
 
@@ -21,3 +21,6 @@ fitsInInt32 = function(x) .Call(CfitsInInt32R, x)
 fitsInInt64 = function(x) .Call(CfitsInInt64R, x)
 
 coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy)
+
+frev   = function(x) .Call(Cfrev, x, TRUE)  # thin wrapper over the C routine Cfrev; NOTE(review): the TRUE/FALSE second argument is presumably a copy flag - confirm against the C source
+setfrev = function(x) invisible(.Call(Cfrev, x, FALSE))  # same routine with the flag FALSE; result is returned invisibly
Original file line number	Diff line number	Diff line change
`@@ -110,7 +110,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos`
`110`	`110`	`}`
`111`	`111`	`if (x_merge_type=="integer64" \|\| i_merge_type=="integer64") {`
`112`	`112`	`nm = c(iname, xname)`
`113`		`- if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce`
	`113`	`+ if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; setfrev(nm) } # w is which to coerce`
`114`	`114`	`if (wclass=="integer" \|\| (wclass=="double" && fitsInInt64(w[[wc]]))) {`
`115`	`115`	`from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""`
`116`	`116`	`coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)`
Original file line number	Diff line number	Diff line change
`@@ -221,7 +221,7 @@ replace_dot_alias = function(e) {`
`221`	`221`	`}`
`222`	`222`	`return(x)`
`223`	`223`	`}`
`224`		`- if (!mult %chin% c("first", "last", "all")) stopf("mult argument can only be 'first', 'last' or 'all'")`
	`224`	`+ if (!mult %chin% c("first", "last", "all", "error")) stopf("mult argument can only be 'first', 'last', 'all' or 'error'")`
`225`	`225`	`missingroll = missing(roll)`
`226`	`226`	`if (length(roll)!=1L \|\| is.na(roll)) stopf("roll must be a single TRUE, FALSE, positive/negative integer/double including +Inf and -Inf or 'nearest'")`
`227`	`227`	`if (is.character(roll)) {`
`@@ -520,6 +520,7 @@ replace_dot_alias = function(e) {`
`520`	`520`	`}`
`521`	`521`	`i = .shallow(i, retain.key = TRUE)`
`522`	`522`	`ans = bmerge(i, x, leftcols, rightcols, roll, rollends, nomatch, mult, ops, verbose=verbose)`
	`523`	`+ if (mult == "error") mult = "all" ## error should have been raised inside bmerge() call above already, if it wasn't continue as mult="all"`
`523`	`524`	`xo = ans$xo ## to make it available for further use.`
`524`	`525`	`# temp fix for issue spotted by Jan, test #1653.1. TODO: avoid this`
`525`	`526`	`# 'setorder', as there's another 'setorder' in generating 'irows' below...`
Original file line number	Diff line number	Diff line change
`@@ -86,7 +86,7 @@ which.last = function(x)`
`86`	`86`	`if (!is.logical(x)) {`
`87`	`87`	`stopf("x not boolean")`
`88`	`88`	`}`
`89`		`- length(x) - match(TRUE, rev(x)) + 1L`
	`89`	`+ length(x) - match(TRUE, frev(x)) + 1L`
`90`	`90`	`}`
`91`	`91`
`92`	`92`	`require_bit64_if_needed = function(DT) {`
`@@ -226,7 +226,7 @@ fctr = function(x, levels=unique(x), ..., sort=FALSE, rev=FALSE) {`
`226`	`226`	`if (!isTRUEorFALSE(rev))`
`227`	`227`	`stopf("argument 'rev' must be TRUE or FALSE")`
`228`	`228`	`if (sort) levels = sort(levels)`
`229`		`- if (rev) levels = rev(levels)`
	`229`	`+ if (rev) levels = frev(levels)`
`230`	`230`	`factor(x, levels=levels, ...)`
`231`	`231`	`}`
`232`	`232`