From 7049f9b64b5086dca855747c782581368f07c252 Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <benjaminschwe@gmail.com>
Date: Sun, 26 Oct 2025 16:35:39 +0100
Subject: [PATCH] add setDT, setalloccol

---
 NEWS.md               |  2 ++
 R/as.data.table.R     |  6 +++---
 R/between.R           |  6 +++---
 R/data.table.R        | 49 +++++++++++++++++++++++--------------------
 R/fcast.R             |  8 +++----
 R/fmelt.R             |  2 +-
 R/foverlaps.R         |  2 +-
 R/frank.R             | 13 +++++++++---
 R/fread.R             |  2 +-
 R/frollapply.R        |  2 +-
 R/mergelist.R         |  4 ++--
 R/setkey.R            |  4 ++--
 R/test.data.table.R   |  2 +-
 R/transpose.R         |  2 +-
 R/utils.R             |  2 +-
 R/xts.R               |  2 +-
 inst/tests/tests.Rraw | 46 ++++++++++++++++++++++++++++++++++++++--
 man/setDT.Rd          |  7 +++++--
 man/truelength.Rd     |  7 +++++--
 src/assign.c          | 18 +++++++++++++---
 src/data.table.h      |  2 +-
 src/utils.c           |  4 ++--
 22 files changed, 132 insertions(+), 60 deletions(-)
diff --git a/NEWS.md b/NEWS.md
index 43dd6308e9..9a1c02416c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -34,6 +34,8 @@
 
 5. Negative and missing values of `n` argument of adaptive rolling functions trigger an error.
 
+6. `setDT()` and `setalloccol()` gain `duplicateShared` argument (default `TRUE`). When `TRUE`, columns that are shared with other objects are duplicated to avoid unintended modification of the original data, [#2683](https://github.com/Rdatatable/data.table/issues/2683). Previously, shared columns were not duplicated, which could lead to unexpected side effects. `frank()` now uses this internally to preserve names from the input vector and avoid modifying shared vectors, [#4240](https://github.com/Rdatatable/data.table/issues/4240). Thanks to @jangorecki, @BenoitLondon, and @MichaelChirico for the report and @ben-schwen for the fix.
+
 ### NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES 
 
 1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector, for example `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname)` to `FALSE`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`.
diff --git a/R/as.data.table.R b/R/as.data.table.R
index f849aa8c10..146d580caf 100644
--- a/R/as.data.table.R
+++ b/R/as.data.table.R
@@ -69,7 +69,7 @@ as.data.table.matrix = function(x, keep.rownames=FALSE, key=NULL, ...) {
     for (i in ic) value[[i]] = as.vector(x[, i])       # to drop any row.names that would otherwise be retained inside every column of the data.table
   }
   col_labels = dimnames(x)[[2L]]
-  setDT(value)
+  setDT(value, duplicateShared=FALSE)
   if (length(col_labels) == ncols) {
     if (any(empty <- !nzchar(col_labels)))
       col_labels[empty] = paste0("V", ic[empty])
@@ -233,7 +233,7 @@ as.data.table.list = function(x,
     }
   }
   setattr(ans, "names", vnames)
-  setDT(ans, key=key) # copy ensured above; also, setDT handles naming
+  setDT(ans, key=key, duplicateShared=FALSE) # copy ensured above; also, setDT handles naming
   if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames)  # PR 3854 and tests 2058.15-17
   ans
 }
@@ -282,7 +282,7 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {
 
   # fix for #1078 and #1128, see .resetclass() for explanation.
   setattr(ans, "class", .resetclass(x, "data.frame"))
-  setalloccol(ans)
+  setalloccol(ans, duplicateShared=FALSE)
   setkeyv(ans, key)
   ans
 }
diff --git a/R/between.R b/R/between.R
index 63877313d6..e1f60e1052 100644
--- a/R/between.R
+++ b/R/between.R
@@ -66,8 +66,8 @@ between = function(x, lower, upper, incbounds=TRUE, NAbounds=TRUE, check=FALSE,
 # issue FR #707
 # is x[i] found anywhere within [lower, upper] range?
 inrange = function(x,lower,upper,incbounds=TRUE) {
-  query = setDT(list(x=x))
-  subject = setDT(list(l=lower, u=upper))
+  query = setDT(list(x=x), duplicateShared=FALSE)
+  subject = setDT(list(l=lower, u=upper), duplicateShared=FALSE)
   ops = if (incbounds) c(4L, 2L) else c(5L, 3L) # >=,<= and >,<
   verbose = isTRUE(getOption("datatable.verbose"))
   if (verbose) {last.started.at=proc.time();catf("forderv(query) took ... ");flush.console()}
@@ -81,7 +81,7 @@ inrange = function(x,lower,upper,incbounds=TRUE) {
   )
   xo = ans$xo
   options(datatable.verbose=FALSE)
-  setDT(ans[c("starts", "lens")], key=c("starts", "lens"))
+  setDT(ans[c("starts", "lens")], key=c("starts", "lens"), duplicateShared=FALSE)
   options(datatable.verbose=verbose)
   if (verbose) {last.started.at=proc.time();catf("Generating final logical vector ... ");flush.console()}
   .Call(Cinrange, idx <- vector("logical", length(x)), xo, ans[["starts"]], ans[["lens"]])
diff --git a/R/data.table.R b/R/data.table.R
index f8f39c8f7f..7d1190361a 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -38,7 +38,7 @@ null.data.table = function() {
   ans = list()
   setattr(ans,"class",c("data.table","data.frame"))
   setattr(ans,"row.names",.set_row_names(0L))
-  setalloccol(ans)
+  setalloccol(ans, duplicateShared=FALSE)
 }
 
 data.table = function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFactors=FALSE)
@@ -73,7 +73,7 @@ data.table = function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, str
     for (j in which(vapply_1b(ans, is.character))) set(ans, NULL, j, as_factor(.subset2(ans, j)))
     # as_factor is internal function in fread.R currently
   }
-  setalloccol(ans)  # returns a NAMED==0 object, unlike data.frame()
+  setalloccol(ans, duplicateShared=FALSE)  # returns a NAMED==0 object, unlike data.frame()
 }
 
 replace_dot_alias = function(e) {
@@ -523,7 +523,7 @@ replace_dot_alias = function(e) {
       xo = ans$xo ## to make it available for further use.
       # temp fix for issue spotted by Jan, test #1653.1. TODO: avoid this
       # 'setorder', as there's another 'setorder' in generating 'irows' below...
-      if (length(ans$indices)) setorder(setDT(ans[1L:3L]), indices)
+      if (length(ans$indices)) setorder(setDT(ans[1L:3L], duplicateShared=FALSE), indices)
       allLen1 = ans$allLen1
       f__ = ans$starts
       len__ = ans$lens
@@ -575,7 +575,7 @@ replace_dot_alias = function(e) {
         }
         if (nqbyjoin) {
           irows = if (length(xo)) xo[irows] else irows
-          xo = setorder(setDT(list(indices=rep.int(indices__, len__), irows=irows)))[["irows"]]
+          xo = setorder(setDT(list(indices=rep.int(indices__, len__), irows=irows), duplicateShared=FALSE))[["irows"]]
           ans = .Call(CnqRecreateIndices, xo, len__, indices__, max(indices__), nomatch) # issue#4388 fix
           f__ = ans[[1L]]; len__ = ans[[2L]]
           allLen1 = FALSE # TODO; should this always be FALSE?
@@ -594,7 +594,7 @@ replace_dot_alias = function(e) {
         irows = xo[irows]   # TO DO: fsort here?
         if (mult=="all" && !allGrp1) { # following #1991 fix, !allGrp1 will always be TRUE. TODO: revisit.
           if (verbose) {last.started.at=proc.time();catf("Reorder irows for 'mult==\"all\" && !allGrp1' ... ");flush.console()}
-          irows = setorder(setDT(list(indices=rep.int(indices__, len__), irows=irows)))[["irows"]]
+          irows = setorder(setDT(list(indices=rep.int(indices__, len__), irows=irows), duplicateShared=FALSE))[["irows"]]
           if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
         }
       }
@@ -1245,7 +1245,7 @@ replace_dot_alias = function(e) {
               # Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we
               # don't mind.
             }
-            setalloccol(x, n, verbose=verbose)   # always assigns to calling scope; i.e. this scope
+            setalloccol(x, n, verbose=verbose, duplicateShared=FALSE)   # always assigns to calling scope; i.e. this scope
             if (is.name(name)) {
               assign(as.character(name),x,parent.frame(),inherits=TRUE)
             } else if (.is_simple_extraction(name)) {
@@ -1352,7 +1352,7 @@ replace_dot_alias = function(e) {
         setattr(ans, "sorted", .join_result_key(x, i, ans, if (!missing(on)) names(on), ansvars, leftcols, rightcols, names_i, irows, roll))
         setattr(ans, "class", class(x))  # retain class that inherits from data.table, #64
         setattr(ans, "row.names", .set_row_names(length(ans[[1L]])))
-        setalloccol(ans)
+        setalloccol(ans, duplicateShared=FALSE)
       }
       if (!with || missing(j)) return(ans)
       if (!is.data.table(ans)) setattr(ans, "class", c("data.table","data.frame"))  # DF |> DT(,.SD[...]) .SD should be data.table, test 2212.013
@@ -2018,7 +2018,7 @@ replace_dot_alias = function(e) {
   } else if (.by_result_is_keyable(x, keyby, bysameorder, byjoin, allbyvars, bysub)) {
     setattr(ans, "sorted", names(ans)[seq_along(grpcols)])
   }
-  setalloccol(ans)   # TODO: overallocate in dogroups in the first place and remove this line
+  setalloccol(ans, duplicateShared=FALSE)   # TODO: overallocate in dogroups in the first place and remove this line
 }
 
 # can the specified merge of x and i be marked as sorted? return the columns for which this is true, otherwise NULL
@@ -2242,7 +2242,7 @@ tail.data.table = function(x, n=6L, ...) {
   if (!cedta()) {
     x = if (nargs()<4L) `[<-.data.frame`(x, i, value=value)
         else `[<-.data.frame`(x, i, j, value)
-    return(setalloccol(x))    # over-allocate (again).   Avoid all this by using :=.
+    return(setalloccol(x, duplicateShared=FALSE))    # over-allocate (again).   Avoid all this by using :=.
   }
   # TO DO: warningf("Please use DT[i,j:=value] syntax instead of DT[i,j]<-value, for efficiency. See ?':='")
   if (!missing(i)) {
@@ -2251,7 +2251,7 @@ tail.data.table = function(x, n=6L, ...) {
     if (is.matrix(i)) {
       if (!missing(j)) stopf("When i is a matrix in DT[i]<-value syntax, it doesn't make sense to provide j")
       x = `[<-.data.frame`(x, i, value=value)
-      return(setalloccol(x))
+      return(setalloccol(x, duplicateShared=FALSE))
     }
     i = x[i, which=TRUE]
     # Tried adding ... after value above, and passing ... in here (e.g. for mult="first") but R CMD check
@@ -2284,11 +2284,11 @@ tail.data.table = function(x, n=6L, ...) {
     reinstatekey=key(x)
   }
   if (!selfrefok(x) || truelength(x) < ncol(x)+length(newnames)) {
-    x = setalloccol(x, length(x)+length(newnames)) # because [<- copies via *tmp* and main/duplicate.c copies at length but copies truelength over too
+    x = setalloccol(x, length(x)+length(newnames), duplicateShared=FALSE) # because [<- copies via *tmp* and main/duplicate.c copies at length but copies truelength over too
     # search for one other .Call to assign in [.data.table to see how it differs
   }
   x = .Call(Cassign,copy(x),i,cols,newnames,value) # From 3.1.0, DF[2,"b"] = 7 no longer copies DF$a (so in this [<-.data.table method we need to copy)
-  setalloccol(x)  #  can maybe avoid this realloc, but this is (slow) [<- anyway, so just be safe.
+  setalloccol(x, duplicateShared=FALSE)  #  can maybe avoid this realloc, but this is (slow) [<- anyway, so just be safe.
   if (length(reinstatekey)) setkeyv(x,reinstatekey)
   invisible(x)
   # no copy at all if user calls directly; i.e. `[<-.data.table`(x,i,j,value)
@@ -2302,7 +2302,7 @@ tail.data.table = function(x, n=6L, ...) {
 "$<-.data.table" = function(x, name, value) {
   if (!cedta()) {
     ans = `$<-.data.frame`(x, name, value) # nocov
-    return(setalloccol(ans))           # nocov. over-allocate (again)
+    return(setalloccol(ans, duplicateShared=FALSE))           # nocov. over-allocate (again)
   }
   x = copy(x)
   set(x,j=name,value=value)  # important i is missing here
@@ -2359,7 +2359,7 @@ dimnames.data.table = function(x) {
     setattr(x,"names",NULL)   # e.g. plyr::melt() calls base::unname()
   else {
     setnames(x,value)
-    setalloccol(x)
+    setalloccol(x, duplicateShared=FALSE)
   }
   x   # this returned value is now shallow copied by R 3.1.0 via *tmp*. A very welcome change.
 }
@@ -2601,7 +2601,7 @@ copy = function(x) {
   reallocate = function(y) {
     if (is.data.table(y)) {
       .Call(C_unlock, y)
-      setalloccol(y)
+      setalloccol(y, duplicateShared=FALSE)
     } else if (is.list(y)) {
       oldClass = class(y)
       setattr(y, 'class', NULL)  # otherwise [[.person method (which returns itself) results in infinite recursion, #4620
@@ -2666,11 +2666,11 @@ shallow = function(x, cols=NULL) {
   ans
 }
 
-setalloccol = alloc.col = function(DT, n=getOption("datatable.alloccol"), verbose=getOption("datatable.verbose"))
+setalloccol = alloc.col = function(DT, n=getOption("datatable.alloccol"), verbose=getOption("datatable.verbose"), duplicateShared=TRUE)
 {
   name = substitute(DT)
   if (identical(name, quote(`*tmp*`))) stopf("setalloccol attempting to modify `*tmp*`")
-  ans = .Call(Calloccolwrapper, DT, eval(n), verbose)
+  ans = .Call(Calloccolwrapper, DT, eval(n), verbose, duplicateShared)
   if (is.name(name)) {
     name = as.character(name)
     assign(name,ans,parent.frame(),inherits=TRUE)
@@ -2875,7 +2875,7 @@ rbindlist = function(l, use.names="check", fill=FALSE, idcol=NULL, ignore.attr=F
   }
   ans = .Call(Crbindlist, l, use.names, fill, idcol, ignore.attr)
   if (!length(ans)) return(null.data.table())
-  setDT(ans)[]
+  setDT(ans, duplicateShared=FALSE)[]
 }
 
 vecseq = function(x,y,clamp) .Call(Cvecseq,x,y,clamp)
@@ -2953,7 +2953,7 @@ setDF = function(x, rownames=NULL) {
   invisible(x)
 }
 
-setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
+setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE, duplicateShared=TRUE) {
   name = substitute(x)
   if (is.name(name)) {
     home = function(x, env) {
@@ -2968,12 +2968,14 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
       stopf("Cannot convert '%1$s' to data.table by reference because binding is locked. It is very likely that '%1$s' resides within a package (or an environment) that is locked to prevent modifying its variable bindings. Try copying the object to your current environment, ex: var <- copy(var) and then using setDT again.", cname)
     }
   }
+  if (!isTRUEorFALSE(duplicateShared))
+    stopf("'%s' must be TRUE or FALSE", "duplicateShared")
   if (is.data.table(x)) {
     # fix for #1078 and #1128, see .resetclass() for explanation.
     setattr(x, 'class', .resetclass(x, 'data.table'))
     if (!missing(key)) setkeyv(x, key) # fix for #1169
     if (check.names) setattr(x, "names", make.names(names(x), unique=TRUE))
-    if (selfrefok(x) > 0L) return(invisible(x)) else setalloccol(x)
+    if (selfrefok(x) > 0L) return(invisible(x)) else setalloccol(x, duplicateShared=duplicateShared)
   } else if (is.data.frame(x)) {
     # check no matrix-like columns, #3760. Allow a single list(matrix) is unambiguous and depended on by some revdeps, #3581
     # for performance, only warn on the first such column, #5426
@@ -2988,8 +2990,9 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
     setattr(x, "row.names", .set_row_names(nrow(x)))
     if (check.names) setattr(x, "names", make.names(names(x), unique=TRUE))
     # fix for #1078 and #1128, see .resetclass() for explanation.
-    setattr(x, "class", .resetclass(x, 'data.frame'))
-    setalloccol(x)
+    setalloccol(x, duplicateShared=duplicateShared)
+    setattr(x, "class", .resetclass(x, 'data.frame')) # selfrek not ok after setattr class
+    setalloccol(x, duplicateShared=duplicateShared)
     if (!is.null(rn)) {
       nm = c(if (is.character(keep.rownames)) keep.rownames[1L] else "rn", names(x))
       x[, (nm[1L]) := rn]
@@ -3017,7 +3020,7 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
     }
     setattr(x, "row.names", .set_row_names(nrow))
     setattr(x, "class", c("data.table", "data.frame"))
-    setalloccol(x)
+    setalloccol(x, duplicateShared = duplicateShared)
   } else {
     stopf("Argument 'x' to 'setDT' should be a 'list', 'data.frame' or 'data.table'")
   }
diff --git a/R/fcast.R b/R/fcast.R
index f059812dd0..e8b4d0bdd4 100644
--- a/R/fcast.R
+++ b/R/fcast.R
@@ -162,7 +162,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
   if (any(vapply_1b(dat[varnames], is.list))) {
     stopf("Columns specified in formula can not be of type list")
   }
-  setDT(dat)
+  setDT(dat, duplicateShared=FALSE)
 
   m = as.list(match.call()[-1L])
   subset = m[["subset"]][[2L]]
@@ -214,7 +214,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
   lhs = shallow(dat, lhsnames); rhs = shallow(dat, rhsnames); val = shallow(dat, valnames)
   # handle drop=TRUE/FALSE - Update: Logic moved to R, AND faster than previous version. Take that... old me :-).
   if (all(drop)) {
-    map = setDT(lapply(list(lhsnames, rhsnames), function(cols) frankv(dat, cols=cols, ties.method="dense", na.last=FALSE))) # #2202 fix
+    map = setDT(lapply(list(lhsnames, rhsnames), function(cols) frankv(dat, cols=cols, ties.method="dense", na.last=FALSE)), duplicateShared=FALSE) # #2202 fix
     maporder = lapply(map, order_)
     mapunique = lapply(seq_along(map), function(i) .Call(CsubsetVector, map[[i]], maporder[[i]]))
     lhs = .Call(CsubsetDT, lhs, maporder[[1L]], seq_along(lhs))
@@ -225,7 +225,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
     map = vector("list", 2L)
     .Call(Csetlistelt, map, 1L, lhs_[lhs, which=TRUE])
     .Call(Csetlistelt, map, 2L, rhs_[rhs, which=TRUE])
-    setDT(map)
+    setDT(map, duplicateShared=FALSE)
     mapunique = vector("list", 2L)
     .Call(Csetlistelt, mapunique, 1L, seq_len(nrow(lhs_)))
     .Call(Csetlistelt, mapunique, 2L, seq_len(nrow(rhs_)))
@@ -245,7 +245,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
           else c(CJ(valnames, allcols, sorted=FALSE), sep=sep))
     # removed 'setcolorder()' here, #1153
   setattr(ans, 'names', c(lhsnames, allcols))
-  setDT(ans)
+  setDT(ans, duplicateShared=FALSE)
   setattr(ans, 'sorted', lhsnames)
   ans
 }
diff --git a/R/fmelt.R b/R/fmelt.R
index c6f435578b..e17071b3b1 100644
--- a/R/fmelt.R
+++ b/R/fmelt.R
@@ -216,7 +216,7 @@ melt.data.table = function(data, id.vars, measure.vars, variable.name = "variabl
       as.logical(variable.factor), as.logical(value.factor),
       variable.name, value.name, as.logical(na.rm),
       as.logical(verbose))
-  setDT(ans)
+  setDT(ans, duplicateShared=FALSE)
   if (anyDuplicated(names(ans))) {
     catf("Duplicate column names found in molten data.table. Setting unique names using 'make.names'\n")
     setnames(ans, make.unique(names(ans)))
diff --git a/R/foverlaps.R b/R/foverlaps.R
index 7bbaf0dc13..e684d5c2e9 100644
--- a/R/foverlaps.R
+++ b/R/foverlaps.R
@@ -172,7 +172,7 @@ foverlaps = function(x, y, by.x=key(x) %||% key(y), by.y=key(y), maxgap=0L, mino
   }
   # nocov end
 
-  setDT(olaps)
+  setDT(olaps, duplicateShared=FALSE)
   setnames(olaps, c("xid", "yid"))
   yid = NULL  # for 'no visible binding for global variable' from R CMD check on i clauses below
 
diff --git a/R/frank.R b/R/frank.R
index 419f5ea414..a532eb27a0 100644
--- a/R/frank.R
+++ b/R/frank.R
@@ -5,6 +5,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a
     warningf("length(na.last) > 1, only the first element will be used")
     na.last = na.last[1L]
   }
+  input_names = NULL
   keep = (na.last == "keep")
   na.last = as.logical(na.last)
   as_list = function(x) {
@@ -16,6 +17,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a
     if (!missing(cols) && !is.null(cols))
       stopf("x is a single vector, non-NULL 'cols' doesn't make sense")
     cols = 1L
+    input_names = names(x) 
     x = as_list(x)
   } else {
     cols = colnamesInt(x, cols, check_dups=TRUE)
@@ -24,7 +26,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a
   }
   # need to unlock for #4429
   x = .shallow(x, cols, unlock = TRUE) # shallow copy even if list..
-  setDT(x)
+  setDT(x, duplicateShared=TRUE)
   cols = seq_along(cols)
   if (is.na(na.last)) {
     if ("..na_prefix.." %chin% names(x))
@@ -66,10 +68,15 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a
   # take care of na.last="keep"
   V1 = NULL # for R CMD CHECK warning
   if (isTRUE(keep)) {
-    ans = (setDT(as_list(ans))[which_(nas, TRUE), V1 := NA])[[1L]]
+    ans = (setDT(as_list(ans), duplicateShared=TRUE)[which_(nas, TRUE), V1 := NA])[[1L]]
   } else if (is.na(na.last)) {
-    ans = ans[which_(nas, FALSE)]
+    idx = which_(nas, FALSE)
+    if (!is.null(input_names))
+      input_names = input_names[idx]
+    ans = ans[idx]
   }
+  if (!is.null(input_names))
+    names(ans) = input_names
   ans
 }
 
diff --git a/R/fread.R b/R/fread.R
index 16a72ed24d..1c4722d697 100644
--- a/R/fread.R
+++ b/R/fread.R
@@ -301,7 +301,7 @@ yaml=FALSE, tmpdir=tempdir(), tz="UTC")
 
   if (isTRUE(data.table)) {
     setattr(ans, "class", c("data.table", "data.frame"))
-    setalloccol(ans)
+    setalloccol(ans, duplicateShared=FALSE)
   } else {
     setattr(ans, "class", "data.frame")
   }
diff --git a/R/frollapply.R b/R/frollapply.R
index ee9d785bcd..abfd901bb5 100644
--- a/R/frollapply.R
+++ b/R/frollapply.R
@@ -265,7 +265,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right","
     } else {
       rev.d = function(d) {
         l = lapply(d, rev)
-        if (is.data.table(d)) setDT(l) else if (is.data.frame(d)) setDF(l) else l
+        if (is.data.table(d)) setDT(l, duplicateShared=FALSE) else if (is.data.frame(d)) setDF(l) else l
       }
       X = lapply(X, rev.d)
     }
diff --git a/R/mergelist.R b/R/mergelist.R
index 7ff0e87bcf..b2042bcf64 100644
--- a/R/mergelist.R
+++ b/R/mergelist.R
@@ -4,7 +4,7 @@ cbindlist_impl_ = function(l, copy) {
     setattr(ans, "sorted", NULL)
     setattr(ans, "index", NULL)
   }
-  setDT(ans)
+  setDT(ans, duplicateShared=FALSE)
   ans
 }
 
@@ -254,7 +254,7 @@ mergepair = function(lhs, rhs, on, how, mult, lhs.cols=names(lhs), rhs.cols=name
       out = rbindlist(list(out.l, out.r), use.names=TRUE, fill=TRUE)
     }
   }
-  setDT(out)
+  setDT(out, duplicateShared=FALSE)
 }
 
 mergelist_impl_ = function(l, on, cols, how, mult, join.many, copy) {
diff --git a/R/setkey.R b/R/setkey.R
index 4ba5be4d71..63a7230561 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -331,8 +331,8 @@ CJ = function(..., sorted = TRUE, unique = FALSE)
   nrow = prod(lengths(l))
   if (nrow > .Machine$integer.max) stopf("Cross product of elements provided to CJ() would result in %.0f rows which exceeds .Machine$integer.max == %d", nrow, .Machine$integer.max)
   l = .Call(Ccj, l)
-  setDT(l)
-  l = setalloccol(l)  # a tiny bit wasteful to over-allocate a fixed join table (column slots only), doing it anyway for consistency since
+  setDT(l, duplicateShared=FALSE)
+  l = setalloccol(l, duplicateShared=FALSE)  # a tiny bit wasteful to over-allocate a fixed join table (column slots only), doing it anyway for consistency since
                     # it's possible a user may wish to use SJ directly outside a join and would expect consistent over-allocation
   setnames(l, vnames)
   if (sorted) {
diff --git a/R/test.data.table.R b/R/test.data.table.R
index 6e264c871f..5b6a7deacb 100644
--- a/R/test.data.table.R
+++ b/R/test.data.table.R
@@ -154,7 +154,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
     #   runtime test number (i.e. 'numStr') since we're just doing a static check here, though we _are_ careful to match the
     #   full test expression string, i.e., not just limited to numeric literal test numbers.
     arg_line = call_id = col1 = col2 = i.line1 = id = line1 = parent = preceding_line = test_start_line = text = token = x.line1 = x.parent = NULL # R CMD check
-    pd = setDT(utils::getParseData(parse(fn, keep.source=TRUE)))
+    pd = setDT(utils::getParseData(parse(fn, keep.source=TRUE)), duplicateShared=FALSE)
     file_lines = readLines(fn)
     # NB: a call looks like (with id/parent tracking)
     # <expr>
diff --git a/R/transpose.R b/R/transpose.R
index 0da0123b94..3423548c38 100644
--- a/R/transpose.R
+++ b/R/transpose.R
@@ -18,7 +18,7 @@ transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names
   if (!is.null(make.names)) setattr(ans, "names", c(keep.names, colnames))
   else if (is.data.frame(l))  # including data.table but not plain list
     setattr(ans, "names", c(keep.names, paste0("V", seq_len(length(ans)-length(keep.names)))))
-  if (is.data.table(l)) setDT(ans)
+  if (is.data.table(l)) setDT(ans, duplicateShared=FALSE)
   else if (is.data.frame(l)) setDF(ans)
   ans[]
 }
diff --git a/R/utils.R b/R/utils.R
index 39a623b36d..ff5671d64a 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -207,7 +207,7 @@ is_utc = function(tz) {
 
 # nocov start #593 always return a data.table
 edit.data.table = function(name, ...) {
-  setDT(NextMethod('edit', name))[]
+  setDT(NextMethod('edit', name), duplicateShared=FALSE)[]
 }
 # nocov end
 
diff --git a/R/xts.R b/R/xts.R
index 582cfa5b0a..58fcb33606 100644
--- a/R/xts.R
+++ b/R/xts.R
@@ -5,7 +5,7 @@ as.data.table.xts = function(x, keep.rownames = TRUE, key=NULL, ...) {
   if (is.na(keep.rownames)) stopf("keep.rownames must not be NA")
   # as.data.frame.xts will handle copying, and
   #   the error check above ensures as.data.frame.xts is applied
-  r = setDT(as.data.frame(x, row.names=NULL))
+  r = setDT(as.data.frame(x, row.names=NULL), duplicateShared=FALSE)
   if (identical(keep.rownames, FALSE)) return(r[])
   index_nm = if (is.character(keep.rownames)) keep.rownames else "index"
   if (index_nm %chin% names(x)) stopf("Input xts object should not have '%s' column because it would result in duplicate column names. Rename '%s' column in xts or use `keep.rownames` to change the index column name.", index_nm, index_nm)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 11a02fabad..541149286a 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -16224,13 +16224,13 @@ test(2087.01, x[a == 1L, .(b, b2=b)][ , address(b)!=address(b2)])
 # setkey detects and copies shared address columns, #3496
 x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
 x$b = x$a
-setDT(x)
+setDT(x, duplicateShared=FALSE)
 test(2087.02, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), key="a"),
               output='Found and copied 1 column with a shared memory address')
 x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
 x$b = x$a
 x$c = x$a
-setDT(x)
+setDT(x, duplicateShared=FALSE)
 test(2087.03, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"),
               output='Found and copied 2 columns with a shared memory address')
 # follow-up from #3890; function body and variable in calling scope
@@ -21837,3 +21837,45 @@ DT[, V1000 := 20:1]
 test(2343.1, forderv(DT, by=names(DT), sort=FALSE, retGrp=TRUE), forderv(DT, by=c("V1", "V1000"), sort=FALSE, retGrp=TRUE))
 x = c(rep(0, 7e5), 1e6)
 test(2343.2, forderv(list(x)), integer(0))
+
+# setDT should not modify shared vectors #2683
+sDT = function(x) setalloccol(setattr(x, "class", c("data.table", "data.frame")), duplicateShared=TRUE)
+y = c(a=4, b=5)
+x = list(c=y)
+test(2344.01, {sDT(x); y}, c(a=4, b=5))
+y = c(a=4, b=5)
+x = list(c=y)
+test(2344.02, {setDT(x, duplicateShared=TRUE); y}, c(a=4, b=5))
+y = c(a=4, b=5)
+x = list(c=y)
+setattr(x, "class", c("data.table", "data.frame"))
+test(2344.03, {sDT(x)[1, c := 0L]; y}, c(a=4, b=5))
+y = c(a=4, b=5)
+x = list(c=y)
+test(2344.04, {setDT(x, duplicateShared=TRUE)[1, c := 0L]; y}, c(a=4, b=5))
+# frank should not modify shared vectors #4240
+x = setNames(c(3,1,4,1,5,9,2,6,5,3,5), letters[1:11])
+test(2344.11, {frank(y<-copy(x)); names(y)}, names(x))
+test(2344.12, frank(copy(x)), rank(copy(x)))
+# consistent semantics after setDT #4783
+df = data.frame(a=c(1,2,3,4,5), b=c(2,3,4,5,6))
+d1 = copy(df)
+d2 = d1
+setDT(d2)
+test(2344.21, {setDT(d2); class(d1)}, "data.frame")
+d1 = copy(df)
+d2 = d1
+setDT(d2)
+test(2344.22, {d2[, b:=3:7]; d1}, df)
+d1 = copy(df)
+d2 = d1
+setDT(d2)
+test(2344.23, {d2[, c:=4:8]; d1}, df)
+d1 = copy(df)
+d2 = d1
+setDT(d2)
+test(2344.24, {d2[!is.na(a), b:=5:9]; d1}, df)
+d1 = copy(df)
+d2 = d1
+setDT(d2)
+test(2344.25, {d2[, b:=30]; d1}, df)
diff --git a/man/setDT.Rd b/man/setDT.Rd
index 6dfb7ac701..0d852047de 100644
--- a/man/setDT.Rd
+++ b/man/setDT.Rd
@@ -8,19 +8,22 @@
 
 }
 \usage{
-setDT(x, keep.rownames=FALSE, key=NULL, check.names=FALSE)
+setDT(x, keep.rownames=FALSE, key=NULL, check.names=FALSE, duplicateShared=TRUE)
 }
 \arguments{
   \item{x}{ A named or unnamed \code{list}, \code{data.frame} or \code{data.table}. }
-  \item{keep.rownames}{ For \code{data.frame}s, \code{TRUE} retains the \code{data.frame}'s row names under a new column \code{rn}. \code{keep.rownames = "id"} names the column \code{"id"} instead. } 
+  \item{keep.rownames}{ For \code{data.frame}s, \code{TRUE} retains the \code{data.frame}'s row names under a new column \code{rn}. \code{keep.rownames = "id"} names the column \code{"id"} instead. }
   \item{key}{ Character vector of one or more column names which is passed to \code{\link{setkeyv}}. }
   \item{check.names}{ Just as \code{check.names} in \code{\link{data.frame}}. }
+  \item{duplicateShared}{ Logical. When \code{TRUE}, columns that are shared with other objects (i.e., \code{MAYBE_SHARED} is true) are duplicated to avoid unintended modification of the original data. Default is \code{TRUE}. }
 }
 
 \details{
   When working on large \code{list}s or \code{data.frame}s, it might be both time- and memory-consuming to convert them to a \code{data.table} using \code{as.data.table(.)}, which will make a complete copy of the input object before converting it to a \code{data.table}. \code{setDT} takes care of this issue by converting any \code{list} (named or unnamed, data.frame or not) \emph{by reference} instead. That is, the input object is modified in place with no copy.
 
   This should come with low overhead, but note that \code{setDT} does check that the input is valid by looking for inconsistent input lengths and inadmissible column types (e.g. matrix).
+
+  The \code{duplicateShared} parameter controls whether columns that might be shared with other R objects are duplicated. When \code{TRUE} (the default), \code{setDT} checks each column to see if it is shared (using R's internal \code{MAYBE_SHARED} macro) and duplicates any shared columns. This prevents unintended side effects where modifying the data.table would also modify the original objects. For example, if a list contains a vector \code{y} that is also referenced elsewhere, operations on the resulting data.table would not affect \code{y} when \code{duplicateShared=TRUE}. Set to \code{FALSE} only if you are certain no columns are shared or if you explicitly want shared column behavior for performance reasons.
 }
 
 \value{
diff --git a/man/truelength.Rd b/man/truelength.Rd
index a85f78b1b6..02455b82af 100644
--- a/man/truelength.Rd
+++ b/man/truelength.Rd
@@ -10,16 +10,19 @@
 truelength(x)
 setalloccol(DT,
     n = getOption("datatable.alloccol"),        # default: 1024L
-    verbose = getOption("datatable.verbose"))   # default: FALSE
+    verbose = getOption("datatable.verbose"),   # default: FALSE
+    duplicateShared = TRUE)
 alloc.col(DT,
     n = getOption("datatable.alloccol"),        # default: 1024L
-    verbose = getOption("datatable.verbose"))   # default: FALSE
+    verbose = getOption("datatable.verbose"),   # default: FALSE
+    duplicateShared = TRUE)
 }
 \arguments{
 \item{x}{ Any type of vector, including \code{data.table} which is a \code{list} vector of column pointers. }
 \item{DT}{ A \code{data.table}. }
 \item{n}{ The number of spare column pointer slots to ensure are available. If \code{DT} is a 1,000 column \code{data.table} with 24 spare slots remaining, \code{n=1024L} means grow the 24 spare slots to be 1024. \code{truelength(DT)} will then be 2024 in this example. }
 \item{verbose}{ Output status and information. }
+\item{duplicateShared}{ Logical. When \code{TRUE} (the default), columns that are shared with other objects (i.e., \code{MAYBE_SHARED} is true) are duplicated to avoid unintended modification of the original data. Set to \code{FALSE} only if you are certain no columns are shared or if you explicitly want shared column behavior for performance reasons. }
 }
 \details{
     When adding columns by reference using \code{:=}, we \emph{could} simply create a new column list vector (one longer) and memcpy over the old vector,
diff --git a/src/assign.c b/src/assign.c
index eee00cc0f5..5e5e1b0215 100644
--- a/src/assign.c
+++ b/src/assign.c
@@ -285,15 +285,27 @@ int checkOverAlloc(SEXP x)
   return ans;
 }
 
-SEXP alloccolwrapper(SEXP dt, SEXP overAllocArg, SEXP verbose) {
+SEXP alloccolwrapper(SEXP dt, SEXP overAllocArg, SEXP verbose, SEXP duplicateSharedArg) {
   if (!IS_TRUE_OR_FALSE(verbose))
     error(_("%s must be TRUE or FALSE"), "verbose");
-  int overAlloc = checkOverAlloc(overAllocArg);
+  bool duplicateShared = false;
+  if (duplicateSharedArg != R_NilValue) {
+    if (!IS_TRUE_OR_FALSE(duplicateSharedArg))
+      error(_("%s must be TRUE or FALSE"), "duplicateShared");
+    duplicateShared = LOGICAL(duplicateSharedArg)[0];
+  }
+  const int overAlloc = checkOverAlloc(overAllocArg);
   SEXP ans = PROTECT(alloccol(dt, length(dt)+overAlloc, LOGICAL(verbose)[0]));
 
   for(R_len_t i = 0; i < LENGTH(ans); i++) {
+    SEXP col = VECTOR_ELT(ans, i);
+    // Check if column is shared with another object and duplicate on request (#2683)
+    if (duplicateShared && MAYBE_SHARED(col)) {
+      col = duplicate(col);
+      SET_VECTOR_ELT(ans, i, col);
+    }
     // clear names; also excluded by copyMostAttrib(). Primarily for data.table and as.data.table, but added here centrally (see #103).
-    setAttrib(VECTOR_ELT(ans, i), R_NamesSymbol, R_NilValue);
+    setAttrib(col, R_NamesSymbol, R_NilValue);
 
     // But don't clear dim and dimnames. Because as from 1.12.4 we keep the matrix column as-is and ask user to use as.data.table to
     // unpack matrix columns when they really need to; test 2089.2
diff --git a/src/data.table.h b/src/data.table.h
index 663f0adb46..8e8fbac87b 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -354,7 +354,7 @@ SEXP setattrib(SEXP, SEXP, SEXP);
 SEXP assign(SEXP, SEXP, SEXP, SEXP, SEXP);
 SEXP copy(SEXP);
 SEXP setdt_nrows(SEXP);
-SEXP alloccolwrapper(SEXP, SEXP, SEXP);
+SEXP alloccolwrapper(SEXP, SEXP, SEXP, SEXP);
 SEXP selfrefokwrapper(SEXP, SEXP);
 SEXP truelength(SEXP);
 SEXP setcharvec(SEXP, SEXP, SEXP);
diff --git a/src/utils.c b/src/utils.c
index b159fa0e60..c81756b1b0 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -500,8 +500,8 @@ int n_columns(SEXP x) {
  error("internal error: C setDT should be called only on a list"); // # nocov
  setAttrib(x, R_ClassSymbol, char2_dtdf());
  setAttrib(x, sym_rownames, set_row_names(n_rows(x)));
- return alloccolwrapper(x, GetOption(sym_alloccol, R_NilValue), GetOption(sym_verbose, R_NilValue));
- }*/
+ return alloccolwrapper(x, GetOption(sym_alloccol, R_NilValue), GetOption(sym_verbose, R_NilValue), R_NilValue);
+}*/
 
 // inherits(x, "data.table")
 bool isDataTable(SEXP x) {