Keep existing key(s) in [ even when new column(s) are added before them (#7391)

czeildi · ben-schwen · web-flow · commit bc7f5aff7618 · 2025-10-27T09:28:45.000+01:00
* add tests and fix

* extract DT setup from test

* add news item

* do not use \(x) fun style

* remove unnecessary local()

* apply formatting suggestion

* add test case for join result and multiple keys

* clarify test comment

* style change in fix

* rephrase news

---------

Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>
diff --git a/NEWS.md b/NEWS.md
@@ -340,6 +340,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T
 
 20. `forderv` could segfault on keys with long runs of identical bytes (e.g., many duplicate columns) because the single-group branch tail-recursed radix-by-radix until the C stack ran out, [#4300](https://github.com/Rdatatable/data.table/issues/4300). This is a major problem since sorting is extensively used in `data.table`. Thanks @quantitative-technologies for the report and @ben-schwen for the fix.
 
+21. `[` now preserves existing key(s) when new columns are added before them, instead of incorrectly setting a new column as key, [#7364](https://github.com/Rdatatable/data.table/issues/7364). Thanks @czeildi for the bug report and the fix.
+
 ### NOTES
 
 1. The following in-progress deprecations have proceeded:
diff --git a/R/data.table.R b/R/data.table.R
@@ -1448,7 +1448,7 @@ replace_dot_alias = function(e) {
         if (SD_only)
           jvnames = jnames = sdvars
         else
-          jnames = as.character(Filter(is.name, jsub)[-1L])
+          jnames = vapply_1c(jsub, function(x) if (is.name(x)) as.character(x) else NA_character_)[-1L]
         key_idx = chmatch(key, jnames)
         missing_keys = which(is.na(key_idx))
         if (length(missing_keys) && missing_keys[1L] == 1L) return(NULL)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -21837,3 +21837,21 @@ DT[, V1000 := 20:1]
 test(2343.1, forderv(DT, by=names(DT), sort=FALSE, retGrp=TRUE), forderv(DT, by=c("V1", "V1000"), sort=FALSE, retGrp=TRUE))
 x = c(rep(0, 7e5), 1e6)
 test(2343.2, forderv(list(x)), integer(0))
+
+# Keep the existing key when j adds a new column before the key column (the new first column must not become the key)
+# An incorrect key can lead to an incorrect join result, #7364
+DT = data.table(V1 = 1:2, key = "V1")
+test(2344.00, key(DT[, .(V2 = c("b", "a"), V1)]), "V1")
+test(2344.01, key(DT[, .(V2 = -V1, V1)]), "V1")
+
+d1 = data.table(V1 = c(1L, 0L, 1L), V2 = c("a", "a", "b"), key = "V2")
+d2 = d1[, .(V1, label = c("one", "zero", "one"), V2)]
+r = d2[data.table(label = "one"), on = "label", allow.cartesian = TRUE]
+test(2344.02, nrow(r), 2L)
+# the join result must be the same whether or not d1 is keyed
+test(2344.03, setkey(d1[, .(V1, label = c("one", "zero", "one"), V2)][data.table(label = "one"), on = "label", allow.cartesian = TRUE], NULL),
+              setkey(d1, NULL)[, .(V1, label = c("one", "zero", "one"), V2)][data.table(label = "one"), on = "label", allow.cartesian = TRUE])
+
+# with a multi-column key, keep the sub-key of key columns selected in j (V3 is not selected here, so V1, V2 remain), even with new columns and a changed column order
+DT = data.table(V1 = 1:2, V2 = 3:4, V3 = 5:6, key = c("V1", "V2", "V3"))
+test(2344.04, key(DT[, .(V4 = c("b", "a"), V2, V5 = c("y", "x"), V1)]), c("V1", "V2"))