Row Name Extraction for data.table() with keep.rownames (#7136)

Mukulyadav2004 · MichaelChirico · web-flow · commit bfa049ca5f3f · 2025-07-10T10:00:49.000-07:00
* Added logic to as.data.table.list() to preserve names from vectors

* remove trailing whitespace

* add coverage tests

* replace X and Y

* result test

* tests

* corrected test output

* use as.data.table in tests

* use isFalse

* add classed error conditions

* ws style

* rm redundant condition

* missed ws change

* invalid_input in place of invalid_type

* right place invalid_input

* typo and unsortable in place of unsupported

* specify join type

* merge our loop which checks for vector rowname extraction in below

* added logic for handling data.frame

* add tests

* add tests

* merge master

* remove duplicate

* remove list from as.data.table

* added vignettes

* rm ws

* try to simplify

* fix tests

* restore

* try and handle "inner" row names from matrix case

* rm vestigial

* fix

* simplify tests

* remove any(nzchar(nm))

* remove test condition about any(nzchar(nm))

* update test number

* update description, NEWS.md and add tests

* remove unwanted changes

---------

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Co-authored-by: Michael Chirico <chiricom@google.com>
diff --git a/NEWS.md b/NEWS.md
@@ -44,6 +44,8 @@
 
 9. `isoweek()` is much faster (e.g. 20x) by re-using an implementation from {base}, [#5111](https://github.com/Rdatatable/data.table/issues/5111). Thanks @MichaelChirico for the report and PR.
 
+10. `data.table()` and `as.data.table()` with `keep.rownames=TRUE` now extract row names from named vectors, matching `data.frame()` behavior. Names from the first named vector in the input are used to create the row names column (default name `"rn"` or custom name via `keep.rownames="column_name"`), [#1916](https://github.com/Rdatatable/data.table/issues/1916). Thanks to @richierocks for the feature request and @Mukulyadav2004 for the implementation.
+
 ### BUG FIXES
 
 1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix.
diff --git a/R/as.data.table.R b/R/as.data.table.R
@@ -136,9 +136,26 @@ as.data.table.list = function(x,
   missing.check.names = missing(check.names)
   origListNames = if (missing(.named)) names(x) else NULL  # as.data.table called directly, not from inside data.table() which provides .named, #3854
   empty_atomic = FALSE
+
+  # Handle keep.rownames for vectors (mimicking data.frame behavior)
+  rownames_ = NULL
+  check_rownames = !isFALSE(keep.rownames)
+
   for (i in seq_len(n)) {
     xi = x[[i]]
     if (is.null(xi)) next    # eachncol already initialized to 0 by integer() above
+    if (check_rownames && is.null(rownames_)) {
+      if (is.null(dim(xi))) {
+        if (!is.null(nm <- names(xi))) {
+          rownames_ = nm
+          x[[i]] = unname(xi)
+        }
+      } else {
+        if (!is.null(nm <- rownames(xi))) {
+          rownames_ = nm
+        }
+      }
+    }
     if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE
     if ("POSIXlt" %chin% class(xi)) {
       warningf("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
@@ -203,6 +220,18 @@ as.data.table.list = function(x,
   }
   if (any(vnames==".SD")) stopf("A column may not be called .SD. That has special meaning.")
   if (check.names) vnames = make.names(vnames, unique=TRUE)
+
+  # Add rownames column when vector names were found
+  if (!is.null(rownames_)) {
+    rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn"
+    if (!is.na(idx <- chmatch(rn_name, vnames)[1L])) {
+      ans = c(list(ans[[idx]]), ans[-idx])
+      vnames = c(vnames[idx], vnames[-idx])
+    } else {
+      ans = c(list(recycle(rownames_, nrow)), ans)
+      vnames = c(rn_name, vnames)
+    }
+  }
   setattr(ans, "names", vnames)
   setDT(ans, key=key) # copy ensured above; also, setDT handles naming
   if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames)  # PR 3854 and tests 2058.15-17
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -21406,3 +21406,23 @@ dt = data.table(x = 123456, y = "wide_string")
 test(2329.2, print(dt, col.names = "none"), output = "1: 123456 wide_string\n")
 dt = data.table(a = NA_integer_, b = NaN)
 test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN\n")
+
+# Row name extraction from multiple vectors, #7136
+x <- 1:3 
+y <- setNames(4:6, c("A", "B", "C"))  
+test(2330.1, as.data.table(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=1:3, V2=4:6))
+test(2330.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=1:3, V2=4:6))
+test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=4:6, V2=1:3)) 
+
+# Behavior under data.frame()
+test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), x=1:3, y=4:6))
+test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), y=4:6, x=1:3))
+
+DF <- data.frame(row.names = letters[1:6], V = 1:6)     # Test data.frame with explicit rownames
+test(2330.6, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6))
+
+z <- setNames(1:3, rep("", 3))  # vector with all-empty names     # behaviour with all-empty row names
+test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3))
+
+M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2")))   #  test of list(M) for empty-rowname'd matrix input
+test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6))
diff --git a/man/as.data.table.Rd b/man/as.data.table.Rd
@@ -31,7 +31,7 @@ is.data.table(x)
 }
 \arguments{
   \item{x}{An R object.}
-  \item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead.}
+  \item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead. For lists and when calling \code{data.table()}, names from the first named vector are extracted and used as row names, similar to \code{data.frame()} behavior.}
   \item{key}{ Character vector of one or more column names which is passed to \code{\link{setkeyv}}. }
   \item{sorted}{logical used in \emph{array} method, default \code{TRUE} is overridden when \code{key} is provided. }
   \item{value.name}{character scalar used in \emph{array} method, default \code{"value"}.}

Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,7 @@ is.data.table(x)`
`31`	`31`	`}`
`32`	`32`	`\arguments{`
`33`	`33`	`\item{x}{An R object.}`
`34`		`- \item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead.}`
	`34`	`+ \item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead. For lists and when calling \code{data.table()}, names from the first named vector are extracted and used as row names, similar to \code{data.frame()} behavior.}`
`35`	`35`	`\item{key}{ Character vector of one or more column names which is passed to \code{\link{setkeyv}}. }`
`36`	`36`	`\item{sorted}{logical used in \emph{array} method, default \code{TRUE} is overridden when \code{key} is provided. }`
`37`	`37`	`\item{value.name}{character scalar used in \emph{array} method, default \code{"value"}.}`