diff --git a/NEWS.md b/NEWS.md index 449d20cb5b..eb2fbd047b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,6 +44,8 @@ 9. `isoweek()` is much faster (e.g. 20x) by re-using an implementation from {base}, [#5111](https://github.com/Rdatatable/data.table/issues/5111). Thanks @MichaelChirico for the report and PR. +10. `data.table()` and `as.data.table()` with `keep.rownames=TRUE` now extract row names from named vectors, matching `data.frame()` behavior. The row names column (named `"rn"` by default, or as given by `keep.rownames="column_name"`) is filled from the names of the first named vector in the input, or from the row names of the first matrix or data.frame element that has them, [#1916](https://github.com/Rdatatable/data.table/issues/1916). Thanks to @richierocks for the feature request and @Mukulyadav2004 for the implementation. + ### BUG FIXES 1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix. 
diff --git a/R/as.data.table.R b/R/as.data.table.R index c68819450f..bd7f97fa79 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -136,9 +136,26 @@ as.data.table.list = function(x, missing.check.names = missing(check.names) origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854 empty_atomic = FALSE + + # Handle keep.rownames for vectors (mimicking data.frame behavior) + rownames_ = NULL + check_rownames = !isFALSE(keep.rownames) + for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above + if (check_rownames && is.null(rownames_)) { + if (is.null(dim(xi))) { + if (!is.null(nm <- names(xi))) { + rownames_ = nm + x[[i]] = unname(xi) + } + } else { + if (!is.null(nm <- rownames(xi))) { + rownames_ = nm + } + } + } if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE if ("POSIXlt" %chin% class(xi)) { warningf("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.") @@ -203,6 +220,18 @@ as.data.table.list = function(x, } if (any(vnames==".SD")) stopf("A column may not be called .SD. 
That has special meaning.") if (check.names) vnames = make.names(vnames, unique=TRUE) + + # Add rownames column when vector names were found + if (!is.null(rownames_)) { + rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn" + if (!is.na(idx <- chmatch(rn_name, vnames)[1L])) { + ans = c(list(ans[[idx]]), ans[-idx]) + vnames = c(vnames[idx], vnames[-idx]) + } else { + ans = c(list(recycle(rownames_, nrow)), ans) + vnames = c(rn_name, vnames) + } + } setattr(ans, "names", vnames) setDT(ans, key=key) # copy ensured above; also, setDT handles naming if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames) # PR 3854 and tests 2058.15-17 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fcd0827010..aceeb77f89 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21406,3 +21406,23 @@ dt = data.table(x = 123456, y = "wide_string") test(2329.2, print(dt, col.names = "none"), output = "1: 123456 wide_string\n") dt = data.table(a = NA_integer_, b = NaN) test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN\n") + +# Row name extraction from multiple vectors, #7136 +x <- 1:3 +y <- setNames(4:6, c("A", "B", "C")) +test(2330.1, as.data.table(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=1:3, V2=4:6)) +test(2330.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=1:3, V2=4:6)) +test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=4:6, V2=1:3)) + +# Behavior under data.frame() +test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), x=1:3, y=4:6)) +test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), y=4:6, x=1:3)) + +DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames +test(2330.6, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6)) 
+ +z <- setNames(1:3, rep("", 3)) # vector with all-empty names: behaviour with all-empty row names +test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3)) + +M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2"))) # test of list(M) for empty-rowname'd matrix input +test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6)) diff --git a/man/as.data.table.Rd b/man/as.data.table.Rd index 6c4db54887..fbec798c81 100644 --- a/man/as.data.table.Rd +++ b/man/as.data.table.Rd @@ -31,7 +31,7 @@ is.data.table(x) } \arguments{ \item{x}{An R object.} - \item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead.} + \item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead. For list input and when calling \code{data.table()}, this column is filled from the names of the first named vector, or from the row names of the first matrix or \code{data.frame} element that has them, similar to how \code{data.frame()} derives its row names.} \item{key}{ Character vector of one or more column names which is passed to \code{\link{setkeyv}}. } \item{sorted}{logical used in \emph{array} method, default \code{TRUE} is overridden when \code{key} is provided. } \item{value.name}{character scalar used in \emph{array} method, default \code{"value"}.}