Skip to content

Commit bfa049c

Browse files
Row Name Extraction for data.table() with keep.rownames (#7136)
* Added logic to as.data.table.list() to preserve names from vectors * remove trailing whitespace * add coverage tests * replace X and Y * result test * tests * corrected test output * use as.data.table in tests * use isFALSE * add classed error conditions * ws style * rm redundant condition * missed ws change * invalid_input in place of invalid_type * right place invalid_input * typo and unsortable in place of unsupported * specify join type * merge our loop which checks for vector rowname extraction in below * added logic for handling data.frame * add tests * add tests * merge master * remove duplicate * remove list from as.data.table * added vignettes * rm ws * try to simplify * fix tests * restore * try and handle "inner" row names from matrix case * rm vestigial * fix * simplify tests * remove any(nzchar(nm)) * remove test condition about any(nzchar(nm)) * update test number * update description, news.md and add tests * remove unwanted changes --------- Co-authored-by: Michael Chirico <[email protected]> Co-authored-by: Michael Chirico <[email protected]>
1 parent ed2df98 commit bfa049c

File tree

4 files changed

+52
-1
lines changed

4 files changed

+52
-1
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444

4545
9. `isoweek()` is much faster (e.g. 20x) by re-using an implementation from {base}, [#5111](https://github.com/Rdatatable/data.table/issues/5111). Thanks @MichaelChirico for the report and PR.
4646

47+
10. `data.table()` and `as.data.table()` with `keep.rownames=TRUE` now extract row names from named vectors, matching `data.frame()` behavior. Names from the first named vector in the input are used to create the row names column (default name `"rn"` or custom name via `keep.rownames="column_name"`), [#1916](https://github.com/Rdatatable/data.table/issues/1916). Thanks to @richierocks for the feature request and @Mukulyadav2004 for the implementation.
48+
4749
### BUG FIXES
4850

4951
1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix.

R/as.data.table.R

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,26 @@ as.data.table.list = function(x,
136136
missing.check.names = missing(check.names)
137137
origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854
138138
empty_atomic = FALSE
139+
140+
# Handle keep.rownames for vectors (mimicking data.frame behavior)
141+
rownames_ = NULL
142+
check_rownames = !isFALSE(keep.rownames)
143+
139144
for (i in seq_len(n)) {
140145
xi = x[[i]]
141146
if (is.null(xi)) next # eachncol already initialized to 0 by integer() above
147+
if (check_rownames && is.null(rownames_)) {
148+
if (is.null(dim(xi))) {
149+
if (!is.null(nm <- names(xi))) {
150+
rownames_ = nm
151+
x[[i]] = unname(xi)
152+
}
153+
} else {
154+
if (!is.null(nm <- rownames(xi))) {
155+
rownames_ = nm
156+
}
157+
}
158+
}
142159
if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE
143160
if ("POSIXlt" %chin% class(xi)) {
144161
warningf("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
@@ -203,6 +220,18 @@ as.data.table.list = function(x,
203220
}
204221
if (any(vnames==".SD")) stopf("A column may not be called .SD. That has special meaning.")
205222
if (check.names) vnames = make.names(vnames, unique=TRUE)
223+
224+
# Add rownames column when vector names were found
225+
if (!is.null(rownames_)) {
226+
rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn"
227+
if (!is.na(idx <- chmatch(rn_name, vnames)[1L])) {
228+
ans = c(list(ans[[idx]]), ans[-idx])
229+
vnames = c(vnames[idx], vnames[-idx])
230+
} else {
231+
ans = c(list(recycle(rownames_, nrow)), ans)
232+
vnames = c(rn_name, vnames)
233+
}
234+
}
206235
setattr(ans, "names", vnames)
207236
setDT(ans, key=key) # copy ensured above; also, setDT handles naming
208237
if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames) # PR 3854 and tests 2058.15-17

inst/tests/tests.Rraw

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21406,3 +21406,23 @@ dt = data.table(x = 123456, y = "wide_string")
2140621406
test(2329.2, print(dt, col.names = "none"), output = "1: 123456 wide_string\n")
2140721407
dt = data.table(a = NA_integer_, b = NaN)
2140821408
test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN\n")
21409+
21410+
# Row name extraction from multiple vectors, #7136
21411+
x <- 1:3
21412+
y <- setNames(4:6, c("A", "B", "C"))
21413+
test(2330.1, as.data.table(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=1:3, V2=4:6))
21414+
test(2330.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=1:3, V2=4:6))
21415+
test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=4:6, V2=1:3))
21416+
21417+
# Behavior under data.frame()
21418+
test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), x=1:3, y=4:6))
21419+
test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), y=4:6, x=1:3))
21420+
21421+
DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames
21422+
test(2330.6, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6))
21423+
21424+
z <- setNames(1:3, rep("", 3)) # vector with all-empty names # behaviour with all-empty row names
21425+
test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3))
21426+
21427+
M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2"))) # test of list(M) for empty-rowname'd matrix input
21428+
test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6))

man/as.data.table.Rd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ is.data.table(x)
3131
}
3232
\arguments{
3333
\item{x}{An R object.}
34-
\item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead.}
34+
\item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead. For lists and when calling \code{data.table()}, the names of the first named vector (or the row names of the first element with dimensions, such as a matrix or \code{data.frame}) are extracted into this column, similar to how \code{data.frame()} derives its row names.}
3535
\item{key}{ Character vector of one or more column names which is passed to \code{\link{setkeyv}}. }
3636
\item{sorted}{logical used in \emph{array} method, default \code{TRUE} is overridden when \code{key} is provided. }
3737
\item{value.name}{character scalar used in \emph{array} method, default \code{"value"}.}

0 commit comments

Comments
 (0)