Skip to content

Commit 0909048

Browse files
Ensure Consistent Key Handling in as.data.table S3 Methods (#6865)
* fixing key setting order * fix key setting order * document argument keep.rownames and key * add tests and news.md * add rownames and key in .Rd * Extract keep.rownames and key from ... * eliminate %>% operator * update test 2309.2 * update tests * set key * update key handling * update test * corrected last test * correct test case * correct 2309.04 * update fix * update changes * Update R/as.data.table.R * Update R/as.data.table.R * update tests * correct signature of as.data.table.data.table --------- Co-authored-by: Benjamin Schwendinger <[email protected]>
1 parent 04b9bdf commit 0909048

File tree

4 files changed

+29
-6
lines changed

4 files changed

+29
-6
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
3. `fread(keepLeadingZeros=TRUE)` now correctly parses dates with components with leading zeros as dates instead of strings, [#6851](https://github.com/Rdatatable/data.table/issues/6851). Thanks @TurnaevEvgeny for the report and @ben-schwen for the fix.
1616

17+
4. `as.data.table()` now properly handles keys: specifying keys sets them, omitting keys preserves existing ones, and setting `key=NULL` clears them, [#6859](https://github.com/Rdatatable/data.table/issues/6859). Thanks @brookslogan for the report and @Mukulyadav2004 for the fix.
18+
1719
## NOTES
1820

1921
1. Continued work to remove non-API C functions, [#6180](https://github.com/Rdatatable/data.table/issues/6180). Thanks Ivan Krylov for the PRs and for writing a clear and concise guide about the R API: https://aitap.codeberg.page/R-api/.

R/as.data.table.R

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,8 @@ as.data.table.list = function(x,
214214
}
215215

216216
as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {
217-
if (is.data.table(x)) return(as.data.table.data.table(x)) # S3 is weird, #6739. Also # nocov; this is tested in 2302.{2,3}, not sure why it doesn't show up in coverage.
218-
if (!identical(class(x), "data.frame")) return(as.data.table(as.data.frame(x)))
217+
if (is.data.table(x)) return(as.data.table.data.table(x, key=key)) # S3 is weird, #6739. Also # nocov; this is tested in 2302.{2,3}, not sure why it doesn't show up in coverage.
218+
if (!identical(class(x), "data.frame")) return(as.data.table(as.data.frame(x), keep.rownames=keep.rownames, key=key, ...))
219219
if (!isFALSE(keep.rownames)) {
220220
# can specify col name to keep.rownames, #575; if it's the same as key,
221221
# kludge it to 'rn' since we only apply the new name afterwards, #4468
@@ -228,7 +228,7 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {
228228
if (any(cols_with_dims(x))) {
229229
# a data.frame with a column that is data.frame needs to be expanded; test 2013.4
230230
# x may be a class with [[ method that behaves differently, so as.list first for default [[, #4526
231-
return(as.data.table.list(as.list(x), keep.rownames=keep.rownames, ...))
231+
return(as.data.table.list(as.list(x), keep.rownames=keep.rownames, key = key,...))
232232
}
233233
ans = copy(x) # TO DO: change this deep copy to be shallow.
234234
setattr(ans, "row.names", .set_row_names(nrow(x)))
@@ -245,13 +245,14 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {
245245
ans
246246
}
247247

248-
as.data.table.data.table = function(x, ...) {
248+
as.data.table.data.table = function(x, ..., key=NULL) {
249249
# as.data.table always returns a copy, automatically takes care of #473
250250
if (any(cols_with_dims(x))) { # for test 2089.2
251-
return(as.data.table.list(x, ...))
251+
return(as.data.table.list(x, key = key, ...))
252252
}
253253
x = copy(x) # #1681
254254
# fix for #1078 and #1128, see .resetclass() for explanation.
255255
setattr(x, 'class', .resetclass(x, "data.table"))
256+
if (!missing(key)) setkeyv(x, key)
256257
x
257258
}

inst/tests/tests.Rraw

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21084,3 +21084,23 @@ test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE
2108421084
dt = data.table(date=as.IDate(c(NA, "2014-12-05")))
2108521085
test(2308.01, fread("date\nNA\n2014-12-05", keepLeadingZeros=TRUE), dt)
2108621086
test(2308.02, fread("date\nNA\n2014-12-05", keepLeadingZeros=FALSE), dt)
21087+
21088+
# Test that as.data.table.data.table preserves key when explicitly specified but not when omitted
21089+
DF = data.frame(t = c(3:1, 4:5), y = 1:5)
21090+
# data.frame to data.table with key
21091+
test(2309.01, key(as.data.table(DF, key="t")), "t")
21092+
# tibble to data.table with key
21093+
class(DF) = c("tbl_df", "tbl", "data.frame")
21094+
test(2309.02, key(as.data.table(DF, key="t")), "t")
21095+
21096+
# data.table keyed with "b"
21097+
DT = data.table(a = 1:5, b = 1:5, x = 1:5, key = "b")
21098+
test(2309.03, key(as.data.table(DT, key="a")), "a")
21099+
test(2309.04, key(as.data.table(DT)), "b")
21100+
test(2309.05, key(as.data.table(DT, key=NULL)), NULL)
21101+
21102+
# non-keyed data.table
21103+
DT = data.table(a = 1:5, b = 1:5, x = 1:5)
21104+
test(2309.06, key(as.data.table(DT, key="a")), "a")
21105+
test(2309.07, key(as.data.table(DT)), NULL)
21106+
test(2309.08, key(as.data.table(DT, key=NULL)), NULL)

man/as.data.table.Rd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Functions to check if an object is \code{data.table}, or coerce it if possible.
2121
\usage{
2222
as.data.table(x, keep.rownames=FALSE, \dots)
2323

24-
\method{as.data.table}{data.table}(x, \dots)
24+
\method{as.data.table}{data.table}(x, \dots, key=NULL)
2525

2626
\method{as.data.table}{array}(x, keep.rownames=FALSE, key=NULL, sorted=TRUE,
2727
value.name="value", na.rm=TRUE, \dots)

0 commit comments

Comments
 (0)