Skip to content

Commit e849fe6

Browse files
authored
Merge branch 'master' into issue6556
2 parents 186cbd5 + 6641ca0 commit e849fe6

File tree

4 files changed

+63
-3
lines changed

4 files changed

+63
-3
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ rowwiseDT(
129129

130130
17. Assignment with `:=` to an S4 slot of an under-allocated data.table now works, [#6704](https://github.com/Rdatatable/data.table/issues/6704). Thanks @MichaelChirico for the report and fix.
131131

132+
18. `as.data.table()` method for `data.frame`s (especially those with extended classes) is more consistent with `as.data.frame()` with respect to retention of attributes, [#5699](https://github.com/Rdatatable/data.table/issues/5699). Thanks @jangorecki for the report and fix.
133+
134+
19. Grouped queries on keyed tables no longer return an incorrectly keyed result if the _ad hoc_ `by=` list has some function call (in particular, a function which happens to return a strictly decreasing function of the keys), e.g. `by=.(a = rev(a))`, [#5583](https://github.com/Rdatatable/data.table/issues/5583). Thanks @AbrJA for the report and @MichaelChirico for the fix.
135+
132136
## NOTES
133137

134138
1. There is a new vignette on joins! See `vignette("datatable-joins")`. Thanks to Angel Feliz for authoring it! Feedback welcome. This vignette has been highly requested since 2017: [#2181](https://github.com/Rdatatable/data.table/issues/2181).

R/as.data.table.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ as.data.table.list = function(x,
214214
}
215215

216216
as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {
217+
if (!identical(class(x), "data.frame")) return(as.data.table(as.data.frame(x)))
217218
if (!isFALSE(keep.rownames)) {
218219
# can specify col name to keep.rownames, #575; if it's the same as key,
219220
# kludge it to 'rn' since we only apply the new name afterwards, #4468

R/data.table.R

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2014,8 +2014,8 @@ replace_dot_alias = function(e) {
20142014
if (verbose) {last.started.at=proc.time();catf("setkey() afterwards for keyby=.EACHI ... ");flush.console()}
20152015
setkeyv(ans,names(ans)[seq_along(byval)])
20162016
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
2017-
} else if (keyby || (haskey(x) && bysameorder && (byjoin || (length(allbyvars) && identical(allbyvars,head(key(x),length(allbyvars))))))) {
2018-
setattr(ans,"sorted",names(ans)[seq_along(grpcols)])
2017+
} else if (.by_result_is_keyable(x, keyby, bysameorder, byjoin, allbyvars, bysub)) {
2018+
setattr(ans, "sorted", names(ans)[seq_along(grpcols)])
20192019
}
20202020
setalloccol(ans) # TODO: overallocate in dogroups in the first place and remove this line
20212021
}
@@ -2987,6 +2987,12 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
29872987
} else if (isS4(k)) {
29882988
.Call(CsetS4elt, k, as.character(name[[3L]]), x)
29892989
}
2990+
} else if (name %iscall% "get") { # #6725
2991+
# edit 'get(nm, env)' call to be 'assign(nm, x, envir=env)'
2992+
name = match.call(get, name)
2993+
name[[1L]] = quote(assign)
2994+
name$value = x
2995+
eval(name, parent.frame(), parent.frame())
29902996
}
29912997
.Call(CexpandAltRep, x) # issue#2866 and PR#2882
29922998
invisible(x)
@@ -3045,6 +3051,21 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) {
30453051
ids
30463052
}
30473053

3054+
.by_result_is_keyable = function(x, keyby, bysameorder, byjoin, byvars, bysub) {
3055+
if (keyby) return(TRUE)
3056+
k = key(x)
3057+
if (is.null(k)) return(FALSE) # haskey(x) but saving 'k' for below
3058+
if (!bysameorder) return(FALSE)
3059+
if (byjoin) return(TRUE)
3060+
if (!length(byvars)) return(FALSE)
3061+
if (!identical(byvars, head(k, length(byvars)))) return(FALSE) # match key exactly, in order
3062+
# For #5583, we also ensure there are no function calls in by (which might break sortedness)
3063+
if (is.name(bysub)) return(TRUE)
3064+
if (identical(bysub[[1L]], quote(list))) bysub = bysub[-1L]
3065+
if (length(all.names(bysub)) > length(byvars)) return(FALSE)
3066+
TRUE
3067+
}
3068+
30483069
.is_withFALSE_range = function(e, x, root=root_name(e), vars=all.vars(e)) {
30493070
if (root != ":") return(FALSE)
30503071
if (!length(vars)) return(TRUE) # e.g. 1:10

inst/tests/tests.Rraw

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10581,7 +10581,11 @@ test(1702.1, isoweek(test_cases), test_values)
1058110581
# but calculating from Date brings these into play, #2407
1058210582
test(1702.2, isoweek(as.Date(test_cases)), test_values)
1058310583

10584-
# *** OBSCURE ERROR WHEN Sys.timezone() = 'America/Argentina/Buenos_Aires' ***
10584+
# *** OBSCURE DST ERROR WHEN Sys.timezone() = 'America/Argentina/Buenos_Aires' ***
10585+
# *** 00:00 does not exist, h/t rikivillalba@ who worked through this ***
10586+
# *** https://techcommunity.microsoft.com/blog/dstblog/argentina-is-changing-their-daylight-saving-time-on-december-30/311020 ***
10587+
# *** https://mm.icann.org/pipermail/tz/2007-December/014743.html ***
10588+
# *** Official IANA sources: https://www.iana.org/time-zones ***
1058510589
test(1702.3, isoweek(as.POSIXct(test_cases)), test_values)
1058610590

1058710591
# 1% sample of a 400-year cycle of dates for extra robustness
@@ -20659,6 +20663,18 @@ setDT(d2)
2065920663
test(2295.1, !is.data.table(d1))
2066020664
test(2295.2, rownames(d1), 'b')
2066120665
test(2295.3, is.data.table(d2))
20666+
# Ensure against regression noted in #6725
20667+
x = data.frame(a=1)
20668+
e = environment()
20669+
foo = function(nm, env) {
20670+
setDT(get(nm, envir=env))
20671+
}
20672+
foo('x', e)
20673+
test(2295.4, is.data.table(x))
20674+
e = new.env(parent=topenv())
20675+
e$x = data.frame(a=1)
20676+
foo('x', e)
20677+
test(2295.5, is.data.table(e$x))
2066220678

2066320679
# #6588: .checkTypos used to give arbitrary strings to stopf as the first argument
2066420680
test(2296, d2[x %no such operator% 1], error = '%no such operator%')
@@ -20736,3 +20752,21 @@ test(2301.1, DT[order(a, method="auto")], error="no support for sorting by metho
2073620752
test(2301.2, DT[order(a, b, decreasing=c(TRUE, FALSE))], DT[order(-a, b)])
2073720753
test(2301.3, DT[order(a, -b, decreasing=c(TRUE, TRUE))], error="Mixing '-' with vector decreasing")
2073820754
test(2301.4, DT[order(a, b, decreasing=c(TRUE, TRUE, FALSE))], error="decreasing= has length 3")
20755+
20756+
# as.data.table should remove extra attributes from extended data.frames #5699
20757+
x = data.frame(a=c(1,5,3), b=c(2,4,6))
20758+
class(x) = c("tbl", "data.frame")
20759+
attr(x, "t1") = "a"
20760+
as.data.frame.tbl = function(x) {
20761+
attr(x, "t1") = NULL
20762+
class(x) = "data.frame"
20763+
x
20764+
}
20765+
# Compare attribute retention on the 'tbl'-classed data.frame `x` constructed for this test;
# the previous `y` was not the fixture built here, so the check did not exercise it.
test(2302, attr(as.data.table(x), "t1"), attr(as.data.frame(x), "t1"))
20766+
20767+
# by=foo(KEY) does not retain key (no way to guarantee monotonic transformation), #5583
20768+
DT = data.table(a=1:2, key='a')
20769+
test(2303.1, DT[, .N, by=.(b=rev(a))], data.table(b=2:1, N=1L))
20770+
test(2303.2, DT[, .(N=1L), by=.(b=rev(a))], data.table(b=2:1, N=1L)) # ensure no interaction with GForce
20771+
DT = data.table(a=2:3, b=1:0, key=c('a', 'b'))
20772+
test(2303.3, DT[, .N, by=.(ab=a^b, d=c(1L, 1L))], data.table(ab=c(2, 1), d=1L, N=1L))

0 commit comments

Comments
 (0)