Skip to content

Commit 6641ca0

Browse files
Fix incorrect keying of by= results involving functions of keys (#6708)
* add test * Implement a fix * Another test with multiple keys * NEWS * redundant return * fix numbering
1 parent d782232 commit 6641ca0

File tree

3 files changed

+26
-2
lines changed

3 files changed

+26
-2
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ rowwiseDT(
131131

132132
18. `as.data.table()` method for `data.frame`s (especially those with extended classes) is more consistent with `as.data.frame()` with respect to retention of attributes, [#5699](https://github.com/Rdatatable/data.table/issues/5699). Thanks @jangorecki for the report and fix.
133133

134+
19. Grouped queries on keyed tables no longer return an incorrectly keyed result when the _ad hoc_ `by=` list contains a function call (in particular, one whose result happens to be strictly decreasing in the keys), e.g. `by=.(a = rev(a))`, [#5583](https://github.com/Rdatatable/data.table/issues/5583). Thanks @AbrJA for the report and @MichaelChirico for the fix.
135+
134136
## NOTES
135137

136138
1. There is a new vignette on joins! See `vignette("datatable-joins")`. Thanks to Angel Feliz for authoring it! Feedback welcome. This vignette has been highly requested since 2017: [#2181](https://github.com/Rdatatable/data.table/issues/2181).

R/data.table.R

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2014,8 +2014,8 @@ replace_dot_alias = function(e) {
20142014
if (verbose) {last.started.at=proc.time();catf("setkey() afterwards for keyby=.EACHI ... ");flush.console()}
20152015
setkeyv(ans,names(ans)[seq_along(byval)])
20162016
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
2017-
} else if (keyby || (haskey(x) && bysameorder && (byjoin || (length(allbyvars) && identical(allbyvars,head(key(x),length(allbyvars))))))) {
2018-
setattr(ans,"sorted",names(ans)[seq_along(grpcols)])
2017+
} else if (.by_result_is_keyable(x, keyby, bysameorder, byjoin, allbyvars, bysub)) {
2018+
setattr(ans, "sorted", names(ans)[seq_along(grpcols)])
20192019
}
20202020
setalloccol(ans) # TODO: overallocate in dogroups in the first place and remove this line
20212021
}
@@ -3051,6 +3051,21 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) {
30513051
ids
30523052
}
30533053

3054+
# Decide whether the result of a grouped query may be marked as sorted (keyed)
# on its leading grouping columns.
#
# Arguments:
#   x           object whose key is looked up with key(x)
#   keyby       logical flag; TRUE short-circuits to TRUE
#   bysameorder logical flag; FALSE rules out keying (presumably "groups come out
#               in the original row order" -- confirm against the caller)
#   byjoin      logical flag; TRUE accepts the result once x is keyed and
#               bysameorder holds (presumably the by=.EACHI path -- confirm)
#   byvars      character vector of the variables used for grouping
#   bysub       the by= expression, either a bare name or a call such as list(...)
#
# Returns a single TRUE/FALSE.
.by_result_is_keyable = function(x, keyby, bysameorder, byjoin, byvars, bysub) {
  # keyby= is always keyable, whatever the state of x
  if (keyby) return(TRUE)

  # from here on x must itself be keyed; keep the key for the prefix comparison below
  xkey = key(x)
  if (is.null(xkey)) return(FALSE)
  if (!bysameorder) return(FALSE)
  if (byjoin) return(TRUE)

  # grouping variables must be exactly the leading key columns, in key order
  n_by = length(byvars)
  if (n_by == 0L || !identical(byvars, head(xkey, n_by))) return(FALSE)

  # a bare column name cannot disturb sortedness
  if (is.name(bysub)) return(TRUE)

  # For #5583: refuse when by= contains any function call, since a transformed key
  # need not stay sorted. all.names() counts function names as well as variables,
  # so once the wrapping list() is dropped, any extra call pushes the count above
  # the number of grouping variables.
  by_exprs = if (identical(bysub[[1L]], quote(list))) bysub[-1L] else bysub
  length(all.names(by_exprs)) <= n_by
}
3068+
30543069
.is_withFALSE_range = function(e, x, root=root_name(e), vars=all.vars(e)) {
30553070
if (root != ":") return(FALSE)
30563071
if (!length(vars)) return(TRUE) # e.g. 1:10

inst/tests/tests.Rraw

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20757,3 +20757,10 @@ as.data.frame.tbl = function(x) {
2075720757
x
2075820758
}
2075920759
test(2302, attr(as.data.table(y), "t1"), attr(as.data.frame(y), "t1"))
20760+
20761+
# Grouping a keyed table by a function of its key, by=foo(KEY), does not retain the key: there is no way to guarantee the transformation is monotonic, #5583
20762+
DT = data.table(a=1:2, key='a')
20763+
test(2303.1, DT[, .N, by=.(b=rev(a))], data.table(b=2:1, N=1L)) # rev(a) is 2:1, i.e. decreasing in key order; the expected table is built without a key
20764+
test(2303.2, DT[, .(N=1L), by=.(b=rev(a))], data.table(b=2:1, N=1L)) # ensure no interaction with GForce
20765+
DT = data.table(a=2:3, b=1:0, key=c('a', 'b')) # same check with a two-column key
20766+
test(2303.3, DT[, .N, by=.(ab=a^b, d=c(1L, 1L))], data.table(ab=c(2, 1), d=1L, N=1L)) # a^b is c(2, 1) here: decreasing although both inputs are key columns

0 commit comments

Comments
 (0)