Skip to content

Commit ba5773d

Browse files
better match base::order(method=) and decreasing= (#6655)
* better match base::order(method=) and decreasing=
* another 0 for consistency
* coverage
* bad copy-paste
1 parent b150ab5 commit ba5773d

File tree

3 files changed

+27
-7
lines changed

3 files changed

+27
-7
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ rowwiseDT(
121121

122122
15. Joins of `integer64` and `double` columns succeed when the `double` column has lossless `integer64` representation, [#4167](https://github.com/Rdatatable/data.table/issues/4167) and [#6625](https://github.com/Rdatatable/data.table/issues/6625). Previously, this only worked when the double column had lossless _32-bit_ integer representation. Thanks @MichaelChirico for the reports and fix.
123123

124+
17. `DT[order(...)]` better matches `base::order()` behavior by (1) recognizing the `method=` argument (and erroring for any value other than the default `"radix"`, since other sorting methods are not supported) and (2) accepting a vector of `TRUE`/`FALSE` in `decreasing=`, with one element per sorting column, as an alternative to using `-a` to convey "sort `a` decreasing" (combining `-` with a vector `decreasing=` is an error), [#4456](https://github.com/Rdatatable/data.table/issues/4456). Thanks @jangorecki for the FR and @MichaelChirico for the PR.
125+
124126
## NOTES
125127

126128
1. There is a new vignette on joins! See `vignette("datatable-joins")`. Thanks to Angel Feliz for authoring it! Feedback welcome. This vignette has been highly requested since 2017: [#2181](https://github.com/Rdatatable/data.table/issues/2181).

R/setkey.R

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,17 +155,19 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, retStats=retGrp, sort=TRUE,
155155
.Call(CforderReuseSorting, x, by, retGrp, retStats, sort, order, na.last, reuseSorting) # returns integer() if already sorted, regardless of sort=TRUE|FALSE
156156
}
157157

158-
forder = function(..., na.last=TRUE, decreasing=FALSE)
158+
forder = function(..., na.last=TRUE, decreasing=FALSE, method="radix")
159159
{
160+
if (method != "radix") stopf("data.table has no support for sorting by method='%s'. Use base::order(), not order(), if you really need this.", method)
161+
stopifnot(is.logical(decreasing), length(decreasing) > 0L, !is.na(decreasing))
160162
sub = substitute(list(...))
161163
tt = vapply_1b(sub, function(x) is.null(x) || (is.symbol(x) && !nzchar(x)))
162164
if (any(tt)) sub[tt] = NULL # remove any NULL or empty arguments; e.g. test 1962.052: forder(DT, NULL) and forder(DT, )
163165
if (length(sub)<2L) return(NULL) # forder() with no arguments returns NULL consistent with base::order
164166
asc = rep.int(1L, length(sub)-1L) # ascending (1) or descending (-1) per column
165167
# the idea here is to intercept - (and unusual --+ deriving from built expressions) before vectors in forder(DT, -colA, colB) so that :
166168
# 1) - on character vector works; ordinarily in R that fails with type error
167-
# 2) each column/expression can have its own +/- more easily that having to use a separate decreasing=TRUE/FALSE
168-
# 3) we can pass the decreasing (-) flag to C and avoid what normally happens in R; i.e. allocate a new vector and apply - to every element first
169+
# 2) each column/expression can have its own +/- more easily than having to use a separate decreasing=TRUE/FALSE
170+
# 3) we can pass the decreasing (-) flag to C and avoid what normally happens in R; i.e. allocate a new vector and negate every element first
169171
# We intercept the unevaluated expressions and massage them before evaluating in with(DT) scope or not depending on the first item.
170172
for (i in seq.int(2L, length(sub))) {
171173
v = sub[[i]]
@@ -188,8 +190,16 @@ forder = function(..., na.last=TRUE, decreasing=FALSE)
188190
} else {
189191
data = eval(sub, parent.frame(), parent.frame())
190192
}
191-
stopifnot(isTRUEorFALSE(decreasing))
192-
o = forderv(data, seq_along(data), retGrp=FALSE, retStats=FALSE, sort=TRUE, order=if (decreasing) -asc else asc, na.last=na.last)
193+
if (length(decreasing) > 1L) {
194+
if (any(asc < 0L)) stopf("Mixing '-' with vector decreasing= is not supported.")
195+
if (length(decreasing) != length(asc)) stopf("decreasing= has length %d applied to sorting %d columns.", length(decreasing), length(asc))
196+
orderArg = fifelse(decreasing, -asc, asc)
197+
} else if (decreasing) {
198+
orderArg = -asc
199+
} else {
200+
orderArg = asc
201+
}
202+
o = forderv(data, seq_along(data), retGrp=FALSE, retStats=FALSE, sort=TRUE, order=orderArg, na.last=na.last)
193203
if (!length(o) && length(data)>=1L) o = seq_along(data[[1L]]) else o
194204
o
195205
}

inst/tests/tests.Rraw

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13607,8 +13607,9 @@ test(1962.0482, forder(L), 3:1)
1360713607
test(1962.0483, forder(), NULL)
1360813608
setDT(DT)
1360913609
test(1962.049, forder(DT[ , 0L]), error = 'Attempting to order a 0-column')
13610-
test(1962.050, forder(DT, decreasing = NA), error = base_messages$stopifnot('isTRUEorFALSE(decreasing)'))
13611-
test(1962.051, forder(DT, decreasing = 1.4), error = base_messages$stopifnot('isTRUEorFALSE(decreasing)'))
13610+
test(1962.0500, forder(DT, decreasing = NA), error = base_messages$stopifnot('!is.na(decreasing)'))
13611+
test(1962.0510, forder(DT, decreasing = 1.4), error = base_messages$stopifnot('is.logical(decreasing)'))
13612+
test(1962.0511, forder(DT, decreasing=logical()), error=base_messages$stopifnot('length(decreasing) > 0L'))
1361213613
test(1962.052, forder(DT, NULL), 3:1)
1361313614
test(1962.053, forder(DT), 3:1)
1361413615
test(1962.054, forder(DT, ), 3:1)
@@ -20702,3 +20703,10 @@ if (test_bit64) {
2070220703
test(2300.3, DT1[DT2, on='id'], error="Incompatible join types")
2070320704
test(2300.4, DT2[DT1, on='id'], error="Incompatible join types")
2070420705
}
20706+
20707+
# interpret more arguments to order() correctly when translating to forder(), #4456
20708+
DT = data.table(a=rep(1:3, 4), b=rep(1:2, 6))
20709+
test(2301.1, DT[order(a, method="auto")], error="no support for sorting by method='auto'")
20710+
test(2301.2, DT[order(a, b, decreasing=c(TRUE, FALSE))], DT[order(-a, b)])
20711+
test(2301.3, DT[order(a, -b, decreasing=c(TRUE, TRUE))], error="Mixing '-' with vector decreasing")
20712+
test(2301.4, DT[order(a, b, decreasing=c(TRUE, TRUE, FALSE))], error="decreasing= has length 3")

0 commit comments

Comments
 (0)