Skip to content

Commit a5d68f3

Browse files
committed
added a warning
1 parent c27ec26 commit a5d68f3

File tree

4 files changed

+36
-2
lines changed

4 files changed

+36
-2
lines changed

NEWS.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,21 @@
107107
108108
18. `fwrite` now allows `dec` to be the same as `sep` for edge cases where only one will be written, e.g. 0-row or 1-column tables. [#7227](https://github.com/Rdatatable/data.table/issues/7227). Thanks @MichaelChirico for the report and @venom1204 for the fix.
109109
110+
19. Using `by=` or `keyby=` when `j` is a numeric or character vector that selects columns (e.g. `DT[, 1:2, by=grp]`) used to silently ignore the grouping argument. A warning is now issued to alert the user that grouping is not applied in this case and to suggest the `.SD` idiom instead, [#5397](https://github.com/Rdatatable/data.table/issues/5397). Thanks @mcol for the report and @venom1204 for the fix.
111+
112+
```r
113+
DT = data.table(a=1:4, grp=c(1,1,2,2))
114+
DT[, 1:1, by = grp]
115+
# a
116+
# <int>
117+
# 1: 1
118+
# 2: 2
119+
# 3: 3
120+
# 4: 4
121+
# Warning message:
122+
# `by` or `keyby` is ignored when `j` is a numeric vector...
123+
```
124+
110125
### NOTES
111126
112127
1. The following in-progress deprecations have proceeded:

R/data.table.R

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,12 @@ replace_dot_alias = function(e) {
739739
if (!length(j) && !notj) return( null.data.table() )
740740
if (is.factor(j)) j = as.character(j) # fix for FR: #358
741741
if (is.character(j)) {
742+
if (!missingby && (missing(with) || isTRUE(with))) {
743+
warning(
744+
"`by` or `keyby` is ignored when `j` is a character vector used for column selection. ",
745+
"Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]"
746+
)
747+
}
742748
if (notj) {
743749
if (anyNA(idx <- chmatch(j, names_x)))
744750
warningf(ngettext(sum(is.na(idx)), "column not removed because not found: %s", "columns not removed because not found: %s"),
@@ -762,6 +768,10 @@ replace_dot_alias = function(e) {
762768
# else the NA in ansvals are for join inherited scope (test 1973), and NA could be in irows from join and data in i should be returned (test 1977)
763769
# in both cases leave to the R-level subsetting of i and x together further below
764770
} else if (is.numeric(j)) {
771+
if (!missingby) {
772+
warning(
773+
"`by` or `keyby` is ignored when `j` is a numeric vector used for column selection. ", "Perhaps you intended to use `.SD`? For example: DT[, .SD[, ", deparse(jsub), "], by = ...]")
774+
}
765775
j = as.integer(j)
766776
if (any(w <- (j>ncol(x)))) stopf("Item %d of j is %d which is outside the column number range [1,ncol=%d]", idx <- which.first(w), j[idx], ncol(x))
767777
j = j[j!=0L]

inst/tests/tests.Rraw

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21620,3 +21620,12 @@ local({
2162021620
test(2338.9, {fwrite(dd, f, forceDecimal=FALSE); fread(f)}, di)
2162121621
})
2162221622

21623+
# 5397 - by/keyby silently ignored when numeric indices are used in j; now warns
21624+
DT = data.table(a=1:4, b=5:8, g=c(1,1,2,2))
21625+
test(2339.1, DT[, 1:2, by=g], DT[, 1:2], warning="`by` or `keyby` is ignored")
21626+
test(2339.2, DT[, 2:1, keyby=g], DT[, 2:1], warning="`by` or `keyby` is ignored")
21627+
test(2339.3, DT[, c("b", "a"), by=g, with=FALSE], DT[, c("b", "a")])
21628+
expected_sd = data.table(g=c(1,1,2,2), a=1:4, b=5:8)
21629+
test(2339.4, DT[, .SD[, 1:2], by=g], expected_sd)
21630+
expected_single_int = data.table(g=c(1,2), V1=c(1,1))
21631+
test(2339.5, DT[, 1, by=g], expected_single_int)

man/data.table.Rd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
9797
9898
See \href{../doc/datatable-intro.html}{\code{vignette("datatable-intro")}} and \code{example(data.table)}.}
9999
100-
\item{by}{ Column names are seen as if they are variables (as in \code{j} when \code{with=TRUE}). The \code{data.table} is then grouped by the \code{by} and \code{j} is evaluated within each group. The order of the rows within each group is preserved, as is the order of the groups. \code{by} accepts:
101-
100+
\item{by}{ Column names are seen as if they are variables (as in \code{j} when \code{with=TRUE}). \emph{Note that \code{by} and \code{keyby} are ignored when \code{j} is a character or numeric vector used for selecting columns (i.e., when \code{with=FALSE} is in effect).} The \code{data.table} is then grouped by the \code{by} and \code{j} is evaluated within each group. The order of the rows within each group is preserved, as is the order of the groups. \code{by} accepts:
101+
102102
\itemize{
103103
\item A single unquoted column name: e.g., \code{DT[, .(sa=sum(a)), by=x]}
104104

0 commit comments

Comments
 (0)