Skip to content

Commit 6c15481

Browse files
committed
groupingsets(): introduce and make use of 'enclos'
When forwarding NSE arguments via jj=, also forward the environment containing the symbols they may be referencing. Now groupingsets() substitutes all the local arguments into the call and then evaluates x[, jj, ...] in the specified environment, making it possible for jj to refer to local symbols without confusing them with variables belonging to groupingsets() or its caller. To avoid cedta() problems, set .datatable.aware = TRUE in the environment where the call is evaluated. (A column named .datatable.aware would be shadowed. So it goes.)
1 parent bbdf158 commit 6c15481

File tree

3 files changed

+38
-6
lines changed

3 files changed

+38
-6
lines changed

R/groupingsets.R

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
1313
sets = lapply(length(by):0L, function(i) by[0L:i])
1414
# redirect to workhorse function
1515
jj = substitute(j)
16-
groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label)
16+
groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame())
1717
}
1818

1919
cube = function(x, ...) {
@@ -35,13 +35,13 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
3535
sets = lapply((2L^n):1L, function(jj) by[keepBool[jj, ]])
3636
# redirect to workhorse function
3737
jj = substitute(j)
38-
groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label)
38+
groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame())
3939
}
4040

4141
groupingsets = function(x, ...) {
4242
UseMethod("groupingsets")
4343
}
44-
groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, ...) {
44+
groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, enclos = parent.frame(), ...) {
4545
# input data type basic validation
4646
if (!is.data.table(x))
4747
stopf("Argument 'x' must be a data.table object")
@@ -112,7 +112,10 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, labe
112112
.SDcols = if (".SD" %chin% av) setdiff(names(x), by) else NULL
113113
if (length(names(by))) by = unname(by)
114114
# 0 rows template data.table to keep colorder and type
115-
empty = if (length(.SDcols)) x[0L, eval(jj), by, .SDcols=.SDcols] else x[0L, eval(jj), by]
115+
# inline all arguments that might clash with enclosing environment
116+
pcall = substitute(x[0L, jj, by], list(x = x, jj = jj, by = by))
117+
if (length(.SDcols)) pcall$.SDcols = .SDcols
118+
empty = eval(pcall, list(.datatable.aware = TRUE), enclos)
116119
if (id && "grouping" %chin% names(empty)) # `j` could have been evaluated to `grouping` field
117120
stopf("When using `id=TRUE` the 'j' expression must not evaluate to a column named 'grouping'.")
118121
if (anyDuplicated(names(empty)) > 0L)
@@ -150,8 +153,12 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, labe
150153
stopf("Using integer64 class columns require to have 'bit64' package installed.") # nocov
151154
int64.by.cols = intersect(int64.cols, by)
152155
# aggregate function called for each grouping set
156+
# inline all arguments that might clash with enclosing environment
157+
pcall = substitute(x[, jj], list(x = x, jj = jj))
158+
if (length(.SDcols)) pcall$.SDcols = .SDcols
153159
aggregate.set = function(by.set) {
154-
r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set]
160+
pcall$by = by.set
161+
r = eval(pcall, list(.datatable.aware = TRUE), enclos)
155162
if (id) {
156163
# integer bit mask of aggregation levels: http://www.postgresql.org/docs/9.5/static/functions-aggregate.html#FUNCTIONS-GROUPING-TABLE
157164
# 3267: strtoi("", base = 2L) output apparently unstable across platforms

inst/tests/tests.Rraw

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21129,3 +21129,27 @@ test(2311.2, nlevels(DT$V1), 2L) # used to be 3
2112921129
# avoid translateChar*() in OpenMP threads, #6883
2113021130
DF = list(rep(iconv("\uf8", from = "UTF-8", to = "latin1"), 2e6))
2113121131
test(2312, fwrite(DF, nullfile(), encoding = "UTF-8", nThread = 2L), NULL)
21132+
21133+
# 'sets' is a local variable in groupingsets(), cube(), rollup() and shouldn't leak into the 'j' expression
21134+
n = 24L
21135+
set.seed(25)
21136+
DT = data.table(
21137+
color = sample(c("green","yellow","red"), n, TRUE),
21138+
year = as.Date(sample(paste0(2011:2015,"-01-01"), n, TRUE)),
21139+
status = as.factor(sample(c("removed","active","inactive","archived"), n, TRUE)),
21140+
amount = sample(1:5, n, TRUE),
21141+
value = sample(c(3, 3.5, 2.5, 2), n, TRUE)
21142+
)
21143+
sets = 0
21144+
test(2313.0,
21145+
groupingsets(DT, j = c(list(count=.N + ..sets)), by = c("color","year","status"), sets = list("color", c("year","status"), character()), id=TRUE),
21146+
groupingsets(DT, j = c(list(count=.N + 0)), by = c("color","year","status"), sets = list("color", c("year","status"), character()), id=TRUE)
21147+
)
21148+
test(2313.1,
21149+
cube(DT, j = sum(value) + ..sets, by = c("color","year","status"), id=TRUE),
21150+
cube(DT, j = sum(value), by = c("color","year","status"), id=TRUE)
21151+
)
21152+
test(2313.2,
21153+
rollup(DT, j = sum(value) + ..sets, by=c("color","year","status"), label="total"),
21154+
rollup(DT, j = sum(value), by=c("color","year","status"), label="total")
21155+
)

man/groupingsets.Rd

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ rollup(x, \dots)
1515
cube(x, \dots)
1616
\method{cube}{data.table}(x, j, by, .SDcols, id = FALSE, label = NULL, \dots)
1717
groupingsets(x, \dots)
18-
\method{groupingsets}{data.table}(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, \dots)
18+
\method{groupingsets}{data.table}(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, enclos = parent.frame(), \dots)
1919
}
2020
\arguments{
2121
\item{x}{\code{data.table}.}
@@ -27,6 +27,7 @@ groupingsets(x, \dots)
2727
\item{id}{logical default \code{FALSE}. If \code{TRUE} it will add leading column with bit mask of grouping sets.}
2828
\item{jj}{quoted version of \code{j} argument, for convenience. When provided, the function will ignore the \code{j} argument.}
2929
\item{label}{label(s) to be used in the 'total' rows in the grouping variable columns of the output, that is, in rows where the grouping variable has been aggregated. Can be a named list of scalars, or a scalar, or \code{NULL}. Defaults to \code{NULL}, which results in the grouping variables having \code{NA} in their 'total' rows. See Details.}
30+
\item{enclos}{the environment containing the symbols referenced by \code{jj}. When writing functions that accept a \code{j} argument for non-standard evaluation by \pkg{data.table}, \code{\link[base]{substitute}()} it, forward it to \code{groupingsets} using the \code{jj} argument, and set \code{enclos} to the \code{\link[base]{parent.frame}()} of the function that captures \code{j}.}
3031
}
3132
\details{
3233
All three functions \code{rollup, cube, groupingsets} are generic methods, \code{data.table} methods are provided.

0 commit comments

Comments
 (0)