Skip to content

Commit 5a47ce0

Browse files
committed
changes made
1 parent 6a4fe65 commit 5a47ce0

File tree

1 file changed

+18
-20
lines changed

1 file changed

+18
-20
lines changed

R/fcast.R

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -129,24 +129,20 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
129129
stopf("Argument 'value.var.in.dots' should be logical TRUE/FALSE")
130130
if (!isTRUEorFALSE(value.var.in.LHSdots) || !isTRUEorFALSE(value.var.in.RHSdots))
131131
stopf("Arguments 'value.var.in.LHSdots', 'value.var.in.RHSdots' should be logical TRUE/FALSE")
132-
handle_empty_strings = function(names) {
133-
names[names == ""] = "empty_string"
134-
names
135-
}
136-
ensure_unique_names = function(names) {
137-
if (any(duplicated(names))) {
138-
names = make.unique(names, sep = "_")
139-
}
140-
names
141-
}
132+
# #2980 if explicitly providing fun.aggregate=length but not a value.var,
133+
# just use the last column (as guess(data) would do) because length will be
134+
# the same on all columns
142135
if (missing(value.var) && !missing(fun.aggregate) && identical(fun.aggregate, length))
143136
value.var = names(data)[ncol(data)]
144137
lvals = value_vars(value.var, names(data))
145138
valnames = unique(unlist(lvals))
146-
valnames = handle_empty_strings(valnames)
147-
valnames = ensure_unique_names(valnames)
139+
valnames[valnames == ""] = "empty_string"
140+
if (any(duplicated(valnames))) {
141+
valnames = make.unique(valnames, sep = "_")
142+
}
148143
lvars = check_formula(formula, names(data), valnames, value.var.in.LHSdots, value.var.in.RHSdots)
149144
lvars = lapply(lvars, function(x) if (length(x)) x else quote(`.`))
145+
# tired of lapply and the way it handles environments!
150146
allcols = c(unlist(lvars), lapply(valnames, as.name))
151147
dat = vector("list", length(allcols))
152148
for (i in seq_along(allcols)) {
@@ -156,6 +152,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
156152
stopf("Column [%s] not found or of unknown type.", deparse(x))
157153
}
158154
setattr(lvars, 'names', c("lhs", "rhs"))
155+
# Have to take care of duplicate names, and provide names for expression columns properly.
159156
varnames = make.unique(vapply_1c(unlist(lvars), all.vars, max.names=1L), sep=sep)
160157
dupidx = which(valnames %chin% varnames)
161158
if (length(dupidx)) {
@@ -170,6 +167,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
170167
stopf("Columns specified in formula can not be of type list")
171168
}
172169
setDT(dat)
170+
173171
m = as.list(match.call()[-1L])
174172
subset = m[["subset"]][[2L]]
175173
if (!is.null(subset)) {
@@ -192,7 +190,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
192190
maybe_err = function(list.of.columns) {
193191
if (!all(lengths(list.of.columns) == 1L)) {
194192
msg <- gettext("Aggregating functions should take a vector as input and return a single value (length=1), but they do not, so the result is undefined. Please fix by modifying your function so that a single value is always returned.")
195-
if (is.null(fill)) {
193+
if (is.null(fill)) { # TODO change to always stopf #6329
196194
stop(msg, domain=NA, call. = FALSE)
197195
} else {
198196
warning(msg, domain=NA, call. = FALSE)
@@ -206,7 +204,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
206204
o = forderv(x, retGrp=TRUE, sort=TRUE)
207205
idx = attr(o, 'starts', exact=TRUE)
208206
if (!length(o)) o = seq_along(x)
209-
o[idx]
207+
o[idx] # subsetVector retains attributes, using R's subset for now
210208
}
211209
cj_uniq = function(DT) {
212210
do.call(CJ, lapply(DT, function(x)
@@ -217,10 +215,12 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
217215
} else .Call(CsubsetVector, x, order_(x))
218216
))}
219217
valnames = setdiff(names(dat), varnames)
218+
# 'dat' != 'data'? then setkey to speed things up (slightly), else ad-hoc (for now). Still very fast!
220219
if (!is.null(fun.call) || !is.null(subset))
221220
setkeyv(dat, varnames)
222221
if (length(rhsnames)) {
223222
lhs = shallow(dat, lhsnames); rhs = shallow(dat, rhsnames); val = shallow(dat, valnames)
223+
# handle drop=TRUE/FALSE - Update: Logic moved to R, AND faster than previous version. Take that... old me :-).
224224
if (all(drop)) {
225225
map = setDT(lapply(list(lhsnames, rhsnames), function(cols) frankv(dat, cols=cols, ties.method="dense", na.last=FALSE)))
226226
maporder = lapply(map, order_)
@@ -240,7 +240,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
240240
lhs = lhs_; rhs = rhs_
241241
}
242242
maplen = lengths(mapunique)
243-
idx = do.call(CJ, mapunique)[map, 'I' := .I][["I"]]
243+
idx = do.call(CJ, mapunique)[map, 'I' := .I][["I"]] # TO DO: move this to C and avoid materialising the Cross Join.
244244
some_fill = anyNA(idx)
245245
fill.default = if (run_agg_funs && is.null(fill) && some_fill) dat_for_default_fill[, maybe_err(eval(fun.call))]
246246
if (run_agg_funs && is.null(fill) && some_fill) {
@@ -251,11 +251,9 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
251251
if (length(valnames) > 1L)
252252
allcols = do.call(paste, if (identical(".", allcols)) list(valnames, sep=sep)
253253
else c(CJ(valnames, allcols, sorted=FALSE), sep=sep))
254-
if (length(lhsnames) + length(allcols) != length(ans)) {
255-
stopf("Length mismatch: 'names' attribute [%d] must match the vector length [%d].", length(lhsnames) + length(allcols), length(ans))
256-
}
254+
# removed 'setcolorder()' here, #1153
257255
setattr(ans, 'names', c(lhsnames, allcols))
258256
setDT(ans); setattr(ans, 'sorted', lhsnames)
259-
} else internal_error("empty rhsnames")
257+
} else internal_error("empty rhsnames") # nocov
260258
return(ans)
261-
}
259+
}

0 commit comments

Comments
 (0)