@@ -129,24 +129,20 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
129129 stopf(" Argument 'value.var.in.dots' should be logical TRUE/FALSE" )
130130 if (! isTRUEorFALSE(value.var.in.LHSdots ) || ! isTRUEorFALSE(value.var.in.RHSdots ))
131131 stopf(" Arguments 'value.var.in.LHSdots', 'value.var.in.RHSdots' should be logical TRUE/FALSE" )
132- handle_empty_strings = function (names ) {
133- names [names == " " ] = " empty_string"
134- names
135- }
136- ensure_unique_names = function (names ) {
137- if (any(duplicated(names ))) {
138- names = make.unique(names , sep = " _" )
139- }
140- names
141- }
132+ # #2980 if explicitly providing fun.aggregate=length but not a value.var,
133+ # just use the last column (as guess(data) would do) because length will be
134+ # the same on all columns
142135 if (missing(value.var ) && ! missing(fun.aggregate ) && identical(fun.aggregate , length ))
143136 value.var = names(data )[ncol(data )]
144137 lvals = value_vars(value.var , names(data ))
145138 valnames = unique(unlist(lvals ))
146- valnames = handle_empty_strings(valnames )
147- valnames = ensure_unique_names(valnames )
139+ valnames [valnames == " " ] = " empty_string"
140+ if (any(duplicated(valnames ))) {
141+ valnames = make.unique(valnames , sep = " _" )
142+ }
148143 lvars = check_formula(formula , names(data ), valnames , value.var.in.LHSdots , value.var.in.RHSdots )
149144 lvars = lapply(lvars , function (x ) if (length(x )) x else quote(`.` ))
145+ # tired of lapply and the way it handles environments!
150146 allcols = c(unlist(lvars ), lapply(valnames , as.name ))
151147 dat = vector(" list" , length(allcols ))
152148 for (i in seq_along(allcols )) {
@@ -156,6 +152,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
156152 stopf(" Column [%s] not found or of unknown type." , deparse(x ))
157153 }
158154 setattr(lvars , ' names' , c(" lhs" , " rhs" ))
155+ # Have to take care of duplicate names, and provide names for expression columns properly.
159156 varnames = make.unique(vapply_1c(unlist(lvars ), all.vars , max.names = 1L ), sep = sep )
160157 dupidx = which(valnames %chin % varnames )
161158 if (length(dupidx )) {
@@ -170,6 +167,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
170167 stopf(" Columns specified in formula can not be of type list" )
171168 }
172169 setDT(dat )
170+
173171 m = as.list(match.call()[- 1L ])
174172 subset = m [[" subset" ]][[2L ]]
175173 if (! is.null(subset )) {
@@ -192,7 +190,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
192190 maybe_err = function (list.of.columns ) {
193191 if (! all(lengths(list.of.columns ) == 1L )) {
194192 msg <- gettext(" Aggregating functions should take a vector as input and return a single value (length=1), but they do not, so the result is undefined. Please fix by modifying your function so that a single value is always returned." )
195- if (is.null(fill )) {
193+ if (is.null(fill )) { # TODO change to always stopf #6329
196194 stop(msg , domain = NA , call. = FALSE )
197195 } else {
198196 warning(msg , domain = NA , call. = FALSE )
@@ -206,7 +204,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
206204 o = forderv(x , retGrp = TRUE , sort = TRUE )
207205 idx = attr(o , ' starts' , exact = TRUE )
208206 if (! length(o )) o = seq_along(x )
209- o [idx ]
207+ o [idx ] # subsetVector retains attributes, using R's subset for now
210208 }
211209 cj_uniq = function (DT ) {
212210 do.call(CJ , lapply(DT , function (x )
@@ -217,10 +215,12 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
217215 } else .Call(CsubsetVector , x , order_(x ))
218216 ))}
219217 valnames = setdiff(names(dat ), varnames )
218+ # 'dat' != 'data'? then setkey to speed things up (slightly), else ad-hoc (for now). Still very fast!
220219 if (! is.null(fun.call ) || ! is.null(subset ))
221220 setkeyv(dat , varnames )
222221 if (length(rhsnames )) {
223222 lhs = shallow(dat , lhsnames ); rhs = shallow(dat , rhsnames ); val = shallow(dat , valnames )
223+ # handle drop=TRUE/FALSE - Update: Logic moved to R, AND faster than previous version. Take that... old me :-).
224224 if (all(drop )) {
225225 map = setDT(lapply(list (lhsnames , rhsnames ), function (cols ) frankv(dat , cols = cols , ties.method = " dense" , na.last = FALSE )))
226226 maporder = lapply(map , order_ )
@@ -240,7 +240,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
240240 lhs = lhs_ ; rhs = rhs_
241241 }
242242 maplen = lengths(mapunique )
243- idx = do.call(CJ , mapunique )[map , ' I' : = .I ][[" I" ]]
243+ idx = do.call(CJ , mapunique )[map , ' I' : = .I ][[" I" ]] # TO DO: move this to C and avoid materialising the Cross Join.
244244 some_fill = anyNA(idx )
245245 fill.default = if (run_agg_funs && is.null(fill ) && some_fill ) dat_for_default_fill [, maybe_err(eval(fun.call ))]
246246 if (run_agg_funs && is.null(fill ) && some_fill ) {
@@ -251,11 +251,9 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
251251 if (length(valnames ) > 1L )
252252 allcols = do.call(paste , if (identical(" ." , allcols )) list (valnames , sep = sep )
253253 else c(CJ(valnames , allcols , sorted = FALSE ), sep = sep ))
254- if (length(lhsnames ) + length(allcols ) != length(ans )) {
255- stopf(" Length mismatch: 'names' attribute [%d] must match the vector length [%d]." , length(lhsnames ) + length(allcols ), length(ans ))
256- }
254+ # removed 'setcolorder()' here, #1153
257255 setattr(ans , ' names' , c(lhsnames , allcols ))
258256 setDT(ans ); setattr(ans , ' sorted' , lhsnames )
259- } else internal_error(" empty rhsnames" )
257+ } else internal_error(" empty rhsnames" ) # nocov
260258 return (ans )
261- }
259+ }
0 commit comments