Skip to content
Closed
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 21 additions & 15 deletions R/fcast.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,24 @@
stopf("Argument 'value.var.in.dots' should be logical TRUE/FALSE")
if (!isTRUEorFALSE(value.var.in.LHSdots) || !isTRUEorFALSE(value.var.in.RHSdots))
stopf("Arguments 'value.var.in.LHSdots', 'value.var.in.RHSdots' should be logical TRUE/FALSE")
# #2980 if explicitly providing fun.aggregate=length but not a value.var,
# just use the last column (as guess(data) would do) because length will be
# the same on all columns
handle_empty_strings = function(names) {
names[names == ""] = "empty_string"

Check warning on line 133 in R/fcast.R

View workflow job for this annotation

GitHub Actions / lint-r

file=R/fcast.R,line=133,col=11,[nzchar_linter] Use !nzchar(x) instead of x == "". Note that unlike nzchar(), EQ coerces to character, so you'll have to use as.character() if x is a factor. Whenever missing data is possible, please take care to use nzchar(., keepNA = TRUE); nzchar(NA) is TRUE by default.
names
}
ensure_unique_names = function(names) {
if (any(duplicated(names))) {

Check warning on line 137 in R/fcast.R

View workflow job for this annotation

GitHub Actions / lint-r

file=R/fcast.R,line=137,col=9,[any_duplicated_linter] anyDuplicated(x, ...) > 0 is better than any(duplicated(x), ...).
names = make.unique(names, sep = "_")

Check warning on line 138 in R/fcast.R

View check run for this annotation

Codecov / codecov/patch

R/fcast.R#L138

Added line #L138 was not covered by tests
}
names
}
if (missing(value.var) && !missing(fun.aggregate) && identical(fun.aggregate, length))
value.var = names(data)[ncol(data)]
lvals = value_vars(value.var, names(data))
valnames = unique(unlist(lvals))
valnames = handle_empty_strings(valnames)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please avoid re-writing the same variable (valnames) which can be confusing.
Either use unique names or don't use multiple lines/variables.

Also are these helper functions used only once? If so please delete the helper functions, and just use the code here instead of in a separate function. (helper functions should only be introduced if the same code is used in more than one place, to avoid repetition)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes these helper function used only once , so that I remove helper functions and used code directly instead of separate function.

valnames = ensure_unique_names(valnames)
lvars = check_formula(formula, names(data), valnames, value.var.in.LHSdots, value.var.in.RHSdots)
lvars = lapply(lvars, function(x) if (length(x)) x else quote(`.`))
# tired of lapply and the way it handles environments!
allcols = c(unlist(lvars), lapply(valnames, as.name))
dat = vector("list", length(allcols))
for (i in seq_along(allcols)) {
Expand All @@ -148,7 +156,6 @@
stopf("Column [%s] not found or of unknown type.", deparse(x))
}
setattr(lvars, 'names', c("lhs", "rhs"))
# Have to take care of duplicate names, and provide names for expression columns properly.
varnames = make.unique(vapply_1c(unlist(lvars), all.vars, max.names=1L), sep=sep)
dupidx = which(valnames %chin% varnames)
if (length(dupidx)) {
Expand All @@ -163,7 +170,6 @@
stopf("Columns specified in formula can not be of type list")
}
setDT(dat)

m = as.list(match.call()[-1L])
subset = m[["subset"]][[2L]]
if (!is.null(subset)) {
Expand All @@ -186,7 +192,7 @@
maybe_err = function(list.of.columns) {
if (!all(lengths(list.of.columns) == 1L)) {
msg <- gettext("Aggregating functions should take a vector as input and return a single value (length=1), but they do not, so the result is undefined. Please fix by modifying your function so that a single value is always returned.")
if (is.null(fill)) { # TODO change to always stopf #6329
if (is.null(fill)) {
stop(msg, domain=NA, call. = FALSE)
} else {
warning(msg, domain=NA, call. = FALSE)
Expand All @@ -200,7 +206,7 @@
o = forderv(x, retGrp=TRUE, sort=TRUE)
idx = attr(o, 'starts', exact=TRUE)
if (!length(o)) o = seq_along(x)
o[idx] # subsetVector retains attributes, using R's subset for now
o[idx]
}
cj_uniq = function(DT) {
do.call(CJ, lapply(DT, function(x)
Expand All @@ -211,14 +217,12 @@
} else .Call(CsubsetVector, x, order_(x))
))}
valnames = setdiff(names(dat), varnames)
# 'dat' != 'data'? then setkey to speed things up (slightly), else ad-hoc (for now). Still very fast!
if (!is.null(fun.call) || !is.null(subset))
setkeyv(dat, varnames)
if (length(rhsnames)) {
lhs = shallow(dat, lhsnames); rhs = shallow(dat, rhsnames); val = shallow(dat, valnames)
# handle drop=TRUE/FALSE - Update: Logic moved to R, AND faster than previous version. Take that... old me :-).
if (all(drop)) {
map = setDT(lapply(list(lhsnames, rhsnames), function(cols) frankv(dat, cols=cols, ties.method="dense", na.last=FALSE))) # #2202 fix
map = setDT(lapply(list(lhsnames, rhsnames), function(cols) frankv(dat, cols=cols, ties.method="dense", na.last=FALSE)))
maporder = lapply(map, order_)
mapunique = lapply(seq_along(map), function(i) .Call(CsubsetVector, map[[i]], maporder[[i]]))
lhs = .Call(CsubsetDT, lhs, maporder[[1L]], seq_along(lhs))
Expand All @@ -236,7 +240,7 @@
lhs = lhs_; rhs = rhs_
}
maplen = lengths(mapunique)
idx = do.call(CJ, mapunique)[map, 'I' := .I][["I"]] # TO DO: move this to C and avoid materialising the Cross Join.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please undo all deletions which are not relevant to your PR.

Click "Files changed" tab in github, and make sure there are only changes relevant to your PR.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I undo all deletions and made minimal changes in a code which are relevant .

idx = do.call(CJ, mapunique)[map, 'I' := .I][["I"]]
some_fill = anyNA(idx)
fill.default = if (run_agg_funs && is.null(fill) && some_fill) dat_for_default_fill[, maybe_err(eval(fun.call))]
if (run_agg_funs && is.null(fill) && some_fill) {
Expand All @@ -247,9 +251,11 @@
if (length(valnames) > 1L)
allcols = do.call(paste, if (identical(".", allcols)) list(valnames, sep=sep)
else c(CJ(valnames, allcols, sorted=FALSE), sep=sep))
# removed 'setcolorder()' here, #1153
if (length(lhsnames) + length(allcols) != length(ans)) {
stopf("Length mismatch: 'names' attribute [%d] must match the vector length [%d].", length(lhsnames) + length(allcols), length(ans))

Check warning on line 255 in R/fcast.R

View check run for this annotation

Codecov / codecov/patch

R/fcast.R#L255

Added line #L255 was not covered by tests
}
setattr(ans, 'names', c(lhsnames, allcols))
setDT(ans); setattr(ans, 'sorted', lhsnames)
} else internal_error("empty rhsnames") # nocov
} else internal_error("empty rhsnames")

Check warning on line 259 in R/fcast.R

View check run for this annotation

Codecov / codecov/patch

R/fcast.R#L259

Added line #L259 was not covered by tests
return(ans)
}
}

Check warning on line 261 in R/fcast.R

View workflow job for this annotation

GitHub Actions / lint-r

file=R/fcast.R,line=261,col=2,[trailing_blank_lines_linter] Add a terminal newline.
Loading