From 331f5db29a1cd129db51d25f4b04c1163d3943f7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 22 Feb 2025 21:39:55 -0800 Subject: [PATCH 1/3] remove more explicit returns --- R/data.table.R | 13 +++------ R/fcast.R | 76 ++++++++++++++++++++++++------------------------- R/fdroplevels.R | 2 +- R/foverlaps.R | 29 +++++++++---------- R/setkey.R | 30 +++++++++---------- R/xts.R | 2 +- 6 files changed, 72 insertions(+), 80 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 34e9958074..cbe6484e58 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1939,7 +1939,7 @@ replace_dot_alias = function(e) { if (inherits(x, 'data.table')) .Call(C_unlock, x) else return(lapply(x, runlock, current_depth = current_depth + 1L)) } - return(invisible()) + invisible() } runlock(ans) if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} @@ -3118,7 +3118,7 @@ is_constantish = function(q, check_singleton=FALSE) { return(FALSE) } # calls are allowed <=> there's no SYMBOLs in the sub-AST - return(length(all.vars(q, max.names=1L, unique=FALSE)) == 0L) + length(all.vars(q, max.names=1L, unique=FALSE)) == 0L } .gshift_ok = function(q) { q = match.call(shift, q) @@ -3192,7 +3192,7 @@ is_constantish = function(q, check_singleton=FALSE) { stopf("It looks like you re-used `:=` in argument %d a functional assignment call -- use `=` instead: %s(col1=val1, col2=val2, ...)", jj-1L, call_name) } -.prepareFastSubset = function(isub, x, enclos, notjoin, verbose = FALSE){ +.prepareFastSubset = function(isub, x, enclos, notjoin, verbose=FALSE) { ## helper that decides, whether a fast binary search can be performed, if i is a call ## For details on the supported queries, see \code{\link{datatable-optimize}} ## Additional restrictions are imposed if x is .SD, or if options indicate that no optimization @@ -3342,14 +3342,9 @@ is_constantish = function(q, check_singleton=FALSE) { setkeyv(i, idxCols) on = on[idxCols] ## make sure 'on' is in the correct order. Otherwise the logic won't recognise that a key / index already exists. } - return(list(i = i, - on = on, - notjoin = notjoin - ) - ) + list(i=i, on=on, notjoin=notjoin) } - .parse_on = function(onsub, isnull_inames) { ## helper that takes the 'on' string(s) and extracts comparison operators and column names from it. #' @param onsub the substituted on diff --git a/R/fcast.R b/R/fcast.R index 4a4235bbc4..2e0aee57da 100644 --- a/R/fcast.R +++ b/R/fcast.R @@ -214,42 +214,42 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ..., # 'dat' != 'data'? then setkey to speed things up (slightly), else ad-hoc (for now). Still very fast! if (!is.null(fun.call) || !is.null(subset)) setkeyv(dat, varnames) - if (length(rhsnames)) { - lhs = shallow(dat, lhsnames); rhs = shallow(dat, rhsnames); val = shallow(dat, valnames) - # handle drop=TRUE/FALSE - Update: Logic moved to R, AND faster than previous version. Take that... old me :-). - if (all(drop)) { - map = setDT(lapply(list(lhsnames, rhsnames), function(cols) frankv(dat, cols=cols, ties.method="dense", na.last=FALSE))) # #2202 fix - maporder = lapply(map, order_) - mapunique = lapply(seq_along(map), function(i) .Call(CsubsetVector, map[[i]], maporder[[i]])) - lhs = .Call(CsubsetDT, lhs, maporder[[1L]], seq_along(lhs)) - rhs = .Call(CsubsetDT, rhs, maporder[[2L]], seq_along(rhs)) - } else { - lhs_ = if (!drop[1L]) cj_uniq(lhs) else setkey(unique(lhs, by=names(lhs))) - rhs_ = if (!drop[2L]) cj_uniq(rhs) else setkey(unique(rhs, by=names(rhs))) - map = vector("list", 2L) - .Call(Csetlistelt, map, 1L, lhs_[lhs, which=TRUE]) - .Call(Csetlistelt, map, 2L, rhs_[rhs, which=TRUE]) - setDT(map) - mapunique = vector("list", 2L) - .Call(Csetlistelt, mapunique, 1L, seq_len(nrow(lhs_))) - .Call(Csetlistelt, mapunique, 2L, seq_len(nrow(rhs_))) - lhs = lhs_; rhs = rhs_ - } - maplen = lengths(mapunique) - idx = do.call(CJ, mapunique)[map, 'I' := .I][["I"]] # TO DO: move this to C and avoid materialising the Cross Join. - some_fill = anyNA(idx) - fill.default = if (run_agg_funs && is.null(fill) && some_fill) dat_for_default_fill[, maybe_err(eval(fun.call))] - if (run_agg_funs && is.null(fill) && some_fill) { - fill.default = dat_for_default_fill[0L][, maybe_err(eval(fun.call))] - } - ans = .Call(Cfcast, lhs, val, maplen[[1L]], maplen[[2L]], idx, fill, fill.default, is.null(fun.call), some_fill) - allcols = do.call(paste, c(rhs, sep=sep)) - if (length(valnames) > 1L) - allcols = do.call(paste, if (identical(".", allcols)) list(valnames, sep=sep) - else c(CJ(valnames, allcols, sorted=FALSE), sep=sep)) - # removed 'setcolorder()' here, #1153 - setattr(ans, 'names', c(lhsnames, allcols)) - setDT(ans); setattr(ans, 'sorted', lhsnames) - } else internal_error("empty rhsnames") # nocov - return(ans) + if (!length(rhsnames)) internal_error("empty rhsnames") # nocov + lhs = shallow(dat, lhsnames); rhs = shallow(dat, rhsnames); val = shallow(dat, valnames) + # handle drop=TRUE/FALSE - Update: Logic moved to R, AND faster than previous version. Take that... old me :-). + if (all(drop)) { + map = setDT(lapply(list(lhsnames, rhsnames), function(cols) frankv(dat, cols=cols, ties.method="dense", na.last=FALSE))) # #2202 fix + maporder = lapply(map, order_) + mapunique = lapply(seq_along(map), function(i) .Call(CsubsetVector, map[[i]], maporder[[i]])) + lhs = .Call(CsubsetDT, lhs, maporder[[1L]], seq_along(lhs)) + rhs = .Call(CsubsetDT, rhs, maporder[[2L]], seq_along(rhs)) + } else { + lhs_ = if (!drop[1L]) cj_uniq(lhs) else setkey(unique(lhs, by=names(lhs))) + rhs_ = if (!drop[2L]) cj_uniq(rhs) else setkey(unique(rhs, by=names(rhs))) + map = vector("list", 2L) + .Call(Csetlistelt, map, 1L, lhs_[lhs, which=TRUE]) + .Call(Csetlistelt, map, 2L, rhs_[rhs, which=TRUE]) + setDT(map) + mapunique = vector("list", 2L) + .Call(Csetlistelt, mapunique, 1L, seq_len(nrow(lhs_))) + .Call(Csetlistelt, mapunique, 2L, seq_len(nrow(rhs_))) + lhs = lhs_; rhs = rhs_ + } + maplen = lengths(mapunique) + idx = do.call(CJ, mapunique)[map, 'I' := .I][["I"]] # TO DO: move this to C and avoid materialising the Cross Join. + some_fill = anyNA(idx) + fill.default = if (run_agg_funs && is.null(fill) && some_fill) dat_for_default_fill[, maybe_err(eval(fun.call))] + if (run_agg_funs && is.null(fill) && some_fill) { + fill.default = dat_for_default_fill[0L][, maybe_err(eval(fun.call))] + } + ans = .Call(Cfcast, lhs, val, maplen[[1L]], maplen[[2L]], idx, fill, fill.default, is.null(fun.call), some_fill) + allcols = do.call(paste, c(rhs, sep=sep)) + if (length(valnames) > 1L) + allcols = do.call(paste, if (identical(".", allcols)) list(valnames, sep=sep) + else c(CJ(valnames, allcols, sorted=FALSE), sep=sep)) + # removed 'setcolorder()' here, #1153 + setattr(ans, 'names', c(lhsnames, allcols)) + setDT(ans) + setattr(ans, 'sorted', lhsnames) + ans } diff --git a/R/fdroplevels.R b/R/fdroplevels.R index d71c4e4b35..db8fb623f2 100644 --- a/R/fdroplevels.R +++ b/R/fdroplevels.R @@ -5,7 +5,7 @@ fdroplevels = function(x, exclude = if (anyNA(levels(x))) NULL else NA, ...) { ans = match(as.integer(x), lev) setattr(ans, 'levels', levels(x)[lev]) setattr(ans, 'class', class(x)) - return(ans) + ans } droplevels.data.table = function(x, except=NULL, exclude, in.place=NULL, ...){ diff --git a/R/foverlaps.R b/R/foverlaps.R index 303ffc3fc4..7e996e7905 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -181,23 +181,22 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k if (which) { if (mult %chin% c("first", "last")) return(olaps$yid) - else if (!is.na(nomatch)) - return(.Call(CsubsetDT, olaps, which(olaps$yid > 0L), seq_along(olaps))) - else return(olaps) - } else { if (!is.na(nomatch)) - olaps = .Call(CsubsetDT, olaps, which(olaps$yid > 0L), seq_along(olaps)) - ycols = setdiff(names(origy), head(by.y, -2L)) - idx = chmatch(ycols, names(origx), nomatch=0L) - ans = .Call(CsubsetDT, origx, olaps$xid, seq_along(origx)) - if (any(idx>0L)) - setnames(ans, names(ans)[idx], paste0("i.", names(ans)[idx])) - xcols1 = head(by.x, -2L) - xcols2 = setdiff(names(ans), xcols1) - ans[, (ycols) := .Call(CsubsetDT, origy, olaps$yid, chmatch(ycols, names(origy)))] - setcolorder(ans, c(xcols1, ycols, xcols2)) - return(ans[]) + return(.Call(CsubsetDT, olaps, which(olaps$yid > 0L), seq_along(olaps))) + return(olaps) } + if (!is.na(nomatch)) + olaps = .Call(CsubsetDT, olaps, which(olaps$yid > 0L), seq_along(olaps)) + ycols = setdiff(names(origy), head(by.y, -2L)) + idx = chmatch(ycols, names(origx), nomatch=0L) + ans = .Call(CsubsetDT, origx, olaps$xid, seq_along(origx)) + if (any(idx > 0L)) + setnames(ans, names(ans)[idx], paste0("i.", names(ans)[idx])) + xcols1 = head(by.x, -2L) + xcols2 = setdiff(names(ans), xcols1) + ans[, (ycols) := .Call(CsubsetDT, origy, olaps$yid, chmatch(ycols, names(origy)))] + setcolorder(ans, c(xcols1, ycols, xcols2)) + ans[] } # Notes: (If there's a better way than the solution I propose here, I'd be glad to apply it.) diff --git a/R/setkey.R b/R/setkey.R index d1de12fdae..94ad3d4faf 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -12,7 +12,7 @@ setindex = function(...) setkey(..., physical=FALSE) setindexv = function(x, cols, verbose=getOption("datatable.verbose")) { if (is.list(cols)) { sapply(cols, setkeyv, x=x, verbose=verbose, physical=FALSE) - return(invisible(x)) + invisible(x) } else { setkeyv(x, cols, verbose=verbose, physical=FALSE) } @@ -207,23 +207,21 @@ forder = function(..., na.last=TRUE, decreasing=FALSE, method="radix") fsort = function(x, decreasing=FALSE, na.last=FALSE, internal=FALSE, verbose=FALSE, ...) { containsNAs = FALSE - if (typeof(x)=="double" && !decreasing && !(containsNAs <- anyNA(x))) { - if (internal) stopf("Internal code should not be being called on type double") - return(.Call(Cfsort, x, verbose)) + if (typeof(x) == "double" && !decreasing && !(containsNAs <- anyNA(x))) { + if (internal) stopf("Internal code should not be being called on type double") + return(.Call(Cfsort, x, verbose)) } - else { - # fsort is now exported for testing. Trying to head off complaints "it's slow on integer" - # The only places internally we use fsort internally (3 calls, all on integer) have had internal=TRUE added for now. - # TODO: implement integer and character in Cfsort and remove this branch and warning - if (!internal){ - if (typeof(x)!="double") warningf("Input is not a vector of type double. New parallel sort has only been done for double vectors so far. Using one thread.") - if (decreasing) warningf("New parallel sort has not been implemented for decreasing=TRUE so far. Using one thread.") - if (containsNAs) warningf("New parallel sort has not been implemented for vectors containing NA values so far. Using one thread.") - } - orderArg = if (decreasing) -1L else 1L - o = forderv(x, order=orderArg, na.last=na.last) - return( if (length(o)) x[o] else x ) + # fsort is now exported for testing. Trying to head off complaints "it's slow on integer" + # The only places internally we use fsort internally (3 calls, all on integer) have had internal=TRUE added for now. + # TODO: implement integer and character in Cfsort and remove this branch and warning + if (!internal) { + if (typeof(x) != "double") warningf("Input is not a vector of type double. New parallel sort has only been done for double vectors so far. Using one thread.") + if (decreasing) warningf("New parallel sort has not been implemented for decreasing=TRUE so far. Using one thread.") + if (containsNAs) warningf("New parallel sort has not been implemented for vectors containing NA values so far. Using one thread.") } + orderArg = if (decreasing) -1L else 1L + o = forderv(x, order=orderArg, na.last=na.last) + if (length(o)) x[o] else x } setorder = function(x, ..., na.last=FALSE) diff --git a/R/xts.R b/R/xts.R index 05db1940d0..582cfa5b0a 100644 --- a/R/xts.R +++ b/R/xts.R @@ -25,6 +25,6 @@ as.xts.data.table = function(x, numeric.only = TRUE, ...) { if (!all(colsNumeric)) warningf("Following columns are not numeric and will be omitted: %s", brackify(names(colsNumeric)[!colsNumeric])) r = r[, .SD, .SDcols = names(colsNumeric)[colsNumeric]] } - return(xts::xts(as.matrix(r), order.by = if (inherits(x[[1L]], "IDate")) as.Date(x[[1L]]) else x[[1L]])) + xts::xts(as.matrix(r), order.by = if (inherits(x[[1L]], "IDate")) as.Date(x[[1L]]) else x[[1L]]) } # nocov end From 60193c29b6d754ef83199b7908e7c89ffad5cd15 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 22 Feb 2025 23:16:12 -0800 Subject: [PATCH 2/3] use new globbing support from upstream --- .ci/.lintr.R | 75 ++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/.ci/.lintr.R b/.ci/.lintr.R index 10adaa0f65..05906ba7d4 100644 --- a/.ci/.lintr.R +++ b/.ci/.lintr.R @@ -71,45 +71,40 @@ linters = c(dt_linters, all_linters( )) rm(dt_linters) -# TODO(lintr#2172): Glob with lintr itself. -exclusions = c(local({ - exclusion_for_dir <- function(dir, exclusions) { - files = file.path("..", list.files(dir, pattern = "\\.(R|Rmd|Rraw)$", full.names=TRUE)) - stats::setNames(rep(list(exclusions), length(files)), files) - } - c( - exclusion_for_dir("tests", list( - quotes_linter = Inf, - # TODO(michaelchirico): Enforce these and re-activate them one-by-one. - implicit_integer_linter = Inf, - infix_spaces_linter = Inf, - undesirable_function_linter = Inf - )), - exclusion_for_dir(c("vignettes", "vignettes/fr", "vignettes/ru"), list( - # assignment_linter = Inf, - implicit_integer_linter = Inf, - quotes_linter = Inf, - sample_int_linter = Inf - # strings_as_factors_linter = Inf - # system_time_linter = Inf - )), - exclusion_for_dir("inst/tests", list( - library_call_linter = Inf, - numeric_leading_zero_linter = Inf, - undesirable_operator_linter = Inf, # For ':::', possibly we could be more careful to only exclude ':::'. - # TODO(michaelchirico): Enforce these and re-activate them one-by-one. - comparison_negation_linter = Inf, - condition_call_linter = Inf, - duplicate_argument_linter = Inf, - equals_na_linter = Inf, - missing_argument_linter = Inf, - paste_linter = Inf, - rep_len_linter = Inf, - sample_int_linter = Inf, - seq_linter = Inf, - unnecessary_lambda_linter = Inf - )) +# NB: paths are relative to this file. See r-lib/lintr#2780 +exclusions = list( + `../tests` = list( + quotes_linter = Inf, + # TODO(michaelchirico): Enforce these and re-activate them one-by-one. + implicit_integer_linter = Inf, + infix_spaces_linter = Inf, + undesirable_function_linter = Inf + ), + `../vignettes*` = list( + # assignment_linter = Inf, + implicit_integer_linter = Inf, + quotes_linter = Inf, + sample_int_linter = Inf + # strings_as_factors_linter = Inf + # system_time_linter = Inf + ), + `../inst/tests` = list( + library_call_linter = Inf, + numeric_leading_zero_linter = Inf, + undesirable_operator_linter = Inf, # For ':::', possibly we could be more careful to only exclude ':::'. + # TODO(michaelchirico): Enforce these and re-activate them one-by-one. + comparison_negation_linter = Inf, + condition_call_linter = Inf, + duplicate_argument_linter = Inf, + equals_na_linter = Inf, + missing_argument_linter = Inf, + paste_linter = Inf, + rep_len_linter = Inf, + sample_int_linter = Inf, + seq_linter = Inf, + unnecessary_lambda_linter = Inf + ), + `../inst/tests/froll.Rraw` = list( + dt_test_literal_linter = Inf # TODO(michaelchirico): Fix these once #5898, #5692, #5682, #5576, #5575, #5441 are merged. ) -}), - list(`../inst/tests/froll.Rraw` = list(dt_test_literal_linter = Inf)) # TODO(michaelchirico): Fix these once #5898, #5692, #5682, #5576, #5575, #5441 are merged. ) From 02c8cefb5b768b11ecb772f7e399c7861aa22d71 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 23 Feb 2025 16:27:24 -0800 Subject: [PATCH 3/3] comment not needed --- .ci/.lintr.R | 1 - 1 file changed, 1 deletion(-) diff --git a/.ci/.lintr.R b/.ci/.lintr.R index 05906ba7d4..14257ba4c7 100644 --- a/.ci/.lintr.R +++ b/.ci/.lintr.R @@ -71,7 +71,6 @@ linters = c(dt_linters, all_linters( )) rm(dt_linters) -# NB: paths are relative to this file. See r-lib/lintr#2780 exclusions = list( `../tests` = list( quotes_linter = Inf,