Skip to content

Commit 5878084

Browse files
Merge branch 'master' into dot-dot-1
2 parents 878baf7 + c446f2e commit 5878084

File tree

15 files changed

+55
-24
lines changed

15 files changed

+55
-24
lines changed

.ci/linters/rd/backtick_linter.R

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# ensure no markdown-style backticks wind up in Rd where \code is intended
2+
options_documentation_linter = function(rd_file) {
3+
rd = tools::parse_Rd(rd_file)
4+
5+
error_if_backtick = function(rd_obj) {
6+
if (!is.recursive(rd_obj)) {
7+
if (any(grepl("`", rd_obj, fixed=TRUE))) {
8+
stop(sprintf(
9+
"Rd is not markdown -- backticks (`) don't render as code! Use \\code{...}.\nObserved in string '%s' in file %s",
10+
trimws(rd_obj), rd_file
11+
))
12+
}
13+
return(invisible())
14+
}
15+
tags = vapply(rd_obj, \(x) attr(x, "Rd_tag") %||% "", FUN.VALUE="")
16+
# backtick is valid inside R code (e.g. \examples, \code, \preformatted)
17+
rd_obj = rd_obj[!tags %in% c("RCODE", "VERB")]
18+
lapply(rd_obj, error_if_backtick)
19+
}
20+
21+
invisible(error_if_backtick(rd))
22+
}
File renamed without changes.

NEWS.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@
8282
8383
19. Spurious warnings from internal code in `cube()`, `rollup()`, and `groupingsets()` are no longer surfaced to the caller, [#6964](https://github.com/Rdatatable/data.table/issues/6964). Thanks @ferenci-tamas for the report and @venom1204 for the fix.
8484
85-
20. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.
85+
20. `droplevels()` works on 0-row data.tables, [#7043](https://github.com/Rdatatable/data.table/issues/7043). The result will have factor columns `factor(character())`, consistent with the data.frame method. Thanks @advieser for the report and @MichaelChirico for the fix.
86+
87+
21. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.
8688
8789
### NOTES
8890

R/fdroplevels.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# 647 fast droplevels.data.table method
22
fdroplevels = function(x, exclude = if (anyNA(levels(x))) NULL else NA, ...) {
33
stopifnot(inherits(x, "factor"))
4+
if (!length(x)) return(structure(integer(), class='factor', levels=character())) # skip factor() overhead
45
lev = which(tabulate(x, nlevels(x)) & (!match(levels(x), exclude, 0L)))
56
ans = match(as.integer(x), lev)
67
setattr(ans, 'levels', levels(x)[lev])
@@ -15,7 +16,6 @@ droplevels.data.table = function(x, except=NULL, exclude, ...){
1516
}
1617

1718
setdroplevels = function(x, except=NULL, exclude=NULL) {
18-
if (!nrow(x)) return(invisible(x))
1919
ix = vapply_1b(x, is.factor)
2020
if (!is.null(except)) {
2121
stopifnot(is.numeric(except), except >= 1L, except <= length(x))

inst/tests/tests.Rraw

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21359,18 +21359,25 @@ test(2327.2, cube(DT, .(as.numeric(base::min(value, na.rm=TRUE))), "var"),
2135921359
data.table(var = c("a", "b", "c", "d", NA), V1 = c(1.0, 2.0, 3.0, Inf, 1.0)),
2136021360
warning="no non-missing arguments to min")
2136121361

21362+
# droplevels should still work on a 0-row table, #7043
21363+
DT = data.table(f=factor(character(), levels='a'))
21364+
test(2328.1, levels(droplevels(DT)$f), character())
21365+
DT[, i := integer()]
21366+
DT[, f2 := factor()]
21367+
test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor()))
21368+
2136221369
# '..1' is ...elt(1), not a variable named '1', when doing "up-a-level" search (#5460)
2136321370
DT = data.table(a=1.0)
2136421371
sqrt_dot_sym = function(...) sqrt(..1)
21365-
test(2328.1, lapply(DT, function(...) sqrt(..1)), list(a=1.0))
21366-
test(2328.2, lapply(DT, sqrt_dot_sym), list(a=1.0))
21367-
test(2328.3, DT[, lapply(.SD, function(...) sqrt(..1))], data.table(a=1.0))
21368-
test(2328.4, DT[, lapply(.SD, sqrt_dot_sym)], data.table(a=1.0))
21372+
test(2329.1, lapply(DT, function(...) sqrt(..1)), list(a=1.0))
21373+
test(2329.2, lapply(DT, sqrt_dot_sym), list(a=1.0))
21374+
test(2329.3, DT[, lapply(.SD, function(...) sqrt(..1))], data.table(a=1.0))
21375+
test(2329.4, DT[, lapply(.SD, sqrt_dot_sym)], data.table(a=1.0))
2136921376
sqrt_elt_sym = function(...) sqrt(...elt(1))
2137021377
# TODO(R>=3.5.0): run this unconditionally
2137121378
if (!inherits(tryCatch(sqrt_elt_sym(1), error=identity), "error")) {
21372-
test(2328.5, lapply(DT, sqrt_elt_sym), list(a=1.0))
21373-
test(2328.6, lapply(DT, function(...) sqrt(...elt(1L))), list(a=1.0))
21374-
test(2328.7, DT[, lapply(.SD, sqrt_elt_sym)], data.table(a=1.0))
21375-
test(2328.8, DT[, lapply(.SD, function(...) sqrt(...elt(1L)))], data.table(a=1.0))
21379+
test(2329.5, lapply(DT, sqrt_elt_sym), list(a=1.0))
21380+
test(2329.6, lapply(DT, function(...) sqrt(...elt(1L))), list(a=1.0))
21381+
test(2329.7, DT[, lapply(.SD, sqrt_elt_sym)], data.table(a=1.0))
21382+
test(2329.8, DT[, lapply(.SD, function(...) sqrt(...elt(1L)))], data.table(a=1.0))
2137621383
}

man/IDateTime.Rd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ accounting for both year transitions and varying day counts per week.
205205
for second, minute, hour, day of year, day of week,
206206
day of month, week, month, quarter, and year, respectively.
207207
\code{yearmon} and \code{yearqtr} return double values representing
208-
respectively `year + (month-1) / 12` and `year + (quarter-1) / 4`.
208+
respectively \code{year + (month-1) / 12} and \code{year + (quarter-1) / 4}.
209209
210210
\code{second}, \code{minute}, \code{hour} are taken directly from
211211
the \code{POSIXlt} representation.
@@ -217,7 +217,7 @@ accounting for both year transitions and varying day counts per week.
217217
}
218218
\references{
219219
220-
G. Grothendieck and T. Petzoldt, ``Date and Time Classes in R,''
220+
G. Grothendieck and T. Petzoldt, \dQuote{Date and Time Classes in R},
221221
R News, vol. 4, no. 1, June 2004.
222222
223223
H. Wickham, https://gist.github.com/hadley/10238.

man/as.matrix.Rd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ be used.}
2323
\code{rownames} in the returned \code{matrix}. It must be the same length
2424
as \code{nrow(x)}.}
2525

26-
\item{\dots}{ Required to be present because the generic `as.matrix` generic has it. Arguments here are not currently used or passed on by this method. }
26+
\item{\dots}{ Required to be present because the \code{as.matrix} generic has it. Arguments here are not currently used or passed on by this method. }
2727

2828
}
2929

man/assign.Rd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ For additional resources, please read \href{../doc/datatable-faq.html}{\code{vig
7575
7676
When \code{LHS} is a factor column and \code{RHS} is a character vector with items missing from the factor levels, the new level(s) are automatically added (by reference, efficiently), unlike base methods.
7777
78-
Unlike \code{<-} for \code{data.frame}, the (potentially large) LHS is not coerced to match the type of the (often small) RHS. Instead the RHS is coerced to match the type of the LHS, if necessary. Where this involves double precision values being coerced to an integer column, a warning is given when fractional data is truncated. It is best to get the column types correct up front and stick to them. Changing a column type is possible but deliberately harder: provide a whole column as the RHS. This RHS is then \emph{plonked} into that column slot and we call this \emph{plonk syntax}, or \emph{replace column syntax} if you prefer. By needing to construct a full length vector of a new type, you as the user are more aware of what is happening and it is clearer to readers of your code that you really do intend to change the column type; e.g., \code{DT[, colA:=as.integer(colA)]}. A plonk occurs whenever you provide a RHS value to `:=` which is \code{nrow} long. When a column is \emph{plonked}, the original column is not updated by reference because that would entail updating every single element of that column whereas the plonk is just one column pointer update.
78+
Unlike \samp{<-} for \code{data.frame}, the (potentially large) LHS is not coerced to match the type of the (often small) RHS. Instead the RHS is coerced to match the type of the LHS, if necessary. Where this involves double precision values being coerced to an integer column, a warning is given when fractional data is truncated. It is best to get the column types correct up front and stick to them. Changing a column type is possible but deliberately harder: provide a whole column as the RHS. This RHS is then \emph{plonked} into that column slot and we call this \emph{plonk syntax}, or \emph{replace column syntax} if you prefer. By needing to construct a full length vector of a new type, you as the user are more aware of what is happening and it is clearer to readers of your code that you really do intend to change the column type; e.g., \code{DT[, colA:=as.integer(colA)]}. A plonk occurs whenever you provide a RHS value to \samp{:=} which is \code{nrow} long. When a column is \emph{plonked}, the original column is not updated by reference because that would entail updating every single element of that column whereas the plonk is just one column pointer update.
7979
8080
\code{data.table}s are \emph{not} copied-on-change by \code{:=}, \code{setkey} or any of the other \code{set*} functions. See \code{\link{copy}}.
8181
}
@@ -85,7 +85,7 @@ Unlike \code{<-} for \code{data.frame}, the (potentially large) LHS is not coerc
8585
Since \code{[.data.table} incurs overhead to check the existence and type of arguments (for example), \code{set()} provides direct (but less flexible) assignment by reference with low overhead, appropriate for use inside a \code{for} loop. See examples. \code{:=} is more powerful and flexible than \code{set()} because \code{:=} is intended to be combined with \code{i} and \code{by} in single queries on large datasets.
8686
}
8787
\note{
88-
\code{DT[a > 4, b := c]} is different from \code{DT[a > 4][, b := c]}. The first expression updates (or adds) column \code{b} with the value \code{c} on those rows where \code{a > 4} evaluates to \code{TRUE}. \code{X} is updated \emph{by reference}, therefore no assignment needed. Note that this does not apply when `i` is missing, i.e. \code{DT[]}.
88+
\code{DT[a > 4, b := c]} is different from \code{DT[a > 4][, b := c]}. The first expression updates (or adds) column \code{b} with the value \code{c} on those rows where \code{a > 4} evaluates to \code{TRUE}. \code{DT} is updated \emph{by reference}, therefore no assignment is needed. Note that this does not apply when \code{i} is missing, i.e. \code{DT[]}.
8989
9090
The second expression on the other hand updates a \emph{new} \code{data.table} that's returned by the subset operation. Since the subsetted data.table is ephemeral (it is not assigned to a symbol), the result would be lost; unless the result is assigned, for example, as follows: \code{ans <- DT[a > 4][, b := c]}.
9191
}

man/datatable-optimize.Rd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ For \code{getOption("datatable.optimize") >= 2}, additional optimisations are im
6969
(which can get costly with large number of groups) by implementing it
7070
specifically for a particular function. As a result, it is extremely fast.
7171
72-
\item In addition to all the functions above, `.N` is also optimised to
72+
\item In addition to all the functions above, \code{.N} is also optimised to
7373
use GForce, when used separately or when combined with the functions mentioned
7474
above. Note further that GForce-optimized functions must be used separately,
7575
i.e., code like \code{DT[ , max(x) - min(x), by=z]} will \emph{not} currently

man/fread.Rd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ yaml=FALSE, tmpdir=tempdir(), tz="UTC"
6666
\item{keepLeadingZeros}{If TRUE a column containing numeric data with leading zeros will be read as character, otherwise leading zeros will be removed and converted to numeric.}
6767
\item{yaml}{ If \code{TRUE}, \code{fread} will attempt to parse (using \code{\link[yaml]{yaml.load}}) the top of the input as YAML, and further to glean parameters relevant to improving the performance of \code{fread} on the data itself. The entire YAML section is returned as parsed into a \code{list} in the \code{yaml_metadata} attribute. See \code{Details}. }
6868
\item{tmpdir}{ Directory to use as the \code{tmpdir} argument for any \code{tempfile} calls, e.g. when the input is a URL or a shell command. The default is \code{tempdir()} which can be controlled by setting \code{TMPDIR} before starting the R session; see \code{\link[base:tempfile]{base::tempdir}}. }
69-
\item{tz}{ Relevant to datetime values which have no Z or UTC-offset at the end, i.e. \emph{unmarked} datetime, as written by \code{\link[utils:write.table]{utils::write.csv}}. The default \code{tz="UTC"} reads unmarked datetime as UTC POSIXct efficiently. \code{tz=""} reads unmarked datetime as type character (slowly) so that \code{as.POSIXct} can interpret (slowly) the character datetimes in local timezone; e.g. by using \code{"POSIXct"} in \code{colClasses=}. Note that \code{fwrite()} by default writes datetime in UTC including the final Z and therefore \code{fwrite}'s output will be read by \code{fread} consistently and quickly without needing to use \code{tz=} or \code{colClasses=}. If the \code{TZ} environment variable is set to \code{"UTC"} (or \code{""} on non-Windows where unset vs `""` is significant) then the R session's timezone is already UTC and \code{tz=""} will result in unmarked datetimes being read as UTC POSIXct. For more information, please see the news items from v1.13.0 and v1.14.0. }
69+
\item{tz}{ Relevant to datetime values which have no Z or UTC-offset at the end, i.e. \emph{unmarked} datetime, as written by \code{\link[utils:write.table]{utils::write.csv}}. The default \code{tz="UTC"} reads unmarked datetime as UTC POSIXct efficiently. \code{tz=""} reads unmarked datetime as type character (slowly) so that \code{as.POSIXct} can interpret (slowly) the character datetimes in local timezone; e.g. by using \code{"POSIXct"} in \code{colClasses=}. Note that \code{fwrite()} by default writes datetime in UTC including the final Z and therefore \code{fwrite}'s output will be read by \code{fread} consistently and quickly without needing to use \code{tz=} or \code{colClasses=}. If the \code{TZ} environment variable is set to \code{"UTC"} (or \code{""} on non-Windows where unset vs \code{""} is significant) then the R session's timezone is already UTC and \code{tz=""} will result in unmarked datetimes being read as UTC POSIXct. For more information, please see the news items from v1.13.0 and v1.14.0. }
7070
}
7171
\details{
7272

0 commit comments

Comments
 (0)