Skip to content

Commit 625250e

Browse files
Add central documentation page for data.table options (#7075)
* add doc page for data.table options * add script * add install remote in workflow * small typo * restructure script * restore (bad merge?) * trailing ws * terminal newline * terminal newline * more potential aliases * clarification for reading outside interactive sessions * Move the "See..." reference outside \describe{} * rm extra '.' * style change: mention default up-front * sweep '`' usage * Update .ci/linters/rd/options_doc_check.R Co-authored-by: Michael Chirico <[email protected]> * Update .ci/linters/rd/options_doc_check.R Co-authored-by: Michael Chirico <[email protected]> * chng if to else if and _ast to _for_dt_optopns * remove unnecessary line * further simplify * more simplification * simplify again: remove a helper * further simplify, unify helper naming * upd to sggsns * updt sgns * consistent naming style * can't if(grepl(readLines())) b/c it's a vector * fix naming at call site * another renaming * ignore nomatch (deprecated) * add nm to desc * one more [base] qualification * fine-tune allow.cartesian description --------- Co-authored-by: Michael Chirico <[email protected]>
1 parent a5eac7d commit 625250e

File tree

5 files changed

+179
-3
lines changed

5 files changed

+179
-3
lines changed

.ci/linters/rd/options_doc_check.R

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Ensure that data.table options in code match documentation
#
# Compares the "datatable.*" options read through getOption() in the R sources
# with the "datatable.*" names that appear inside \code{} tags of the options
# Rd page, and fails when the two sets differ.
#
# rd_file: path to an Rd file. A file whose first 100 characters do not contain
#   \name{data.table-options} is skipped.
# Value: invisible NULL when the file is skipped or when code and docs agree.
#   Otherwise the mismatches are printed with cat() and stop() is called.
# NB: the R sources are looked up in "R" relative to the working directory.
options_documentation_linter = function(rd_file) {
  # Only the central options page is subject to this check.
  if (!grepl("\\name{data.table-options}", readChar(rd_file, 100L), fixed = TRUE)) return(invisible())

  # Find options in R code: collect the literal first argument of every
  # getOption("datatable.*") call found anywhere inside `expr`.
  walk_r_ast_for_options = function(expr) {
    found = character()
    if (is.call(expr) && length(expr) >= 2L && identical(expr[[1L]], quote(getOption)) &&
        is.character(e2 <- expr[[2L]]) && startsWith(e2, "datatable.")) {
      found = e2
    }
    # Always descend, including below a matching call, so that an option read
    # inside another getOption()'s arguments (e.g. as its default) is not missed.
    if (is.recursive(expr)) {
      found = c(found, unlist(lapply(expr, walk_r_ast_for_options)))
    }
    found
  }

  # Find options in documentation: collect the text of every \code{} element
  # whose first child is a string starting with "datatable.".
  walk_rd_ast_for_options = function(rd_element) {
    if (!is.list(rd_element)) return(character())

    result = character()
    # attr() is NULL for untagged elements; isTRUE() turns the resulting
    # zero-length comparison into FALSE.
    if (isTRUE(attr(rd_element, "Rd_tag") == "\\code") && length(rd_element) >= 1L) {
      content = rd_element[[1L]]
      if (is.character(content) && startsWith(content, "datatable.")) {
        result = content
      }
    }
    c(result, unlist(lapply(rd_element, walk_rd_ast_for_options)))
  }

  code_opts = list.files("R", pattern = "\\.R$", full.names = TRUE) |>
    lapply(\(f) lapply(parse(f), walk_r_ast_for_options)) |>
    unlist() |>
    unique() |>
    setdiff("datatable.nomatch") # ignore deprecated option(s)

  doc_opts = rd_file |>
    tools::parse_Rd() |>
    walk_rd_ast_for_options() |>
    unique()

  miss_in_doc = setdiff(code_opts, doc_opts)
  miss_in_code = setdiff(doc_opts, code_opts)

  # Report both directions of mismatch before failing, so that a single run
  # shows everything that needs to be synced.
  if (length(miss_in_doc) > 0L || length(miss_in_code) > 0L) {
    if (length(miss_in_doc) > 0L) {
      cat(sprintf("Options in code but missing from docs: %s\n", toString(miss_in_doc)))
    }
    if (length(miss_in_code) > 0L) {
      cat(sprintf("Options in docs but not in code: %s\n", toString(miss_in_code)))
    }
    stop("Please sync man/data.table-options.Rd with code options")
  }
}

.github/workflows/code-quality.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,10 @@ jobs:
6161
- uses: r-lib/actions/setup-r@v2
6262
- name: Lint
6363
run: Rscript .ci/lint.R .ci/linters/md . '[.]R?md$'
64+
lint-rd:
65+
runs-on: ubuntu-latest
66+
steps:
67+
- uses: actions/checkout@v4
68+
- uses: r-lib/actions/setup-r@v2
69+
- name: Lint Rd files
70+
run: Rscript .ci/lint.R .ci/linters/rd man '[.]Rd$'

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,5 +103,6 @@ Authors@R: c(
103103
person("Bill", "Evans", role="ctb"),
104104
person("Reino", "Bruner", role="ctb"),
105105
person(given="@badasahog", role="ctb", comment="GitHub user"),
106-
person("Vinit", "Thakur", role="ctb")
106+
person("Vinit", "Thakur", role="ctb"),
107+
person("Mukul", "Kumar", role="ctb")
107108
)

man/data.table-options.Rd

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
\name{data.table-options}
2+
\alias{data.table-options}
3+
\alias{data.table.options}
4+
\alias{datatable.options}
5+
\alias{datatable-options}
6+
7+
\title{Global Options for the data.table Package}
8+
9+
\description{
10+
The data.table package uses a number of global options to control its
11+
behavior. These are regular R options that can be set with \code{options()}
12+
and retrieved with \code{getOption()}. For example:
13+
\preformatted{
14+
# Get the current value of an option
15+
getOption("datatable.print.topn")
16+
17+
# Set a new value for an option
18+
options(datatable.print.topn = 10)
19+
}
20+
This page provides a comprehensive, up-to-date list of all user-configurable
21+
options. NB: If you're reading this on the web, make sure the documented version matches the version you have installed.
22+
}
23+
24+
\section{Printing Options}{
25+
See \code{\link{print.data.table}} for a full description of printing data.tables.
26+
\describe{
27+
\item{\code{datatable.print.topn}}{An integer, default \code{5L}. When a data.table is printed,
28+
only the first topn and last topn rows are displayed.}
29+
\item{\code{datatable.print.nrows}}{An integer, default \code{100L}. The total number of rows
30+
to print before the topn logic is triggered.}
31+
\item{\code{datatable.print.class}}{A logical, default \code{FALSE}. If \code{TRUE}, the class of
32+
each column is printed below its name.}
33+
\item{\code{datatable.print.keys}}{A logical, default \code{FALSE}. If \code{TRUE}, the table's
34+
keys are printed above the data.}
35+
\item{\code{datatable.show.indices}}{A logical, default \code{TRUE}. A synonym for \code{datatable.print.keys} for historical reasons.}
36+
\item{\code{datatable.print.trunc.cols}}{A logical, default \code{FALSE}. If \code{TRUE} and a
37+
table has more columns than fit on the screen, it truncates the middle columns.}
38+
\item{\code{datatable.prettyprint.char}}{An integer, default \code{100L}. The maximum number of
39+
characters to display in a character column cell before truncating.}
40+
\item{\code{datatable.print.colnames}}{A logical, default \code{TRUE}. If \code{TRUE}, prints column names.}
41+
\item{\code{datatable.print.rownames}}{A logical, default \code{TRUE}. If \code{TRUE}, prints row numbers.}
42+
}
43+
}
44+
45+
\section{File I/O Options (fread and fwrite)}{
46+
See \code{\link{fread}} and \code{\link{fwrite}} for a full description of data.table I/O.
47+
\describe{
48+
\item{\code{datatable.fread.input.cmd.message}}{A logical, default \code{TRUE}. If \code{TRUE},
49+
\code{fread} will print the shell command it is using when the input is a
50+
command (e.g., \code{fread("grep ...")}).}
51+
\item{\code{datatable.fread.datatable}}{A logical, default \code{TRUE}. If \code{TRUE}, \code{fread}
52+
returns a \code{data.table}. If \code{FALSE}, it returns a \code{data.frame}.}
53+
\item{\code{datatable.integer64}}{A character string, default \code{"integer64"}. Controls how \code{fread}
54+
handles 64-bit integers. Can be \code{"integer64"}, \code{"double"}, or \code{"character"}.}
55+
\item{\code{datatable.logical01}}{A logical, default \code{FALSE}. If \code{TRUE}, \code{fread} will
56+
interpret columns containing only 0 and 1 as logical.}
57+
\item{\code{datatable.keepLeadingZeros}}{A logical, default \code{FALSE}. If \code{TRUE}, \code{fread}
58+
preserves leading zeros in character columns by reading them as strings;
59+
otherwise they may be coerced to numeric.}
60+
\item{\code{datatable.logicalYN}}{A logical, default \code{FALSE}. If \code{TRUE}, \code{fread}
61+
will interpret "Y" and "N" as logical.}
62+
\item{\code{datatable.na.strings}}{A character vector, default \code{"NA"}. Global default for strings that
63+
\code{fread} should interpret as \code{NA}.}
64+
\item{\code{datatable.fwrite.sep}}{A character string, default \code{","}. The default separator
65+
used by \code{fwrite}.}
66+
\item{\code{datatable.showProgress}}{An integer or logical, default \code{\link[base]{interactive}()}. Controls whether
67+
long-running operations like \code{fread} display a progress bar.}
68+
}
69+
}
70+
71+
\section{Join and Subset Options}{
72+
\describe{
73+
\item{\code{datatable.allow.cartesian}}{A logical, default \code{FALSE}. Controls the default value of the
74+
\code{allow.cartesian} parameter; see \code{\link{data.table}}. If \code{FALSE}, an error is raised as a safeguard against an explosive Cartesian join.}
75+
}
76+
}
77+
78+
\section{Performance and Indexing Options}{
79+
\describe{
80+
\item{\code{datatable.auto.index}}{A logical, default \code{TRUE}. If \code{TRUE}, \code{data.table}
81+
automatically creates a secondary index on-the-fly when a column is first
82+
used in a subset, speeding up all subsequent queries.}
83+
\item{\code{datatable.use.index}}{A logical, default \code{TRUE}. A global switch to control
84+
whether existing secondary indices are used for subsetting.}
85+
\item{\code{datatable.forder.auto.index}}{A logical, default \code{TRUE}. Similar to \code{datatable.auto.index},
86+
but applies to ordering operations (\code{forder}).}
87+
\item{\code{datatable.optimize}}{A numeric, default \code{Inf}. Controls the GForce query
88+
optimization engine. The default enables all possible optimizations.
89+
See \code{\link{datatable.optimize}}.}
90+
\item{\code{datatable.alloccol}}{An integer, default \code{1024L}. Controls the number of column
91+
slots to pre-allocate, improving performance when adding many columns.
92+
See \code{\link{alloc.col}}.}
93+
\item{\code{datatable.reuse.sorting}}{A logical, default \code{TRUE}. If \code{TRUE}, \code{data.table}
94+
can reuse the sorted order of a table in joins, improving performance.}
95+
}
96+
}
97+
98+
\section{Development and Verbosity Options}{
99+
\describe{
100+
\item{\code{datatable.quiet}}{A logical, default \code{FALSE}. The master switch to suppress all
101+
\code{data.table} status messages, including the startup message.}
102+
\item{\code{datatable.verbose}}{A logical, default \code{FALSE}. If \code{TRUE}, \code{data.table} will
103+
print detailed diagnostic information as it processes a query.}
104+
\item{\code{datatable.enlist}}{Experimental feature. Default is \code{NULL}. If set to a function
105+
(e.g., \code{list}), the \code{j} expression can return a \code{list}, which will then
106+
be "enlisted" into columns in the result.}
107+
}
108+
}
109+
110+
\seealso{
111+
\code{\link[base]{options}},
112+
\code{\link[base]{getOption}},
113+
\code{\link{data.table}}
114+
}
115+
116+
\keyword{data}
117+
\keyword{utilities}

src/assign.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,10 +277,10 @@ int checkOverAlloc(SEXP x)
277277
if (!isInteger(x) && !isReal(x))
278278
error(_("getOption('datatable.alloccol') should be a number, by default 1024. But its type is '%s'."), type2char(TYPEOF(x)));
279279
if (LENGTH(x) != 1)
280-
error(_("getOption('datatable.alloc') is a numeric vector ok but its length is %d. Its length should be 1."), LENGTH(x));
280+
error(_("getOption('datatable.alloccol') is a numeric vector ok but its length is %d. Its length should be 1."), LENGTH(x));
281281
int ans = asInteger(x);
282282
if (ans<0)
283-
error(_("getOption('datatable.alloc')==%d. It must be >=0 and not NA."), ans);
283+
error(_("getOption('datatable.alloccol')==%d. It must be >=0 and not NA."), ans);
284284
return ans;
285285
}
286286

0 commit comments

Comments
 (0)