Skip to content

Commit 2071b3f

Browse files
add script
1 parent fe837d4 commit 2071b3f

File tree

3 files changed

+168
-114
lines changed

3 files changed

+168
-114
lines changed

.ci/check-options.R

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Run with: Rscript .ci/check-options.R
#
# Cross-checks the "datatable.*" option names that appear as quoted strings in
# R/*.R against the names documented as \code{datatable.*} in
# man/data.table-options.Rd. Must be run from the package root. Exits with
# status 1 when the two sets differ so that CI fails.
cat(">> Checking data.table options documentation consistency\n")

# Scan R source code for data.table options
src_files <- list.files("R", pattern = "\\.R$", full.names = TRUE)
if (length(src_files) == 0L) {
  # Without this guard a wrong working directory silently yields zero options.
  stop("CRITICAL: no R source files found under 'R/'; run from the package root.")
}
# A quoted token such as "datatable.verbose" or 'datatable.print.topn'.
opt_pattern <- '["\'](datatable\\.[.A-Za-z0-9]+)["\']'
quoted_opts <- unlist(lapply(src_files, function(f) {
  lines <- readLines(f, warn = FALSE)
  regmatches(lines, gregexpr(opt_pattern, lines))
}))
# Strip the surrounding quotes captured by the pattern, then de-duplicate.
code_opts <- sort(unique(gsub('["\']', "", quoted_opts)))

# Scan the documentation file for data.table options
doc_file <- "man/data.table-options.Rd"
if (!file.exists(doc_file)) stop("CRITICAL: '", doc_file, "' not found.")
doc_lines <- readLines(doc_file, warn = FALSE)  # read once, reuse below
# The lookbehind keeps only names that directly follow "\code{".
doc_opts <- sort(unique(unlist(
  regmatches(doc_lines, gregexpr("(?<=\\\\code\\{)datatable\\.[^}]+", doc_lines, perl = TRUE))
)))

# Compare the final lists and report status
cat(sprintf(" Found %d options in code, %d in documentation.\n", length(code_opts), length(doc_opts)))

miss_in_doc <- setdiff(code_opts, doc_opts)
# Fixed: previously setdiff(doc_opts, doc_opts), which is always empty.
miss_in_code <- setdiff(doc_opts, code_opts)

if (length(miss_in_doc) > 0L || length(miss_in_code) > 0L) {
  message(" Mismatch in data.table options documentation:")
  if (length(miss_in_doc) > 0L) {
    message(" In code but MISSING from docs:\n - ", paste(miss_in_doc, collapse = "\n - "))
  }
  if (length(miss_in_code) > 0L) {
    message("\n In docs but NOT in code (check for typos/deprecation):\n - ", paste(miss_in_code, collapse = "\n - "))
  }
  quit(status = 1)
}

message(" Options documentation is perfectly in sync.")

.github/workflows/code-quality.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,15 @@ jobs:
7777
- name: Lint
7878
run: for (f in list.files('.ci/linters/md', full.names=TRUE)) source(f)
7979
shell: Rscript {0}
80+
build:
  runs-on: ubuntu-latest
  steps:
    # checkout@v2 is outdated; use the current major version.
    - uses: actions/checkout@v4
    - uses: r-lib/actions/setup-r@v2
    - name: Install dependencies
      # remotes and devtools are used by the steps below, so install them
      # explicitly before use. NOTE(review): assumes setup-r does not
      # preinstall them - confirm.
      run: Rscript -e 'install.packages(c("remotes", "devtools")); remotes::install_deps(dependencies = TRUE)'
    - name: Check documentation and options consistency
      # Path fixed: the script added by this commit is .ci/check-options.R.
      run: Rscript .ci/check-options.R
    - name: Run R CMD check
      run: Rscript -e 'devtools::check()'
91+

man/data.table-options.Rd

Lines changed: 118 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -24,124 +24,128 @@ options(datatable.print.topn = 10)
2424
}
2525

2626
\section{Printing Options}{
27-
\details{
28-
\describe{
29-
\item{\code{datatable.print.topn}}{An integer. When a data.table is printed,
30-
only the first topn and last topn rows are displayed. This prevents
31-
cluttering the console for large tables.
32-
Default: \code{5L}. See \code{\link{print.data.table}}.}
33-
\item{\code{datatable.print.nrows}}{An integer. The total number of rows
34-
to print before the topn logic is triggered.
35-
Default: \code{100L}.}
36-
\item{\code{datatable.print.class}}{A logical. If \code{TRUE}, the class of
37-
each column is printed below the column name.
38-
Default: \code{FALSE}.}
39-
\item{\code{datatable.print.keys}}{A logical. If \code{TRUE}, the table's
40-
keys are printed above the data.
41-
Default: \code{FALSE}.}
42-
\item{\code{datatable.print.trunc.cols}}{A logical. If \code{TRUE} and a
43-
table has more columns than fit on the screen, it truncates the middle
44-
columns for printing (e.g., a, b, ..., z). If \code{FALSE}, it wraps
45-
the columns to the next line.
46-
Default: \code{FALSE}.}
47-
\item{\code{datatable.prettyprint.char}}{An integer. The maximum number of
48-
characters to display in a character column cell before truncating
49-
with ....
50-
Default: \code{100L}.}
51-
}
52-
}
53-
54-
\section{File I/O Options (fread and fwrite)}{
55-
\describe{
56-
\item{\code{datatable.fread.input.cmd.message}}{A logical. If \code{TRUE},
57-
fread will print the shell command it is using when the input is a
58-
command (e.g., fread("grep ...")).
59-
Default: \code{TRUE}. See \code{\link{fread}}.}
60-
\item{\code{datatable.fread.datatable}}{A logical. If \code{TRUE}, fread
61-
returns a data.table. If FALSE, it returns a data.frame.
62-
Default: \code{TRUE}.}
63-
\item{\code{datatable.integer64}}{A character string. Controls how fread
64-
handles 64-bit integers. Can be "integer64" (requires bit64 package),
65-
"double" (loses precision), or "character" (reads as text).
66-
Default: \code{"integer64"}.}
67-
\item{\code{datatable.logical01}}{A logical. If \code{TRUE}, fread will
68-
interpret columns containing only 0 and 1 as logical FALSE/TRUE.
69-
Default: \code{FALSE}.}
70-
\item{\code{datatable.showProgress}}{An integer or logical. Controls whether
71-
long-running operations like fread display a progress bar. Default
72-
is \code{interactive()}, showing it only in interactive R sessions.}
73-
}
74-
}
27+
\describe{
28+
\item{\code{datatable.print.topn}}{An integer. When a data.table is printed,
29+
only the first topn and last topn rows are displayed.
30+
Default: \code{5L}. See \code{\link{print.data.table}}.}
31+
\item{\code{datatable.print.nrows}}{An integer. The total number of rows
32+
to print before the topn logic is triggered.
33+
Default: \code{100L}.}
34+
\item{\code{datatable.print.class}}{A logical. If \code{TRUE}, the class of
35+
each column is printed below its name.
36+
Default: \code{FALSE}.}
37+
\item{\code{datatable.print.keys}}{A logical. If \code{TRUE}, the table's
38+
keys are printed above the data.
39+
Default: \code{FALSE}.}
40+
\item{\code{datatable.show.indices}}{A logical. A synonym for \code{datatable.print.keys} for historical reasons.
41+
Default: \code{TRUE}.}
42+
\item{\code{datatable.print.trunc.cols}}{A logical. If \code{TRUE} and a
43+
table has more columns than fit on the screen, it truncates the middle
44+
columns.
45+
Default: \code{FALSE}.}
46+
\item{\code{datatable.prettyprint.char}}{An integer. The maximum number of
47+
characters to display in a character column cell before truncating.
48+
Default: \code{100L}.}
49+
\item{\code{datatable.print.colnames}}{A logical. If \code{TRUE}, prints column names.
50+
Default: \code{TRUE}.}
51+
\item{\code{datatable.print.rownames}}{A logical. If \code{TRUE}, prints row numbers.
52+
Default: \code{TRUE}.}
53+
}
54+
}
7555
76-
\section{Join and Subset Options}{
77-
\describe{
78-
\item{\code{datatable.allow.cartesian}}{A logical. A safety feature to prevent
79-
accidental memory-exploding joins. If FALSE (default), a join is not
80-
allowed if the result would be more rows than max(nrow(x), nrow(i)),
81-
which occurs when a row in i matches more than one row in x.
82-
Default: \code{FALSE}. See \code{\link{data.table}}.}
83-
\item{\code{datatable.nomatch}}{Controls the behavior of non-matching rows in
84-
a join. The default is NA, which returns NA for columns of x when a
85-
row in i has no match. Can be set to 0L to drop non-matching rows,
86-
behaving like an inner join.
87-
Default: \code{NA}.}
88-
}
89-
}
56+
\section{File I/O Options (fread and fwrite)}{
57+
\describe{
58+
\item{\code{datatable.fread.input.cmd.message}}{A logical. If \code{TRUE},
59+
\code{fread} will print the shell command it is using when the input is a
60+
command (e.g., \code{fread("grep ...")}).
61+
Default: \code{TRUE}. See \code{\link{fread}}.}
62+
\item{\code{datatable.fread.datatable}}{A logical. If \code{TRUE}, \code{fread}
63+
returns a \code{data.table}. If \code{FALSE}, it returns a \code{data.frame}.
64+
Default: \code{TRUE}.}
65+
\item{\code{datatable.integer64}}{A character string. Controls how \code{fread}
66+
handles 64-bit integers. Can be "integer64", "double", or "character".
67+
Default: \code{"integer64"}.}
68+
\item{\code{datatable.logical01}}{A logical. If \code{TRUE}, \code{fread} will
69+
interpret columns containing only 0 and 1 as logical.
70+
Default: \code{FALSE}.}
71+
\item{\code{datatable.keepLeadingZeros}}{A logical. If \code{TRUE}, \code{fread}
72+
preserves leading zeros in character columns by reading them as strings;
73+
otherwise they may be coerced to numeric.
74+
Default: \code{FALSE}.}
75+
\item{\code{datatable.logicalYN}}{A logical. If \code{TRUE}, \code{fread}
76+
will interpret "Y" and "N" as logical.
77+
Default: \code{FALSE}.}
78+
\item{\code{datatable.na.strings}}{A character vector. Global default for strings that
79+
\code{fread} should interpret as \code{NA}.
80+
Default: \code{"NA"}.}
81+
\item{\code{datatable.fwrite.sep}}{A character string. The default separator
82+
used by \code{fwrite}.
83+
Default: \code{","}.}
84+
\item{\code{datatable.showProgress}}{An integer or logical. Controls whether
85+
long-running operations like \code{fread} display a progress bar. Default
86+
is \code{interactive()}.}
87+
}
88+
}
9089
91-
\section{Performance and Indexing Options}{
92-
\describe{
93-
\item{\code{datatable.auto.index}}{A logical. If \code{TRUE}, data.table
94-
automatically creates a secondary index on-the-fly the first time a column
95-
is used in a query (e.g., DT[col == 'value']). This dramatically
96-
speeds up all subsequent queries on that same column.
97-
Default: \code{TRUE}.}
98-
\item{\code{datatable.use.index}}{A logical. A global switch to control
99-
whether existing indices are used for subsetting.
100-
Default: \code{TRUE}.}
101-
\item{\code{datatable.optimize}}{An integer controlling the GForce query
102-
optimization engine. Set to Inf to enable all possible optimizations,
103-
which data.table does by default. See \code{\link{datatable.optimize}}.
104-
Default: \code{Inf}.}
105-
\item{\code{datatable.alloccol}}{An integer. data.table pre-allocates
106-
memory for a certain number of columns when first created. This option
107-
controls the length of this pre-allocation, improving performance when
108-
adding many columns via :=. See \code{\link{alloc.col}}.
109-
Default: \code{1024L}.}
110-
}
111-
}
90+
\section{Join and Subset Options}{
91+
\describe{
92+
\item{\code{datatable.allow.cartesian}}{A logical. A safety feature. If \code{FALSE}, a join
93+
is not allowed if the result would have more rows than the largest of the two tables.
94+
Default: \code{FALSE}. See \code{\link{data.table}}.}
95+
\item{\code{datatable.nomatch}}{Controls the behavior of non-matching rows in
96+
a join. The default is \code{NA}. Can be set to \code{0L} to drop non-matching rows.
97+
Default: \code{NA}.}
98+
}
99+
}
112100
113-
\section{Development and Verbosity Options}{
114-
\describe{
115-
\item{\code{datatable.verbose}}{A logical. If \code{TRUE}, data.table will
116-
print detailed, step-by-step diagnostic information as it processes a
117-
query. Extremely useful for debugging and performance tuning.
118-
Default: \code{FALSE}.}
119-
\item{\code{datatable.pedantic}}{A logical. If \code{TRUE}, data.table
120-
enters a "pedantic" mode, issuing helpful warnings for situations that
121-
are not technically errors but might be unintentional (e.g., when a
122-
variable in j is found in the global environment instead of inside the
123-
data.table).
124-
Default: \code{FALSE}.}
125-
\item{\code{datatable.dfdispatchwarn}}{A logical. If \code{TRUE}, warns
126-
when a generic function from another package (e.g., dplyr::filter) is
127-
applied to a data.table. This can be a useful reminder that you are not
128-
using data.table's optimized methods.
129-
Default: \code{TRUE}.}
130-
}
131-
}
101+
\section{Performance and Indexing Options}{
102+
\describe{
103+
\item{\code{datatable.auto.index}}{A logical. If \code{TRUE}, \pkg{data.table}
104+
automatically creates a secondary index on-the-fly when a column is first
105+
used in a subset, speeding up all subsequent queries.
106+
Default: \code{TRUE}.}
107+
\item{\code{datatable.use.index}}{A logical. A global switch to control
108+
whether existing secondary indices are used for subsetting.
109+
Default: \code{TRUE}.}
110+
\item{\code{datatable.forder.auto.index}}{A logical. Similar to \code{datatable.auto.index},
111+
but applies to ordering operations (\code{forder}).
112+
Default: \code{TRUE}.}
113+
\item{\code{datatable.optimize}}{An integer controlling the GForce query
114+
optimization engine. The default enables all possible optimizations.
115+
See \code{\link{datatable.optimize}}.
116+
Default: \code{Inf}.}
117+
\item{\code{datatable.alloccol}}{An integer. Controls the number of column
118+
slots to pre-allocate, improving performance when adding many columns.
119+
See \code{\link{alloc.col}}.
120+
Default: \code{1024L}.}
121+
\item{\code{datatable.reuse.sorting}}{A logical. If \code{TRUE}, \pkg{data.table}
122+
can reuse the sorted order of a table in joins, improving performance.
123+
Default: \code{TRUE}.}
124+
}
125+
}
132126
133-
\section{Internal Strings (Not User Options)}{
134-
\describe{
135-
\item{The following strings are not options to be set by users.}{They are
136-
included here to assist developers searching the source code. They are typically
137-
parts of URLs found in code comments or documentation.
138-
\itemize{
139-
\item \code{datatable.com}
140-
\item \code{datatable.github.io}
141-
\item \code{datatable.gitlab.io}
142-
}
143-
}
144-
}
127+
\section{Development and Verbosity Options}{
128+
\describe{
129+
\item{\code{datatable.quiet}}{A logical. The master switch to suppress all
130+
\pkg{data.table} status messages, including the startup message.
131+
Default: \code{FALSE}.}
132+
\item{\code{datatable.verbose}}{A logical. If \code{TRUE}, \pkg{data.table} will
133+
print detailed diagnostic information as it processes a query.
134+
Default: \code{FALSE}.}
135+
\item{\code{datatable.pedantic}}{A logical. If \code{TRUE}, \pkg{data.table}
136+
enters a "pedantic" mode, issuing helpful warnings for potentially
137+
unintentional user behavior.
138+
Default: \code{FALSE}.}
139+
\item{\code{datatable.dfdispatchwarn}}{A logical. If \code{TRUE}, warns
140+
when a generic function from another package is applied to a \code{data.table}.
141+
Default: \code{TRUE}.}
142+
\item{\code{datatable.warnredundantby}}{A logical. If \code{TRUE}, \pkg{data.table}
143+
will warn when grouping by columns that are already the key of the table.
144+
Default: \code{TRUE}.}
145+
\item{\code{datatable.enlist}}{Experimental feature. If set to a function
146+
(e.g., \code{list}), the \code{j} expression can return a \code{list}, which will then
147+
be "enlisted" into columns in the result.
148+
Default: \code{NULL}.}
145149
}
146150
}
147151

0 commit comments

Comments
 (0)