Skip to content

Commit 0c32a51

Browse files
committed
Merge branch 'master' into GHA-sanitizers
2 parents ff4d1ab + fd2a915 commit 0c32a51

38 files changed

+332
-335
lines changed

.ci/.lintr.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ linters = c(dt_linters, all_linters(
99
packages = "lintr", # TODO(lintr->3.2.0): Remove this.
1010
# eq_assignment_linter(),
1111
brace_linter(allow_single_line = TRUE),
12+
implicit_integer_linter(allow_colon = TRUE),
1213
# TODO(michaelchirico): Activate these incrementally. These are the
1314
# parameterizations that match our style guide.
1415
# implicit_assignment_linter(allow_lazy = TRUE, allow_scoped = TRUE),
15-
# implicit_integer_linter(allow_colon = TRUE),
1616
# system_time_linter = undesirable_function_linter(c(
1717
# system.time = "Only run timings in benchmark.Rraw"
1818
# )),
@@ -85,6 +85,7 @@ exclusions = c(local({
8585
undesirable_function_linter = Inf
8686
)),
8787
exclusion_for_dir(c("vignettes", "vignettes/fr"), list(
88+
implicit_integer_linter = Inf,
8889
quotes_linter = Inf,
8990
sample_int_linter = Inf
9091
# strings_as_factors_linter = Inf

.ci/linters/c/alloc_linter.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
# 2. Check the next line for a check like 'if (!x || !y)'
55
alloc_linter = function(c_obj) {
66
lines = c_obj$lines
7-
# Be a bit more precise to avoid mentions in comments
8-
alloc_lines = grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(]}", lines)
7+
# Be a bit more precise to avoid mentions in comments, and allow
8+
# malloc(0) to be used for convenience (e.g. #6757)
9+
alloc_lines = grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(][^0]}", lines)
910
if (!length(alloc_lines)) return()
1011
# int *tmp=(int*)malloc(...); or just int tmp=malloc(...);
1112
alloc_keys = lines[alloc_lines] |>

.dev/CRAN_Release.cmd

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,6 @@ grep -P "\t" ./src/*.c
114114
grep -n "[^A-Za-z0-9]T[^A-Za-z0-9]" ./inst/tests/tests.Rraw
115115
grep -n "[^A-Za-z0-9]F[^A-Za-z0-9]" ./inst/tests/tests.Rraw
116116

117-
# All integers internally should have L suffix to avoid lots of one-item coercions
118-
# Where 0 numeric is intended we should perhaps use 0.0 for clarity and make the grep easier
119-
# 1) tolerance=0 usages in setops.R are valid numeric 0, as are anything in strings
120-
# 2) leave the rollends default using roll>=0 though; comments in PR #3803
121-
grep -Enr "^[^#]*(?:\[|==|>|<|>=|<=|,|\(|\+)\s*[-]?[0-9]+[^0-9L:.e]" R | grep -Ev "stop|warning|tolerance"
122-
123117
# Never use ifelse. fifelse for vectors when necessary (nothing yet)
124118
grep -Enr "\bifelse" R
125119

@@ -135,10 +129,6 @@ grep -Fn "tryCatch" ./inst/tests/*.Rraw
135129
# All % in *.Rd should be escaped otherwise text gets silently chopped
136130
grep -n "[^\]%" ./man/*.Rd
137131

138-
# if (a & b) is either invalid or inefficient (ditto for replace & with |);
139-
# if(any(a [&|] b)) is appropriate b/c of collapsing the logical vector to scalar
140-
grep -nr "^[^#]*if[^&#]*[^&#\"][&][^&]" R | grep -Ev "if\s*[(](?:any|all)"
141-
142132
# seal leak potential where two unprotected API calls are passed to the same
143133
# function call, usually involving install() or mkChar()
144134
# Greppable thanks to single lines and wide screens

.github/workflows/pkgup.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
cp -R ${{ env.R_LIBS_USER }} library
5151
R CMD INSTALL --library="library" $(ls -1t data.table_*.tar.gz | head -n 1) --html
5252
mkdir -p doc/html
53-
cp /usr/share/R/doc/html/{left.jpg,up.jpg,Rlogo.svg,R.css,index.html} doc/html
53+
cp $(R RHOME)/doc/html/{left.jpg,up.jpg,Rlogo.svg,R.css,index.html} doc/html
5454
Rscript -e 'utils::make.packages.html("library", docdir="doc")'
5555
sed -i "s|file://|../..|g" doc/html/packages.html
5656
mkdir -p public

CODEOWNERS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@
6868
# docs
6969
/man/openmp-utils.Rd @Anirban166
7070
/Seal_of_Approval.md @tdhock
71+
/GOVERNANCE.md: @Rdatatable/committers
7172

7273
# GLCI
7374
.gitlab-ci.yml @jangorecki @ben-schwen
75+
76+
# C code tricks
77+
/src/chmatch.c @aitap
78+
/src/fread.c @aitap

GOVERNANCE.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,9 @@ A pull request can be merged by any committer, if there is one approving review,
9494

9595
## Changing this GOVERNANCE.md document
9696

97-
There is no special process for changing this document (submit a PR
98-
and ask for review).
97+
There is no special process for changing this document. Submit a PR and ask for review; the group `@Rdatatable/committers` will automatically be assigned to ensure all current Committers are aware of the change.
98+
99+
Please also make a note in the change log under [`# Governance history`](#governance-history)
99100

100101
# Code of conduct
101102

@@ -123,6 +124,8 @@ data.table Version line in DESCRIPTION typically has the following meanings
123124

124125
# Governance history
125126

127+
Jan 2025: clarify that edits to governance should notify all committers.
128+
126129
Feb 2024: change team name/link maintainers to committers, to be consistent with role defined in governance.
127130

128131
Nov-Dec 2023: initial version drafted by Toby Dylan Hocking and

NEWS.md

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ rowwiseDT(
6969

7070
6. `fread()` gains `logicalYN` argument to read columns consisting only of strings `Y`, `N` as `logical` (as opposed to character), [#4563](https://github.com/Rdatatable/data.table/issues/4563). The default is controlled by option `datatable.logicalYN`, itself defaulting to `FALSE`, for back-compatibility -- some smaller tables (especially sharded tables) might inadvertently read a "true" string column as `logical` and cause bugs. This is particularly important for tables with a column named `y` or `n` -- automatic header detection under `logicalYN=TRUE` will see these values in the first row as being "data" as opposed to column names. A parallel option was not included for `fwrite()` at this time -- users looking for a compact representation of logical columns can still use `fwrite(logical01=TRUE)`. We also opted for now to check only `Y`, `N` and not `Yes`/`No`/`YES`/`NO`.
7171

72-
7. `fwrite()` with `compress="gzip"` produces compatible gz files when composed of multiple independent chunks owing to parallelization, [#6356](https://github.com/Rdatatable/data.table/issues/6356). Earlier `fwrite()` versions could have issues with HTTP upload using `Content-Encoding: gzip` and `Transfer-Encoding: chunked`. Thanks to @oliverfoster for report and @philippechataignon for the fix.
72+
7. `fwrite()` with `compress="gzip"` produces compatible gz files when composed of multiple independent chunks owing to parallelization, [#6356](https://github.com/Rdatatable/data.table/issues/6356). Earlier `fwrite()` versions could have issues with HTTP upload using `Content-Encoding: gzip` and `Transfer-Encoding: chunked`. Thanks to @oliverfoster for report and @philippechataignon for the fix. Thanks also @aitap for pre-release testing that found some possible memory leaks in the initial fix.
7373

7474
8. `fwrite()` gains a new parameter `compressLevel` to control compression level for gzip, [#5506](https://github.com/Rdatatable/data.table/issues/5506). This parameter balances compression speed and total compression, and corresponds directly to the analogous command-line parameter, e.g. `compressLevel=4` corresponds to passing `-4`; the default, `6`, matches the command-line default, i.e. equivalent to passing `-6`. Thanks @mgarbuzov for the request and @philippechataignon for implementing.
7575

@@ -109,31 +109,29 @@ rowwiseDT(
109109

110110
8. Fixed possible segfault in `setDT(df); attr(df, key) <- value; set(df, ...)`, i.e. adding columns to an object with `set()` that was converted to data.table with `setDT()` and later had attributes add with `attr<-`, [#6410](https://github.com/Rdatatable/data.table/issues/6410). Thanks to @hongyuanjia for the report and @ben-schwen for the PR. Note that `setattr()` should be preferred for adding attributes to a data.table.
111111

112-
9. `setDT()` no longer modifies the class of other names bound to the origin data.frame, e.g., in `DF1 <- data.frame(a=1); DF2 <- DF1; setDT(DF2)`, `DF1`'s class will not change. [#4784](https://github.com/Rdatatable/data.table/issues/4784). Thanks @OfekShilon for the report and fix.
112+
9. `DT[1, on=NULL]` now works for returning the first row, [#6579](https://github.com/Rdatatable/data.table/issues/6579). Thanks to @Kodiologist for the report and @tdhock for the PR.
113113

114-
10. `DT[1, on=NULL]` now works for returning the first row, [#6579](https://github.com/Rdatatable/data.table/issues/6579). Thanks to @Kodiologist for the report and @tdhock for the PR.
114+
10. `tables()` now returns the correct size for data.tables over 2GiB, [#6607](https://github.com/Rdatatable/data.table/issues/6607). Thanks to @vlulla for the report and the PR.
115115

116-
11. `tables()` now returns the correct size for data.tables over 2GiB, [#6607](https://github.com/Rdatatable/data.table/issues/6607). Thanks to @vlulla for the report and the PR.
116+
11. `rbindlist(l, use.names=TRUE)` can now handle different encodings for the column names in different entries of `l`, [#5452](https://github.com/Rdatatable/data.table/issues/5452). Thanks to @MEO265 for the report, and Benjamin Schwendinger for the fix.
117117

118-
12. `rbindlist(l, use.names=TRUE)` can now handle different encodings for the column names in different entries of `l`, [#5452](https://github.com/Rdatatable/data.table/issues/5452). Thanks to @MEO265 for the report, and Benjamin Schwendinger for the fix.
118+
12. Added a `data.frame` method for `format_list_item()` to fix error printing data.tables with columns containing 1-column data.frames, [#6592](https://github.com/Rdatatable/data.table/issues/6592). Thanks to @r2evans for the bug report and fix.
119119

120-
13. Added a `data.frame` method for `format_list_item()` to fix error printing data.tables with columns containing 1-column data.frames, [#6592](https://github.com/Rdatatable/data.table/issues/6592). Thanks to @r2evans for the bug report and fix.
121-
122-
14. Auto-printing gets some substantial improvements
120+
13. Auto-printing gets some substantial improvements
123121
- Suppression in `knitr` documents is now done by implementing a method for `knit_print` instead of looking up the call stack, [#6589](https://github.com/Rdatatable/data.table/pull/6589). The old way was fragile and wound up broken by some implementation changes in {knitr}. Thanks to @jangorecki for the report [#6509](https://github.com/Rdatatable/data.table/issues/6509) and @aitap for the fix.
124122
- `print()` methods for S3 subclasses of data.table (e.g. an object of class `c("my.table", "data.table", "data.frame")`) no longer print where plain data.tables wouldn't, e.g. `myDT[, y := 2]`, [#3029](https://github.com/Rdatatable/data.table/issues/3029). The improved detection of auto-printing scenarios has the added benefit of _allowing_ print in highly explicit statements like `print(DT[, y := 2])`, obviating our recommendation since v1.9.6 to append `[]` to signal "please print me".
125123
126-
15. Joins of `integer64` and `double` columns succeed when the `double` column has lossless `integer64` representation, [#4167](https://github.com/Rdatatable/data.table/issues/4167) and [#6625](https://github.com/Rdatatable/data.table/issues/6625). Previously, this only worked when the double column had lossless _32-bit_ integer representation. Thanks @MichaelChirico for the reports and fix.
124+
14. Joins of `integer64` and `double` columns succeed when the `double` column has lossless `integer64` representation, [#4167](https://github.com/Rdatatable/data.table/issues/4167) and [#6625](https://github.com/Rdatatable/data.table/issues/6625). Previously, this only worked when the double column had lossless _32-bit_ integer representation. Thanks @MichaelChirico for the reports and fix.
127125
128-
16. `DT[order(...)]` better matches `base::order()` behavior by (1) recognizing the `method=` argument (and erroring since this is not supported) and (2) accepting a vector of `TRUE`/`FALSE` in `decreasing=` as an alternative to using `-a` to convey "sort `a` decreasing", [#4456](https://github.com/Rdatatable/data.table/issues/4456). Thanks @jangorecki for the FR and @MichaelChirico for the PR.
126+
15. `DT[order(...)]` better matches `base::order()` behavior by (1) recognizing the `method=` argument (and erroring since this is not supported) and (2) accepting a vector of `TRUE`/`FALSE` in `decreasing=` as an alternative to using `-a` to convey "sort `a` decreasing", [#4456](https://github.com/Rdatatable/data.table/issues/4456). Thanks @jangorecki for the FR and @MichaelChirico for the PR.
129127
130-
17. Assignment with `:=` to an S4 slot of an under-allocated data.table now works, [#6704](https://github.com/Rdatatable/data.table/issues/6704). Thanks @MichaelChirico for the report and fix.
128+
16. Assignment with `:=` to an S4 slot of an under-allocated data.table now works, [#6704](https://github.com/Rdatatable/data.table/issues/6704). Thanks @MichaelChirico for the report and fix.
131129
132-
18. `as.data.table()` method for `data.frame`s (especially those with extended classes) is more consistent with `as.data.frame()` with respect to rention of attributes, [#5699](https://github.com/Rdatatable/data.table/issues/5699). Thanks @jangorecki for the report and fix.
130+
17. `as.data.table()` method for `data.frame`s (especially those with extended classes) is more consistent with `as.data.frame()` with respect to rention of attributes, [#5699](https://github.com/Rdatatable/data.table/issues/5699). Thanks @jangorecki for the report and fix.
133131
134-
19. Grouped queries on keyed tables no longer return an incorrectly keyed result if the _ad hoc_ `by=` list has some function call (in particular, a function which happens to return a strictly decreasing function of the keys), e.g. `by=.(a = rev(a))`, [#5583](https://github.com/Rdatatable/data.table/issues/5583). Thanks @AbrJA for the report and @MichaelChirico for the fix.
132+
18. Grouped queries on keyed tables no longer return an incorrectly keyed result if the _ad hoc_ `by=` list has some function call (in particular, a function which happens to return a strictly decreasing function of the keys), e.g. `by=.(a = rev(a))`, [#5583](https://github.com/Rdatatable/data.table/issues/5583). Thanks @AbrJA for the report and @MichaelChirico for the fix.
135133
136-
20. Assigning `list(NULL)` to a list column now replaces the column with `list(NULL)`, instead of deleting the column [#5558](https://github.com/Rdatatable/data.table/issues/5558). This behavior is now consistent with base `data.frame`. Thanks @tdhock for the report and @joshhwuu for the fix. This is due to a fundamental ambiguity from both allowing list columns _and_ making the use of `list()` to wrap `j=` arguments optional. We think that the code behaves as expected in all cases now. See the below for some illustration:
134+
19. Assigning `list(NULL)` to a list column now replaces the column with `list(NULL)`, instead of deleting the column [#5558](https://github.com/Rdatatable/data.table/issues/5558). This behavior is now consistent with base `data.frame`. Thanks @tdhock for the report and @joshhwuu for the fix. This is due to a fundamental ambiguity from both allowing list columns _and_ making the use of `list()` to wrap `j=` arguments optional. We think that the code behaves as expected in all cases now. See the below for some illustration:
137135
138136
```r
139137
DT = data.table(L=list(1L), i=2L, c='a')
@@ -154,7 +152,7 @@ rowwiseDT(
154152
DT[, c('L', 'i') := list(NULL, 3L)] # delete L, assign to i
155153
DT[, c('L', 'i') := list(list(NULL), NULL)] # assign to L, delete i
156154
```
157-
21. An integer overflow in `fread()` with lines longer than `2^(31/2)` bytes is prevented, [#6729](https://github.com/Rdatatable/data.table/issues/6729). The typical impact was no worse than a wrong initial allocation size, corrected later. Thanks to @TaikiSan21 for the report and @aitap for the fix.
155+
20. An integer overflow in `fread()` with lines longer than `2^(31/2)` bytes is prevented, [#6729](https://github.com/Rdatatable/data.table/issues/6729). The typical impact was no worse than a wrong initial allocation size, corrected later. Thanks to @TaikiSan21 for the report and @aitap for the fix.
158156
159157
## NOTES
160158
@@ -176,11 +174,14 @@ rowwiseDT(
176174

177175
9. `key<-`, marked as deprecated since 2012 and unusable since v1.15.0, has been fully removed.
178176

179-
10. Deprecation of `logicalAsInt` argument to `fwrite()` has been upgraded from a warning (since v1.15.0) to an error. It will be removed in the next release.
177+
10. The following in-progress deprecations have proceeded:
180178

181-
11. Deprecation of `fread(autostart=)` has been upgraded to an error. It has been warning since v1.11.0 (6 years ago). The argument will be removed in the next release.
179+
+ Using `fwrite(logicalAsInt=)` has been upgraded from a warning (since v1.15.0) to an error. It will be removed in the next release.
180+
+ Using `fread(autostart=)` has been upgraded to an error. It has been warning since v1.11.0 (6 years ago). The argument will be removed in the next release.
181+
+ Using `droplevels(in.place=TRUE)` (warning since v1.16.0) has been upgraded from warning to error. The argument will be removed in the next release.
182+
+ Use of `:=` and `with=FALSE` in `[` has been upgraded from warning (since v1.15.0) to error. Long ago (before 2014), this was needed when, e.g., assigning to a vector of column names defined outside the table, but `with=FALSE` is no longer needed to do so: `DT[, (cols) := ...]` works fine.
182183

183-
12. Deprecation of `droplevels(in.place=TRUE)` (warning since v1.16.0) has been upgraded from warning to error. The argument will be removed in the next release.
184+
11. Better handling of multibyte characters in `print()`, added in 1.16.0, has the side effect of possibly ignoring invisible characters like `\n` or `\t` for the purposes of counting width for `datatable.prettyprint.char`. That's because we switched to using `strtrim()` over `substring()`, the latter of which is explicitly discouraged for the purposes of truncating strings, whereas the former of which has platform-dependent behavior for whether invisible characters count towards string width.
184185
185186
# data.table [v1.16.4](https://github.com/Rdatatable/data.table/milestone/36) 4 December 2024
186187

R/IDateTime.R

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ as.ITime.POSIXlt = function(x, ms = 'truncate', ...) {
186186
}
187187

188188
as.ITime.times = function(x, ms = 'truncate', ...) {
189-
secs = 86400 * (unclass(x) %% 1)
189+
secs = 86400L * (unclass(x) %% 1L)
190190
secs = clip_msec(secs, ms)
191191
(setattr(secs, "class", "ITime")) # the first line that creates sec will create a local copy so we can use setattr() to avoid potential copy of class()<-
192192
}
@@ -238,16 +238,16 @@ rep.ITime = function(x, ...)
238238
round.ITime <- function(x, digits = c("hours", "minutes"), ...)
239239
{
240240
(setattr(switch(match.arg(digits),
241-
hours = as.integer(round(unclass(x)/3600)*3600),
242-
minutes = as.integer(round(unclass(x)/60)*60)),
241+
hours = as.integer(round(unclass(x)/3600.0)*3600.0),
242+
minutes = as.integer(round(unclass(x)/60.0)*60.0)),
243243
"class", "ITime"))
244244
}
245245

246246
trunc.ITime <- function(x, units = c("hours", "minutes"), ...)
247247
{
248248
(setattr(switch(match.arg(units),
249-
hours = as.integer(unclass(x)%/%3600*3600),
250-
minutes = as.integer(unclass(x)%/%60*60)),
249+
hours = as.integer(unclass(x)%/%3600.0*3600.0),
250+
minutes = as.integer(unclass(x)%/%60.0*60.0)),
251251
"class", "ITime"))
252252
}
253253

@@ -280,7 +280,7 @@ IDateTime.default = function(x, ...) {
280280

281281
# POSIXt support
282282

283-
as.POSIXct.IDate = function(x, tz = "UTC", time = 0, ...) {
283+
as.POSIXct.IDate = function(x, tz = "UTC", time = 0.0, ...) {
284284
if (missing(time) && inherits(tz, "ITime")) {
285285
time = tz # allows you to use time as the 2nd argument
286286
tz = "UTC"

R/as.data.table.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ as.data.table.list = function(x,
214214
}
215215

216216
as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {
217+
if (is.data.table(x)) return(as.data.table.data.table(x)) # S3 is weird, #6739. Also # nocov; this is tested in 2302.{2,3}, not sure why it doesn't show up in coverage.
217218
if (!identical(class(x), "data.frame")) return(as.data.table(as.data.frame(x)))
218219
if (!isFALSE(keep.rownames)) {
219220
# can specify col name to keep.rownames, #575; if it's the same as key,

0 commit comments

Comments
 (0)