Skip to content

Commit 7827275

Browse files
committed
Merge branch 'master' of https://github.com/Rdatatable/data.table into issue____2611
2 parents 8f6d2b2 + 4d0b4a5 commit 7827275

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+3497
-910
lines changed

.github/workflows/pkgup.yaml

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ on:
1212
branches:
1313
- 'master'
1414

15-
name: pkgdown-deploy
15+
name: pkgup
1616

1717
jobs:
1818
build:
@@ -39,6 +39,7 @@ jobs:
3939
Rscript -e 'stopifnot(file.copy("DESCRIPTION", file.path(tdir<-tempdir(), "PACKAGES"))); db<-available.packages(paste0("file://", tdir)); deps<-setdiff(tools::package_dependencies(read.dcf("DESCRIPTION", fields="Package")[[1L]], db, which="most")[[1L]], installed.packages(priority="high")[,"Package"]); if (length(deps)) { ap<-available.packages()[,"Version"]; ap<-ap[names(ap) %in% deps]; if (!all(deps%in%names(ap))) stop("dependencies are not avaiable in repository: ",paste(setdiff(deps, names(ap)), collapse=", ")); ip<-installed.packages()[,"Version"]; ip<-ip[names(ip) %in% deps]; pkgs<-ap[deps]>ip[deps]; install.packages(names(pkgs[pkgs|is.na(pkgs)]), INSTALL_opts="--html") }'
4040
- name: build
4141
run: |
42+
sed -i "0,/^Version: [0-9.]\+$/s//&-$(TZ=UTC git log -1 --format=%ct)/" ./DESCRIPTION
4243
echo "Revision:" $GITHUB_SHA >> ./DESCRIPTION
4344
R CMD build .
4445
- name: check
@@ -47,16 +48,9 @@ jobs:
4748
- name: manual
4849
if: github.ref == 'refs/heads/master'
4950
run: |
50-
cp -R ${{ env.R_LIBS_USER }} library
51-
R CMD INSTALL --library="library" $(ls -1t data.table_*.tar.gz | head -n 1) --html
52-
mkdir -p doc/html
53-
cp $(R RHOME)/doc/html/{left.jpg,up.jpg,Rlogo.svg,R.css,index.html} doc/html
54-
Rscript -e 'utils::make.packages.html("library", docdir="doc")'
55-
sed -i "s|file://|../..|g" doc/html/packages.html
51+
R CMD INSTALL $(ls -1t data.table_*.tar.gz | head -n 1)
5652
mkdir -p public
57-
mv doc public/doc
58-
cp -r --parents library/*/{html,help,doc,demo,DESCRIPTION,README,NEWS,README.md,NEWS.md} public 2>/dev/null || :
59-
sed -i 's|"/doc/html/|"/data.table/doc/html/|g' public/library/data.table/doc/index.html 2>/dev/null || :
53+
Rscript -e 'tools::pkg2HTML("data.table", out="public/index.html")'
6054
- name: repo
6155
if: github.ref == 'refs/heads/master'
6256
run: |

.gitlab-ci.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ build:
7777
- rm -r bus
7878
script:
7979
- sed -i '/^[[:space:]]*$/d' ./DESCRIPTION ## make last line end abruptly; i.e. without a final \n
80+
- |
81+
sed -i "0,/^Version: [0-9.]\+$/s//&-$(TZ=UTC git log -1 --format=%ct)/" ./DESCRIPTION
8082
- echo "Revision:" $CI_COMMIT_SHA >> ./DESCRIPTION
8183
- R CMD build .
8284
- mkdir -p bus/$CI_JOB_NAME/
@@ -314,6 +316,8 @@ test-win-old:
314316
tags:
315317
- saas-macos-medium-m1
316318
before_script:
319+
- curl -LO https://github.com/XQuartz/XQuartz/releases/download/XQuartz-2.8.5/XQuartz-2.8.5.pkg
320+
- sudo installer -pkg "$(ls -1t XQuartz-*.pkg | head -n 1)" -target /
317321
- curl -O $R_BIN
318322
- sudo installer -pkg "$(ls -1t R-*-arm64.pkg | head -n 1)" -target /
319323
- sudo Rscript -e "source('https://mac.R-project.org/bin/install.R'); install.libs('gettext')"

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ Version: 1.17.99
33
Title: Extension of `data.frame`
44
Depends: R (>= 3.4.0)
55
Imports: methods
6-
Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), R.utils, xts, zoo (>= 1.8-1), yaml, knitr, markdown
6+
Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), R.utils (>= 2.13.0), xts, zoo (>= 1.8-1), yaml, knitr, markdown
77
Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development.
88
License: MPL-2.0 | file LICENSE
99
URL: https://r-datatable.com, https://Rdatatable.gitlab.io/data.table, https://github.com/Rdatatable/data.table

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,10 @@ S3method(rollup, data.table)
5555
export(frollmean)
5656
export(frollsum)
5757
export(frollmax)
58+
export(frollmin)
59+
export(frollprod)
5860
export(frollapply)
61+
export(frolladapt)
5962
export(nafill)
6063
export(setnafill)
6164
export(.Last.updated)
@@ -212,6 +215,7 @@ S3method(format_list_item, data.frame)
212215
export(fdroplevels, setdroplevels)
213216
S3method(droplevels, data.table)
214217
export(frev)
218+
export(.selfref.ok)
215219

216220
# sort_by added in R 4.4.0, #6662, https://stat.ethz.ch/pipermail/r-announce/2024/000701.html
217221
if (getRversion() >= "4.4.0") S3method(sort_by, data.table)

NEWS.md

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,20 @@
2020
frollsum(c(1,2,3,Inf,5,6), 2)
2121
#[1] NA 3 5 Inf Inf 11
2222

23+
4. `frollapply` result is not coerced to numeric anymore. Users' code could possibly break if it depends on forced coercion of input/output to numeric type.
24+
```r
25+
## before
26+
frollapply(c(F,T,F,F,F,T), 2, any)
27+
#[1] NA 1 1 0 0 1
28+
29+
## now
30+
frollapply(c(F,T,F,F,F,T), 2, any)
31+
#[1] NA TRUE TRUE FALSE FALSE TRUE
32+
```
33+
Additionally, argument names in `frollapply` have been renamed from `x` to `X` and `n` to `N` to avoid conflicts with common argument names that may be passed to `...`, aligning with the base R API of `lapply`. `x` and `n` continue to work with a warning, for now.
34+
35+
5. Negative and missing values of the `n` argument of adaptive rolling functions now trigger an error.
36+
2337
### NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES
2438
2539
1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector, for example `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname)` to `FALSE`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`.
@@ -93,7 +107,7 @@
93107
94108
13. New `mergelist()` and `setmergelist()` similarly work _a la_ `Reduce()` to recursively merge a `list` of data.tables, [#599](https://github.com/Rdatatable/data.table/issues/599). Different join modes (_left_, _inner_, _full_, _right_, _semi_, _anti_, and _cross_) are supported through the `how` argument; duplicate handling goes through the `mult` argument. `setmergelist()` carefully avoids copies where one is not needed, e.g. in a 1:1 left join. Thanks Patrick Nicholson for the FR (in 2013!), @jangorecki for the PR, and @MichaelChirico for extensive reviews and fine-tuning.
95109
96-
```r
110+
```r
97111
l = list(
98112
data.table(id = c(1L, 2L, 3L), x = c("a", "b", "c")),
99113
data.table(id = c(1L, 2L, 4L), y = c("d", "e", "f")),
@@ -157,6 +171,83 @@
157171
158172
As of now, adaptive rolling max has no _on-line_ implementation (`algo="fast"`); it uses a naive approach (`algo="exact"`). Therefore, further speed-up is still possible if `algo="fast"` gets implemented.
159173
174+
17. Function `frollapply` has been completely rewritten. Thanks to @jangorecki for the implementation. Be sure to read the `frollapply` manual before using the function. The changes are as follows:
175+
- all basic types are now supported on input/output, not only double. Users' code could possibly break if it depends on forced coercion of input/output to double type.
176+
- new argument `by.column` allows passing a multi-column subset of a data.table into a rolling function, closes [#4887](https://github.com/Rdatatable/data.table/issues/4887).
177+
```r
178+
x = data.table(v1=rnorm(120), v2=rnorm(120))
179+
f = function(x) coef(lm(v2 ~ v1, data=x))
180+
frollapply(x, 4, f, by.column=FALSE)
181+
# (Intercept) v1
182+
# <num> <num>
183+
# 1: NA NA
184+
# 2: NA NA
185+
# 3: NA NA
186+
# 4: -0.04648236 -0.6349687
187+
# 5: 0.09208733 -0.4964023
188+
#---
189+
#116: -0.21169439 0.7421358
190+
#117: -0.19729119 0.4926939
191+
#118: -0.04217896 0.0452713
192+
#119: 0.22472549 -0.5245874
193+
#120: 0.54540359 -0.1638333
194+
```
195+
- uses multiple CPU threads (on a decent OS); evaluation of UDF is inherently slow so this can be a great help.
196+
```r
197+
x = rnorm(1e5)
198+
n = 500
199+
setDTthreads(1)
200+
system.time(
201+
th1 <- frollapply(x, n, median, simplify=unlist)
202+
)
203+
# user system elapsed
204+
# 3.078 0.005 3.084
205+
setDTthreads(4)
206+
system.time(
207+
th4 <- frollapply(x, n, median, simplify=unlist)
208+
)
209+
# user system elapsed
210+
# 2.453 0.135 0.897
211+
all.equal(th1, th4)
212+
#[1] TRUE
213+
```
214+
215+
18. New helper `frolladapt` to facilitate applying rolling functions over windows of fixed calendar-time width in irregularly-spaced data sets, thereby bypassing the need to "augment" such data with placeholder rows, [#3241](https://github.com/Rdatatable/data.table/issues/3241). Thanks to @jangorecki for implementation.
216+
```r
217+
idx = as.Date("2025-09-05") + c(0,4,7,8,9,10,12,13,17)
218+
dt = data.table(index=idx, value=seq_along(idx))
219+
dt
220+
# index value
221+
# <Date> <int>
222+
#1: 2025-09-05 1
223+
#2: 2025-09-09 2
224+
#3: 2025-09-12 3
225+
#4: 2025-09-13 4
226+
#5: 2025-09-14 5
227+
#6: 2025-09-15 6
228+
#7: 2025-09-17 7
229+
#8: 2025-09-18 8
230+
#9: 2025-09-22 9
231+
dt[, c("rollmean3","rollmean3days") := list(
232+
frollmean(value, 3),
233+
frollmean(value, frolladapt(index, 3), adaptive=TRUE)
234+
)]
235+
dt
236+
# index value rollmean3 rollmean3days
237+
# <Date> <int> <num> <num>
238+
#1: 2025-09-05 1 NA NA
239+
#2: 2025-09-09 2 NA 2.0
240+
#3: 2025-09-12 3 2 3.0
241+
#4: 2025-09-13 4 3 3.5
242+
#5: 2025-09-14 5 4 4.0
243+
#6: 2025-09-15 6 5 5.0
244+
#7: 2025-09-17 7 6 6.5
245+
#8: 2025-09-18 8 7 7.5
246+
#9: 2025-09-22 9 8 9.0
247+
```
248+
249+
19. New rolling functions, `frollmin` and `frollprod`, have been implemented, towards [#2778](https://github.com/Rdatatable/data.table/issues/2778). Thanks to @jangorecki for implementation.
250+
160251
### BUG FIXES
161252

162253
1. `fread()` no longer warns on certain systems on R 4.5.0+ where the file owner can't be resolved, [#6918](https://github.com/Rdatatable/data.table/issues/6918). Thanks @ProfFancyPants for the report and PR.
@@ -222,6 +313,8 @@
222313
223314
6. Using a double vector in `set()`'s `i=` and/or `j=` no longer throws a warning about preferring integer, [#6594](https://github.com/Rdatatable/data.table/issues/6594). While it may improve efficiency to use integer, there's no guarantee it's an improvement and the difference is likely to be minimal. The coercion will still be reported under `datatable.verbose=TRUE`. For package/production use cases, static analyzers such as `lintr::implicit_integer_linter()` can also report when numeric literals should be rewritten as integer literals.
224315

316+
7. In rare situations a data.table object may lose its internal attribute that holds a self-reference. New helper function `.selfref.ok()` tests just that. It is only intended for technical use cases. See manual for examples.
317+
225318
## data.table [v1.17.8](https://github.com/Rdatatable/data.table/milestone/41) (6 July 2025)
226319

227320
1. Internal functions used to signal errors are now marked as non-returning, silencing a compiler warning about potentially unchecked allocation failure. Thanks to Prof. Brian D. Ripley for the report and @aitap for the fix, [#7070](https://github.com/Rdatatable/data.table/pull/7070).

R/fread.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@ yaml=FALSE, tmpdir=tempdir(), tz="UTC")
119119
if ((w <- endsWithAny(file, c(".gz", ".bgz",".bz2"))) || (gzsig <- is_gzip(file_signature)) || is_bzip(file_signature)) {
120120
if (!requireNamespace("R.utils", quietly = TRUE))
121121
stopf("To read %s files directly, fread() requires 'R.utils' package which cannot be found. Please install 'R.utils' using 'install.packages('R.utils')'.", if (w<=2L || gzsig) "gz" else "bz2") # nocov
122+
# not worth doing a behavior test here, so just use getRversion().
123+
if (packageVersion("R.utils") < "2.13.0" && base::getRversion() >= "4.5.0")
124+
stopf("Reading compressed files in fread requires R.utils version 2.13.0 or higher. Please upgrade R.utils.") # nocov
122125
FUN = if (w<=2L || gzsig) gzfile else bzfile
123126
decompFile = tempfile(tmpdir=tmpdir)
124127
on.exit(unlink(decompFile), add=TRUE)

0 commit comments

Comments
 (0)