Skip to content

Commit 21769cf

Browse files
Merge branch 'master' into fread_commentChar
2 parents 01edb9a + 67129f0 commit 21769cf

File tree

18 files changed

+551
-70
lines changed

18 files changed

+551
-70
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ export(frollmax)
5858
export(frollmin)
5959
export(frollprod)
6060
export(frollmedian)
61+
export(frollvar)
62+
export(frollsd)
6163
export(frollapply)
6264
export(frolladapt)
6365
export(nafill)

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@
246246
#9: 2025-09-22 9 8 9.0
247247
```
248248

249-
19. New rolling functions: `frollmin`, `frollprod` and `frollmedian`, have been implemented, towards [#2778](https://github.com/Rdatatable/data.table/issues/2778). Thanks to @jangorecki for implementation. Implementation of rolling median is based on a novel algorithm "sort-median" described by [@suomela](https://github.com/suomela) in his 2014 paper [Median Filtering is Equivalent to Sorting](https://arxiv.org/abs/1406.1717). "sort-median" scales very well, not only for size of input vector but also for size of rolling window.
249+
19. Other new rolling functions, `frollmin`, `frollprod`, `frollmedian`, `frollvar` and `frollsd`, have been implemented, resolving the long-standing issue [#2778](https://github.com/Rdatatable/data.table/issues/2778). Thanks to @jangorecki for the implementation. The implementation of rolling median is based on a novel algorithm, "sort-median", described by [@suomela](https://github.com/suomela) in his 2014 paper [Median Filtering is Equivalent to Sorting](https://arxiv.org/abs/1406.1717). "sort-median" scales very well, not only with the size of the input vector but also with the size of the rolling window.
250250
```r
251251
rollmedian = function(x, n) {
252252
ans = rep(NA_real_, nx<-length(x))

R/froll.R

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,9 @@ frollprod = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","lef
216216
frollmedian = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
217217
froll(fun="median", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
218218
}
219+
# frollvar: rolling variance.
# Thin wrapper that forwards every argument unchanged to froll() with fun="var";
# all argument checking and computation happen inside froll().
# algo and align are passed on as their full choice vectors when the caller does
# not supply them, so selecting the effective value is left to froll().
# hasNA has no default and is simply passed through.
# NOTE(review): hasNA is presumably a legacy alias of has.nf - confirm in froll().
frollvar = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
220+
froll(fun="var", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
221+
}
222+
# frollsd: rolling standard deviation.
# Thin wrapper that forwards every argument unchanged to froll() with fun="sd";
# all argument checking and computation happen inside froll().
# algo and align are passed on as their full choice vectors when the caller does
# not supply them, so selecting the effective value is left to froll().
# hasNA has no default and is simply passed through.
# NOTE(review): hasNA is presumably a legacy alias of has.nf - confirm in froll().
frollsd = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
223+
froll(fun="sd", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
224+
}

inst/tests/froll.Rraw

Lines changed: 83 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,6 +1418,86 @@ test(6001.693, frollapply(FUN=median, adaptive=TRUE, c(1:2,NA), c(2,0,2)), c(NA,
14181418
test(6001.694, frollapply(FUN=median, adaptive=TRUE, c(1:2,NA), c(2,0,2), na.rm=TRUE), c(NA,NA_integer_,2L))
14191419
test(6001.695, frollapply(FUN=median, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2), na.rm=TRUE, partial=TRUE), c(1,NA_real_,2))
14201420

1421+
test(6001.711, frollvar(1:3, 0), c(NA_real_,NA_real_,NA_real_), options=c("datatable.verbose"=TRUE), output="window width of size 0")
1422+
test(6001.712, frollvar(1:3, 0, fill=99), c(NA_real_,NA_real_,NA_real_))
1423+
test(6001.713, frollvar(c(1:2,NA), 0), c(NA_real_,NA_real_,NA_real_))
1424+
test(6001.714, frollvar(c(1:2,NA), 0, na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1425+
test(6001.715, frollvar(1:3, 0, algo="exact"), c(NA_real_,NA_real_,NA_real_), options=c("datatable.verbose"=TRUE), output="window width of size 0")
1426+
test(6001.716, frollvar(c(1:2,NA), 0, algo="exact"), c(NA_real_,NA_real_,NA_real_))
1427+
test(6001.717, frollvar(c(1:2,NA), 0, algo="exact", na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1428+
test(6001.718, frollvar(c(1:2,NA), 2), c(NA,0.5,NA), options=c("datatable.verbose"=TRUE), output="redirecting to frollvarExact")
1429+
test(6001.721, frollvar(adaptive=TRUE, 1:3, c(2,0,2)), c(NA,NA,0.5), options=c("datatable.verbose"=TRUE), output="not implemented, fall back to")
1430+
test(6001.722, frollvar(adaptive=TRUE, 1:3, c(2,0,2), fill=99), c(99,NA,0.5))
1431+
test(6001.723, frollvar(adaptive=TRUE, c(1:2,NA), c(2,0,2)), c(NA_real_,NA_real_,NA_real_))
1432+
test(6001.724, frollvar(adaptive=TRUE, c(1:2,NA), c(2,0,2), na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1433+
test(6001.7241, frollvar(adaptive=TRUE, c(1:2,NA), c(2,2,2), has.nf=FALSE), c(NA_real_,0.5,NA_real_), warning="used but non-finite values are present in input")
1434+
test(6001.7242, frollvar(adaptive=TRUE, c(1:2,NA), c(2,2,2)), c(NA_real_,0.5,NA_real_), options=c("datatable.verbose"=TRUE), output="propagates NFs properply, no need to re-run")
1435+
test(6001.7243, frollvar(adaptive=TRUE, c(1:2,NA), c(2,2,2), na.rm=TRUE), c(NA_real_,0.5,NA_real_), options=c("datatable.verbose"=TRUE), output="re-running with extra care for NFs")
1436+
test(6001.725, frollvar(adaptive=TRUE, 1:3, c(2,0,2), algo="exact"), c(NA,NA,0.5))
1437+
test(6001.726, frollvar(adaptive=TRUE, 1:3, c(2,0,2), fill=99, algo="exact"), c(99,NA,0.5))
1438+
test(6001.727, frollvar(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact"), c(NA_real_,NA_real_,NA_real_))
1439+
test(6001.728, frollvar(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1440+
test(6001.729, frollvar(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE, partial=TRUE), c(NA_real_,NA_real_,NA_real_))
1441+
test(6001.730, frollvar(adaptive=TRUE, c(1:2,NA), c(2,0,2), fill=99, algo="exact", na.rm=TRUE), c(99,NA,NA))
1442+
y = c(1e8+2.980232e-8, 1e8, 1e8, 1e8) # CLAMP0 test
1443+
test(6001.731, frollvar(y, 3)[4L], 0)
1444+
test(6001.732, frollsd(y, 3)[4L], 0)
1445+
test(6001.733, frollvar(y, c(3,3,3,3), adaptive=TRUE)[4L], 0)
1446+
test(6001.734, frollsd(y, c(3,3,3,3), adaptive=TRUE)[4L], 0)
1447+
test(6001.781, frollapply(FUN=var, 1:3, 0), c(NA_real_,NA_real_,NA_real_))
1448+
test(6001.782, frollapply(FUN=var, 1:3, 0, fill=99), c(NA_real_,NA_real_,NA_real_))
1449+
test(6001.783, frollapply(FUN=var, c(1:2,NA), 0), c(NA_real_,NA_real_,NA_real_))
1450+
test(6001.784, frollapply(FUN=var, c(1:2,NA), 0, na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1451+
test(6001.7910, frollapply(FUN=var, adaptive=TRUE, 1:3, c(2,0,2)), c(NA,NA,0.5))
1452+
test(6001.7911, frollapply(FUN=var, adaptive=TRUE, list(1:3,2:4), c(2,0,2)), list(c(NA,NA,0.5), c(NA,NA,0.5)))
1453+
test(6001.7912, frollapply(FUN=var, adaptive=TRUE, 1:3, list(c(2,0,2), c(0,2,0))), list(c(NA,NA,0.5), c(NA,0.5,NA)))
1454+
test(6001.7913, frollapply(FUN=var, adaptive=TRUE, list(1:3,2:4), list(c(2,0,2), c(0,2,0))), list(c(NA,NA,0.5), c(NA,0.5,NA), c(NA,NA,0.5), c(NA,0.5,NA)))
1455+
test(6001.792, frollapply(FUN=var, adaptive=TRUE, 1:3, c(2,0,2), fill=99), c(99,NA,0.5))
1456+
test(6001.793, frollapply(FUN=var, adaptive=TRUE, c(1:2,NA), c(2,0,2)), c(NA_real_,NA_real_,NA_real_))
1457+
test(6001.794, frollapply(FUN=var, adaptive=TRUE, c(1:2,NA), c(2,0,2), na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1458+
test(6001.795, frollapply(FUN=var, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2), na.rm=TRUE, partial=TRUE), c(NA_real_,NA_real_,NA_real_))
1459+
1460+
test(6001.810, frollsd(1:3, 0), c(NA_real_,NA_real_,NA_real_), options=c("datatable.verbose"=TRUE), output="frollsdFast: calling sqrt(frollvarFast(...))")
1461+
test(6001.811, frollsd(1:3, 0), c(NA_real_,NA_real_,NA_real_), options=c("datatable.verbose"=TRUE), output="window width of size 0")
1462+
test(6001.812, frollsd(1:3, 0, fill=99), c(NA_real_,NA_real_,NA_real_))
1463+
test(6001.813, frollsd(c(1:2,NA), 0), c(NA_real_,NA_real_,NA_real_))
1464+
test(6001.814, frollsd(c(1:2,NA), 0, na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1465+
test(6001.815, frollsd(1:3, 0, algo="exact"), c(NA_real_,NA_real_,NA_real_), options=c("datatable.verbose"=TRUE), output="window width of size 0")
1466+
test(6001.816, frollsd(c(1:2,NA), 0, algo="exact"), c(NA_real_,NA_real_,NA_real_))
1467+
test(6001.817, frollsd(c(1:2,NA), 0, algo="exact", na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1468+
test(6001.818, frollsd(c(1:2,NA), 2), c(NA,sqrt(0.5),NA), options=c("datatable.verbose"=TRUE), output="redirecting to frollvarExact")
1469+
test(6001.8191, frollsd(1:3, 2, fill=99), c(99,sqrt(0.5),sqrt(0.5)))
1470+
test(6001.8192, frollsd(1:3, 2, fill=99, algo="exact"), c(99,sqrt(0.5),sqrt(0.5)))
1471+
test(6001.8193, frollsd(c(1:2,NA), 2, has.nf=FALSE), c(NA,sqrt(0.5),NA), warning="used but non-finite values are present in input")
1472+
test(6001.8194, frollsd(c(NA,2:3), 2, has.nf=FALSE), c(NA,NA,sqrt(0.5)), warning="used but non-finite values are present in input")
1473+
test(6001.8195, frollsd(c(NA,2:3), 2), c(NA,NA,sqrt(0.5)), options=c("datatable.verbose"=TRUE), output="skip non-finite inaware attempt and run with extra care")
1474+
test(6001.8196, frollsd(c(NA,2:3), 2, has.nf=FALSE, algo="exact"), c(NA,NA,sqrt(0.5)), warning="used but non-finite values are present in input")
1475+
test(6001.8197, frollsd(c(NA,2:3), 2, algo="exact", na.rm=TRUE), c(NA,NA,sqrt(0.5)), options=c("datatable.verbose"=TRUE), output="re-running with extra care for NF")
1476+
test(6001.8201, frollsd(adaptive=TRUE, 1:3, c(2,2,2)), c(NA,sqrt(0.5),sqrt(0.5)), options=c("datatable.verbose"=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1")
1477+
test(6001.8202, frollsd(adaptive=TRUE, 1:3, c(2,2,2)), c(NA,sqrt(0.5),sqrt(0.5)), options=c("datatable.verbose"=TRUE), output="frolladaptivesdExact: calling sqrt(frolladaptivevarExact(...))")
1478+
test(6001.821, frollsd(adaptive=TRUE, 1:3, c(2,0,2)), c(NA,NA,sqrt(0.5)))
1479+
test(6001.822, frollsd(adaptive=TRUE, 1:3, c(2,0,2), fill=99), c(99,NA,sqrt(0.5)))
1480+
test(6001.823, frollsd(adaptive=TRUE, c(1:2,NA), c(2,0,2)), c(NA_real_,NA_real_,NA_real_))
1481+
test(6001.824, frollsd(adaptive=TRUE, c(1:2,NA), c(2,0,2), na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1482+
test(6001.825, frollsd(adaptive=TRUE, 1:3, c(2,0,2), algo="exact"), c(NA,NA,sqrt(0.5)))
1483+
test(6001.826, frollsd(adaptive=TRUE, 1:3, c(2,0,2), fill=99, algo="exact"), c(99,NA,sqrt(0.5)))
1484+
test(6001.827, frollsd(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact"), c(NA_real_,NA_real_,NA_real_))
1485+
test(6001.828, frollsd(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1486+
test(6001.829, frollsd(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE, partial=TRUE), c(NA_real_,NA_real_,NA_real_))
1487+
test(6001.830, frollsd(adaptive=TRUE, c(1:2,NA), c(2,0,2), fill=99, algo="exact", na.rm=TRUE), c(99,NA,NA))
1488+
test(6001.881, frollapply(FUN=sd, 1:3, 0), c(NA_real_,NA_real_,NA_real_))
1489+
test(6001.882, frollapply(FUN=sd, 1:3, 0, fill=99), c(NA_real_,NA_real_,NA_real_))
1490+
test(6001.883, frollapply(FUN=sd, c(1:2,NA), 0), c(NA_real_,NA_real_,NA_real_))
1491+
test(6001.884, frollapply(FUN=sd, c(1:2,NA), 0, na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1492+
test(6001.8910, frollapply(FUN=sd, adaptive=TRUE, 1:3, c(2,0,2)), c(NA,NA,sqrt(0.5)))
1493+
test(6001.8911, frollapply(FUN=sd, adaptive=TRUE, list(1:3,2:4), c(2,0,2)), list(c(NA,NA,sqrt(0.5)), c(NA,NA,sqrt(0.5))))
1494+
test(6001.8912, frollapply(FUN=sd, adaptive=TRUE, 1:3, list(c(2,0,2), c(0,2,0))), list(c(NA,NA,sqrt(0.5)), c(NA,sqrt(0.5),NA)))
1495+
test(6001.8913, frollapply(FUN=sd, adaptive=TRUE, list(1:3,2:4), list(c(2,0,2), c(0,2,0))), list(c(NA,NA,sqrt(0.5)), c(NA,sqrt(0.5),NA), c(NA,NA,sqrt(0.5)), c(NA,sqrt(0.5),NA)))
1496+
test(6001.892, frollapply(FUN=sd, adaptive=TRUE, 1:3, c(2,0,2), fill=99), c(99,NA,sqrt(0.5)))
1497+
test(6001.893, frollapply(FUN=sd, adaptive=TRUE, c(1:2,NA), c(2,0,2)), c(NA_real_,NA_real_,NA_real_))
1498+
test(6001.894, frollapply(FUN=sd, adaptive=TRUE, c(1:2,NA), c(2,0,2), na.rm=TRUE), c(NA_real_,NA_real_,NA_real_))
1499+
test(6001.895, frollapply(FUN=sd, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2), na.rm=TRUE, partial=TRUE), c(NA_real_,NA_real_,NA_real_))
1500+
14211501
# frollmedian
14221502
rollmedian = function(x, k, na.rm=FALSE) {
14231503
ans = rep(NA_real_, length(x))
@@ -2250,7 +2330,7 @@ rollfun = function(x, n, FUN, fill=NA_real_, na.rm=FALSE, nf.rm=FALSE, partial=F
22502330
}
22512331
ans
22522332
}
2253-
base_compare = function(x, n, funs=c("mean","sum","max","min","prod","median"), algos=c("fast","exact")) {
2333+
base_compare = function(x, n, funs=c("mean","sum","max","min","prod","median","var","sd"), algos=c("fast","exact")) {
22542334
num.step = 0.0001
22552335
for (fun in funs) {
22562336
for (na.rm in c(FALSE, TRUE)) {
@@ -2334,7 +2414,7 @@ base_compare(x, n)
23342414
#### against zoo
23352415
if (requireNamespace("zoo", quietly=TRUE)) {
23362416
drollapply = function(...) as.double(zoo::rollapply(...)) # rollapply is not consistent in data type of answer, force to double
2337-
zoo_compare = function(x, n, funs=c("mean","sum","max","min","prod","median"), algos=c("fast","exact")) {
2417+
zoo_compare = function(x, n, funs=c("mean","sum","max","min","prod","median","var","sd"), algos=c("fast","exact")) {
23382418
num.step = 0.0001
23392419
#### fun, align, na.rm, fill, algo, partial
23402420
for (fun in funs) {
@@ -2432,7 +2512,7 @@ arollfun = function(FUN, x, n, na.rm=FALSE, align=c("right","left"), fill=NA, nf
24322512
}
24332513
ans
24342514
}
2435-
afun_compare = function(x, n, funs=c("mean","sum","max","min","prod","median"), algos=c("fast","exact")) {
2515+
afun_compare = function(x, n, funs=c("mean","sum","max","min","prod","median","var","sd"), algos=c("fast","exact")) {
24362516
num.step = 0.0001
24372517
#### fun, align, na.rm, fill, algo
24382518
for (fun in funs) {

inst/tests/tests.Rraw

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17078,16 +17078,19 @@ DT = data.table(dates, times)
1707817078
DT_trunc = copy(DT)[, times := as.POSIXct(trunc(times))]
1707917079
tmp = tempfile()
1708017080
## ISO8601 format (%FT%TZ) by default
17081-
fwrite(DT, tmp)
17082-
test(2150.01, fread(tmp), DT) # defaults for fwrite/fread simple and preserving
17083-
fwrite(DT, tmp, dateTimeAs='write.csv') # as write.csv, writes the UTC times as-is not local because the time column has tzone=="UTC", but without the Z marker. Also truncates milliseconds, hence DT_trunc below.
17084-
test(2150.021, env=list(TZ=NULL), sapply(fread(tmp,tz=""), typeof), c(dates="integer", times="character")) # from v1.14.0 tz="" needed to read datetime as character
17085-
test(2150.022, env=list(TZ=NULL), fread(tmp,tz="UTC"), DT_trunc) # user can tell fread to interpret the unmarked datetimes as UTC
17086-
test(2150.023, env=c(TZ='UTC'), fread(tmp), DT_trunc) # TZ environment variable is also recognized
17087-
if (.Platform$OS.type!="windows") {
17088-
test(2150.024, env=c(TZ=''), fread(tmp), DT_trunc) # on Windows this unsets TZ, see ?Sys.setenv
17089-
# blank TZ env variable on non-Windows is recognized as UTC consistent with C and R; but R's tz= argument is the opposite and uses "" for local
17090-
}
17081+
# Tests 2150.01-2150.024 run inside local() so that the option change below is
# scoped: digits.secs is set to 0 and the previous value is restored by on.exit()
# when the local() block finishes.
# NOTE(review): presumably this keeps a user-set digits.secs from influencing
# these fwrite/fread datetime round-trips - confirm against the motivating issue.
local({
17082+
old=options(digits.secs=0); on.exit(options(old))
17083+
fwrite(DT, tmp)
17084+
test(2150.01, fread(tmp), DT) # defaults for fwrite/fread simple and preserving
17085+
fwrite(DT, tmp, dateTimeAs='write.csv') # as write.csv, writes the UTC times as-is not local because the time column has tzone=="UTC", but without the Z marker. Also truncates milliseconds, hence DT_trunc below.
17086+
test(2150.021, env=list(TZ=NULL), sapply(fread(tmp,tz=""), typeof), c(dates="integer", times="character")) # from v1.14.0 tz="" needed to read datetime as character
17087+
test(2150.022, env=list(TZ=NULL), fread(tmp,tz="UTC"), DT_trunc) # user can tell fread to interpret the unmarked datetimes as UTC
17088+
test(2150.023, env=c(TZ='UTC'), fread(tmp), DT_trunc) # TZ environment variable is also recognized
17089+
if (.Platform$OS.type!="windows") {
17090+
test(2150.024, env=c(TZ=''), fread(tmp), DT_trunc) # on Windows this unsets TZ, see ?Sys.setenv
17091+
# blank TZ env variable on non-Windows is recognized as UTC consistent with C and R; but R's tz= argument is the opposite and uses "" for local
17092+
}
17093+
})
1709117094
# Notes:
1709217095
# - from v1.14.0 tz="" needed
1709317096
# - as.POSIXct puts "" on the result (testing the write.csv version here with missing tzone)

0 commit comments

Comments
 (0)