Skip to content

Commit 09cc3ff

Browse files
committed
recoded frollapply but not split
1 parent 7258909 commit 09cc3ff

File tree

12 files changed

+1367
-540
lines changed

12 files changed

+1367
-540
lines changed

NEWS.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@
88

99
1. Rolling functions `frollmean` and `frollsum` used to treat `Inf` and `-Inf` as `NA` when using default `algo="fast"`. It has been changed now and infinite values are not treated as `NA` anymore. If your input into those functions has `Inf` or `-Inf` then you will be affected by this change.
1010

11+
2. `frollapply` result is not coerced to numeric anymore. Users code could possibly break if it depends on forced coercion of input/output to numeric type.
12+
```r
13+
## before
14+
frollapply(c(F,T,F,F,F,T), 2, any)
15+
#[1] NA 1 1 0 0 1
16+
17+
## 1.18.0
18+
frollapply(c(F,T,F,F,F,T), 2, any)
19+
#[1] NA TRUE TRUE FALSE FALSE TRUE
20+
```
21+
1122
### NEW FEATURES
1223

1324
1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also match `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
@@ -56,6 +67,54 @@ microbenchmark::microbenchmark(
5667
# frapply(x) 713.23108 742.34657 865.2524 848.31641 965.3599 1114.0531 10
5768
```
5869

70+
6. Function `frollapply` has been completely rewritten. Be sure to read `frollapply` manual before using the function. There are following changes:
71+
72+
- all basic types are now supported on input/output, not only double. Users code could possibly break if it depends on forced coercion of input/output to double type.
73+
- new argument `by.column` allowing to pass a multi-column subset of a data.table into a rolling function, closes [#4887](https://github.com/Rdatatable/data.table/issues/4887).
74+
```r
75+
x = as.data.table(iris)
76+
flow = function(x) {
77+
v1 = x[[1L]]
78+
v2 = x[[2L]]
79+
(v1[2L] - v1[1L] * (1+v2[2L])) / v1[1L]
80+
}
81+
x[, "flow" := frollapply(.(Sepal.Length, Sepal.Width), 2, flow, by.column=FALSE),
82+
by = Species][]
83+
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species flow
84+
# <num> <num> <num> <num> <fctr> <num>
85+
# 1: 5.1 3.5 1.4 0.2 setosa NA
86+
# 2: 4.9 3.0 1.4 0.2 setosa -3.039216
87+
# 3: 4.7 3.2 1.3 0.2 setosa -3.240816
88+
# 4: 4.6 3.1 1.5 0.2 setosa -3.121277
89+
# 5: 5.0 3.6 1.4 0.2 setosa -3.513043
90+
# ---
91+
#146: 6.7 3.0 5.2 2.3 virginica -3.000000
92+
#147: 6.3 2.5 5.0 1.9 virginica -2.559701
93+
#148: 6.5 3.0 5.2 2.0 virginica -2.968254
94+
#149: 6.2 3.4 5.4 2.3 virginica -3.446154
95+
#150: 5.9 3.0 5.1 1.8 virginica -3.048387
96+
```
97+
98+
- uses multiple CPU threads; evaluation of UDF is inherently slow so this can be a great help.
99+
```r
100+
x = rnorm(1e5)
101+
n = 500
102+
setDTthreads(1)
103+
system.time(
104+
th1 <- frollapply(x, n, median, simplify=unlist)
105+
)
106+
# user system elapsed
107+
# 4.106 0.008 4.115
108+
setDTthreads(4)
109+
system.time(
110+
th4 <- frollapply(x, n, median, simplify=unlist)
111+
)
112+
# user system elapsed
113+
# 5.778 0.140 1.498
114+
all.equal(th1, th4)
115+
#[1] TRUE
116+
```
117+
59118
### BUG FIXES
60119

61120
1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix.

R/froll.R

Lines changed: 83 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
trimn = function(n, len, align) {
33
n = min(n, len) ## so frollsum(1:2, 3, partial=TRUE) works
44
if (align=="right")
5-
c(seq.int(n), rep.int(n, len-n))
5+
c(seq_len(n), rep.int(n, len-n))
66
else
7-
c(rep.int(n, len-n), rev(seq.int(n)))
7+
c(rep.int(n, len-n), rev(seq_len(n)))
88
}
99
trimnadaptive = function(n, align) {
1010
if (align=="right")
@@ -25,43 +25,102 @@ trimnadaptive = function(n, align) {
2525
# frollsum(list(1:4, 2:5), 2:3, partial=FALSE, adaptive=FALSE)
2626
# frollsum(list(1:4, 2:5), 2:3, partial=TRUE, adaptive=FALSE)
2727
partial2adaptive = function(x, n, align, adaptive) {
28+
## do not quote argument x and n arg names because frollapply has them in uppercase
2829
if (align=="center")
2930
stopf("'partial' cannot be used together with align='center'")
30-
if (is.list(x) && length(unique(lengths(x)))!=1L)
31-
stopf("'partial' does not support variable length of columns in 'x'")
32-
len = if (is.list(x)) length(x[[1L]]) else length(x)
31+
if (is.list(x)) {
32+
if (!is.data.frame(x) && !equal.lengths(x)) ## froll
33+
stopf("'partial' does not support variable length of columns in x")
34+
else if (all.data.frame(x) && !equal.nrows(x)) ## frollapply by.column=F, single DT already wrapped into list
35+
stopf("'partial' does not support variable nrow of data.tables in x")
36+
}
37+
len = if (is.list(x)) {
38+
if (is.data.frame(x[[1L]])) ## frollapply by.column
39+
nrow(x[[1L]])
40+
else ## froll, this will work for both x list and x dt on input
41+
length(x[[1L]])
42+
} else length(x)
3343
verbose = getOption("datatable.verbose")
3444
if (!adaptive) {
3545
if (is.list(n))
3646
stopf("n must be an integer, list is accepted for adaptive TRUE")
3747
if (!is.numeric(n))
3848
stopf("n must be an integer vector or a list of integer vectors")
3949
if (verbose)
40-
catf("partial2adaptive: froll partial=TRUE trimming 'n' and redirecting to adaptive=TRUE\n")
41-
if (length(n)>1L) {
50+
catf("partial2adaptive: froll partial=TRUE trimming n and redirecting to adaptive=TRUE\n")
51+
if (length(n) > 1L) {
4252
lapply(n, len, align, FUN=trimn)
4353
} else {
4454
trimn(n, len, align)
4555
}
4656
} else {
4757
if (!(is.numeric(n) || (is.list(n) && all(vapply_1b(n, is.numeric)))))
4858
stopf("n must be an integer vector or a list of integer vectors")
59+
if (is.list(n) && length(unique(lengths(n))) != 1L)
60+
stopf("adaptive windows provided in n must not to have different lengths")
61+
if ((is.list(n) && length(n[[1L]]) != len) || (is.numeric(n) && length(n) != len))
62+
stopf("length of vectors in x must match to length of adaptive window in n")
63+
if (verbose)
64+
catf("partial2adaptive: froll adaptive=TRUE and partial=TRUE trimming n\n")
4965
if (!is.list(n))
5066
n = list(n)
51-
if (length(unique(lengths(n))) != 1L)
52-
stopf("adaptive window provided in 'n' must not to have different lengths")
53-
if (length(n[[1L]]) != len)
54-
stopf("length of vectors in 'x' must match to length of adaptive window in 'n'")
55-
if (verbose)
56-
catf("partial2adaptive: froll adaptive=TRUE and partial=TRUE trimming 'n'\n")
5767
lapply(n, align, FUN=trimnadaptive)
5868
}
5969
}
6070

61-
froll = function(fun, x, n, fill=NA, algo, align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, FUN, rho, give.names=FALSE) {
71+
make.roll.names = function(x.len, n.len, n, x.nm, n.nm, fun, adaptive) {
72+
if (is.null(n.nm)) {
73+
if (!adaptive) {
74+
if (!is.numeric(n))
75+
stopf("internal error: misuse of make.names, n must be numeric for !adaptive") ## nocov
76+
n.nm = paste0("roll", fun, as.character(as.integer(n)))
77+
} else {
78+
n.nm = paste0("aroll", fun, seq_len(n.len))
79+
}
80+
} else if (!length(n.nm) && !adaptive)
81+
stopf("internal error: misuse of make.names, non-null length 0 n is not possible for !adaptive") ## nocov
82+
if (is.null(x.nm)) {
83+
x.nm = paste0("V", seq_len(x.len))
84+
}
85+
ans = if (length(x.nm)) { ## is.list(x) && !is.data.frame(x)
86+
if (length(n.nm)) { ## !adaptive || is.list(n)
87+
paste(rep(x.nm, each=length(n.nm)), n.nm, sep="_")
88+
} else { ## adaptive && is.numeric(n)
89+
x.nm
90+
}
91+
} else { ## (by.column && is.atomic(x)) || (!by.column && is.data.frame(x))
92+
if (length(n.nm)) { ## !adaptive || is.list(n)
93+
n.nm
94+
} else { ## adaptive && is.numeric(n)
95+
NULL
96+
}
97+
}
98+
if (!is.null(ans) && length(ans) != x.len*n.len)
99+
stopf("internal error: make.names generated names of wrong length") ## nocov
100+
ans
101+
}
102+
103+
froll = function(fun, x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
104+
stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun))
105+
if (!missing(hasNA)) {
106+
if (!is.na(has.nf))
107+
stopf("hasNA is deprecated, use has.nf instead")
108+
warning("hasNA is deprecated, use has.nf instead")
109+
has.nf = hasNA
110+
} # remove check on next major release
111+
algo = match.arg(algo)
62112
align = match.arg(align)
63-
if (isTRUE(give.names))
113+
if (isTRUE(give.names)) {
64114
orig = list(n=n, adaptive=adaptive)
115+
xnam = if (is.list(x)) names(x) else character()
116+
nnam = if (isTRUE(adaptive)) {
117+
if (is.list(n)) names(n) else character()
118+
} else names(n)
119+
nx = if (is.list(x)) length(x) else 1L
120+
nn = if (isTRUE(adaptive)) {
121+
if (is.list(n)) length(n) else 1L
122+
} else length(n)
123+
}
65124
if (isTRUE(partial)) {
66125
if (!length(n))
67126
stopf("n must be non 0 length")
@@ -78,51 +137,25 @@ froll = function(fun, x, n, fill=NA, algo, align=c("right","left","center"), na.
78137
n = rev2(n)
79138
align = "right"
80139
} ## support for left adaptive added in #5441
81-
if (missing(FUN))
82-
ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, has.nf, adaptive)
83-
else
84-
ans = .Call(CfrollapplyR, FUN, x, n, fill, align, adaptive, rho)
140+
ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, has.nf, adaptive)
85141
if (leftadaptive) {
86142
if (verbose)
87143
catf("froll: adaptive=TRUE && align='left' post-processing from align='right'\n")
88144
ans = rev2(ans)
89145
}
90146
if (isTRUE(give.names) && is.list(ans)) {
91-
n = orig$n
92-
adaptive = orig$adaptive
93-
nx = names(x)
94-
nn = names(n)
95-
if (is.null(nx)) nx = paste0("V", if (is.atomic(x)) 1L else seq_along(x))
96-
if (is.null(nn)) nn = if (adaptive) paste0("N", if (is.atomic(n)) 1L else seq_along(n)) else paste("roll", as.character(n), sep="_")
97-
setattr(ans, "names", paste(rep(nx, each=length(nn)), nn, sep="_"))
147+
nms = make.roll.names(x.len=nx, n.len=nn, n=orig$n, x.nm=xnam, n.nm=nnam, fun=fun, adaptive=orig$adaptive)
148+
setattr(ans, "names", nms)
98149
}
99150
ans
100151
}
101152

102-
frollfun = function(fun, x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
103-
stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun))
104-
if (!missing(hasNA)) {
105-
if (!is.na(has.nf))
106-
stopf("hasNA is deprecated, use has.nf instead")
107-
warningf("hasNA is deprecated, use has.nf instead")
108-
has.nf = hasNA
109-
} # remove check on next major release
110-
algo = match.arg(algo)
111-
froll(fun=fun, x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, give.names=give.names)
112-
}
113-
114-
frollmean = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
115-
frollfun(fun="mean", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
153+
frollmean = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
154+
froll(fun="mean", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
116155
}
117-
frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
118-
frollfun(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
156+
frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
157+
froll(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
119158
}
120-
frollmax = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, hasNA, give.names=FALSE) {
121-
frollfun(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
122-
}
123-
124-
frollapply = function(x, n, FUN, ..., fill=NA, align=c("right","left","center"), adaptive=FALSE, partial=FALSE, give.names=FALSE) {
125-
FUN = match.fun(FUN)
126-
rho = new.env()
127-
froll(FUN=FUN, rho=rho, x=x, n=n, fill=fill, align=align, adaptive=adaptive, partial=partial, give.names=give.names)
159+
frollmax = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) {
160+
froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, has.nf=has.nf, adaptive=adaptive, partial=partial, hasNA=hasNA, give.names=give.names)
128161
}

0 commit comments

Comments
 (0)