Skip to content

Commit 126c7b6

Browse files
authored
frollapply simplify more conservative (#7320)
* frollapply simplify more conservative * fix lintr * codecov * use .selfref.ok()
1 parent 345e336 commit 126c7b6

File tree

4 files changed

+179
-112
lines changed

4 files changed

+179
-112
lines changed

NEWS.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -177,20 +177,20 @@
177177
```r
178178
x = data.table(v1=rnorm(120), v2=rnorm(120))
179179
f = function(x) coef(lm(v2 ~ v1, data=x))
180-
coef.fill = c("(Intercept)"=NA_real_, "v1"=NA_real_)
181-
frollapply(x, 4, f, by.column=FALSE, fill=coef.fill)
180+
frollapply(x, 4, f, by.column=FALSE)
182181
# (Intercept) v1
182+
# <num> <num>
183183
# 1: NA NA
184184
# 2: NA NA
185185
# 3: NA NA
186-
# 4: 0.65456931 0.3138012
187-
# 5: -1.07977441 -2.0588094
186+
# 4: -0.04648236 -0.6349687
187+
# 5: 0.09208733 -0.4964023
188188
#---
189-
#116: 0.15828417 0.3570216
190-
#117: -0.09083424 1.5494507
191-
#118: -0.18345878 0.6424837
192-
#119: -0.28964772 0.6116575
193-
#120: -0.40598313 0.6112854
189+
#116: -0.21169439 0.7421358
190+
#117: -0.19729119 0.4926939
191+
#118: -0.04217896 0.0452713
192+
#119: 0.22472549 -0.5245874
193+
#120: 0.54540359 -0.1638333
194194
```
195195
- uses multiple CPU threads (on a decent OS); evaluation of UDF is inherently slow so this can be a great help.
196196
```r

R/frollapply.R

Lines changed: 107 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,117 @@
1+
all_atomic = function(x) all(vapply_1b(x, is.atomic, use.names=FALSE))
2+
all_data.frame = function(x) all(vapply_1b(x, is.data.frame, use.names=FALSE))
3+
all_list = function(x) all(vapply_1b(x, is.list, use.names=FALSE))
4+
all_types = function(x) vapply_1c(x, typeof, use.names=FALSE)
5+
all_names = function(x) lapply(x, names)
6+
len_unq = function(x) length(unique(x))
7+
any_NA_neg = function(x) anyNA(x) || any(x < 0L)
8+
any_NA_names = function(x) anyNA(names(x))
9+
all_NULL_names = function(x) all(vapply_1b(x, function(y) is.null(names(y)), use.names=FALSE))
10+
equal.lengths = function(x) len_unq(lengths(x)) <= 1L
11+
equal.nrows = function(x) len_unq(lapply(x, nrow)) <= 1L
12+
equal.dims = function(x) len_unq(lapply(x, dim)) <= 1L
13+
equal.types = function(x) len_unq(all_types(x)) <= 1L
14+
equal.names = function(x) len_unq(all_names(x)) <= 1L
15+
116
## ansmask is to handle leading values from fill to match type of the ans
217
simplifylist = function(x, fill, ansmask) {
3-
all.t = vapply_1c(x, typeof, use.names=FALSE)
18+
all.t = all_types(x)
419
all.ut = unique(all.t)
520
if (length(all.ut) > 1L) {
6-
ans.t = vapply_1c(x[ansmask], typeof, use.names=FALSE)
7-
ans.ut = unique(ans.t)
8-
## ans postprocessing to match types
9-
if ((length(ans.ut) == 2L) && all(ans.ut %in% c("double","integer","logical"))) { ## align numeric and integer when function is not type stable: median #7313
10-
if ("double" %in% ans.ut) {
11-
if ("integer" %in% ans.ut)
12-
x[ansmask & all.t=="integer"] = lapply(x[ansmask & all.t=="integer"], as.numeric) ## coerce integer to double
13-
if ("logical" %in% ans.ut)
14-
x[ansmask & all.t=="logical"] = lapply(x[ansmask & all.t=="logical"], as.numeric) ## coerce logical to double
15-
ans.ut = "double"
16-
} else if ("integer" %in% ans.ut) {
17-
if ("logical" %in% ans.ut)
18-
x[ansmask & all.t=="logical"] = lapply(x[ansmask & all.t=="logical"], as.integer) ## coerce logical to integer
19-
else
20-
internal_error("simplifylist aligning return types, at that place there should have been some logical types in the answer") # nocov
21-
ans.ut = "integer"
22-
}
21+
ans.ut = unique(all_types(x[ansmask]))
22+
## coerce int to double when varies within answer results: median #7313
23+
if (
24+
length(ans.ut) == 2L && ## simplifylist(list(NA, 1, 1L, TRUE), NA, ansmask=c(F,T,T,T))
25+
all(c("double","integer") %in% ans.ut) && ## simplifylist(list(NA, 1, TRUE), NA, ansmask=c(F,T,T))
26+
equal.lengths(x[ansmask]) ## simplifylist(list(NA, 1, 1:2), NA, ansmask=c(F,T,T))
27+
) {
28+
x[ansmask & all.t=="integer"] = lapply(x[ansmask & all.t=="integer"], as.numeric) ## coerce integer to double
29+
ans.ut = "double"
2330
}
24-
## file postprocessing to match types
25-
if (length(ans.ut) == 1L && equal.lengths(x[ansmask])) {
26-
if (identical(fill, NA)) { ## different typeof of ans and default fill=NA and lengths of ans equal
27-
filli = which(!ansmask)
28-
ans1 = x[[which.first(ansmask)]] ## first ans from full window
29-
x[filli] = rep_len(list(ans1[NA]), length(filli)) ## this will make NA of matching type to ans1 and recycle for all filli
30-
all.ut = ans.ut
31-
} else if (typeof(fill) != ans.ut && all(c(typeof(fill), ans.ut) %in% c("double","integer","logical"))) { ## fill=-2, ans=1L
32-
filli = which(!ansmask)
33-
cast = switch(ans.ut, double = as.numeric, integer = as.integer, logical = as.logical)
34-
x[filli] = rep_len(list(cast(fill)), length(filli))
35-
all.ut = ans.ut
31+
## coerce fill to answers type and length
32+
if (
33+
length(ans.ut) == 1L && ## simplifylist(list(NA, 1, TRUE), NA, ansmask=c(F,T,T))
34+
equal.lengths(x[ansmask]) && ## simplifylist(list(NA, 1L, 1:2), NA, ansmask=c(F,T,T))
35+
is.atomic(fill) ## simplifylist(list(list(NA), list(1), list(2)), list(NA), ansmask=c(F,T,T))
36+
) {
37+
fill.t = typeof(fill)
38+
ans1 = x[[which.first(ansmask)]] ## first ans from full window, all ans same type by now
39+
## coerce fill to type
40+
if (identical(fill, NA)) {
41+
if (ans.ut == "list") {
42+
fill = lapply(ans1, `[`, NA) ## we want list(NA) rather than list(NULL), this also propagates names
43+
} else {
44+
fill = setNames(ans1[NA], names(ans1))
45+
}
46+
fill.t = ans.ut
47+
} else if (
48+
fill.t != ans.ut && ## simplifylist(list(-1, 1, 2), -1, ansmask=c(F,T,T))
49+
fill.t %in% c("double","integer") && ## simplifylist(list(NULL, 1, 2), NULL, ansmask=c(F,T,T))
50+
ans.ut %in% c("double","integer") ## simplifylist(list(1, "a", "b"), 1, ansmask=c(F,T,T))
51+
) { ## fill=-2, ans=1L
52+
if (fill.t == "integer" && ans.ut == "double") {
53+
fill = as.double(fill)
54+
} else if (fill.t == "double" && ans.ut == "integer") {
55+
fill = as.integer(fill)
56+
} else {
57+
internal_error("coerce fill type reached a branch of unexpected fill type and answer type") # nocov
58+
}
59+
fill.t = ans.ut
3660
}
61+
## name fill if all ans have same names
62+
if (
63+
ans.ut != "list" &&
64+
length(fill) == length(ans1) &&
65+
is.null(names(fill)) &&
66+
!is.null(names(ans1)) && ## simplifylist(list(NA, c(1,2), c(1,2)), NA, ansmask=c(F,T,T))
67+
len_unq(vapply_1b(x[ansmask], any_NA_names, use.names=FALSE)) <= 1L && ## simplifylist(list(NA, c(a=1,b=2), setNames(c(1, 2), c(NA,"b"))), NA, ansmask=c(F,T,T))
68+
equal.names(x[ansmask]) ## simplifylist(list(NA, c(a=1,b=2), c(x=1,y=2)), NA, ansmask=c(F,T,T))
69+
) {
70+
fill = setNames(fill, names(ans1))
71+
}
72+
## recycle fill
73+
filli = which(!ansmask)
74+
x[filli] = rep_len(list(fill), length(filli))
3775
}
76+
all.ut = unique(all_types(x))
77+
}
78+
if (
79+
!is.null(names(fill)) &&
80+
all_NULL_names(x[ansmask]) &&
81+
equal.lengths(x)
82+
) {
83+
nm = names(fill)
84+
x[ansmask] = lapply(x[ansmask], `names<-`, nm)
3885
}
39-
all.ut = unique(vapply_1c(x, typeof, use.names=FALSE))
40-
if ((length(all.ut) == 1L) && all(all.ut %in% c("integer","logical","double","complex","character","raw"))) {
41-
if (identical(unique(lengths(x)), 1L)) { ## length 1
42-
return(unlist(x, recursive=FALSE, use.names=FALSE))
43-
} else if (equal.lengths(x)) { ## length 2+ and equal
44-
return(rbindlist(lapply(x, as.list)))
86+
if (length(all.ut) == 1L) {
87+
if (all.ut %in% c("integer","logical","double","complex","character","raw")) {
88+
if (identical(unique(lengths(x)), 1L)) { ## all length 1
89+
return(unlist(x, recursive=FALSE, use.names=FALSE))
90+
} else if (
91+
equal.lengths(x) &&
92+
equal.names(x)
93+
) { ## length 2+ and equal
94+
return(rbindlist(lapply(x, as.list)))
95+
}
96+
} else if (identical(all.ut,"list")) {
97+
if (
98+
all_data.frame(x) && ## simplifylist(list(NA, list(a=1L, b=2L), data.table(a=1L, b=2L)), NA, ansmask=c(F,T,T))
99+
equal.dims(x) && ## simplifylist(list(NA, data.table(a=1L, b=2L), data.table(a=1L)), NA, ansmask=c(F,T,T))
100+
equal.types(x) && ## simplifylist(list(NA, data.table(a=1L, b=2L), data.table(a=1L, b="b")), NA, ansmask=c(F,T,T))
101+
equal.names(x) ## simplifylist(list(NA, data.table(a=1L, b=2L), data.table(x=1L, y=2L)), NA, ansmask=c(F,T,T))
102+
) {
103+
return(rbindlist(x))
104+
} else if (
105+
equal.lengths(x) &&
106+
len_unq(lapply(x, lengths, use.names=FALSE)) <= 1L && ## nested lengths
107+
len_unq(lapply(lapply(x, unlist, recursive=FALSE, use.names=FALSE), typeof)) <= 1L &&
108+
equal.names(x)
109+
) { ## same length lists: list(list(1:2, 1:2), list(2:3, 2:3))
110+
return(rbindlist(x)) ## simplifylist(list(NA, list(1:2, 1:2), list(2:3, 2:3)), NA, ansmask=c(F,T,T))
111+
}
45112
}
46-
} else if (identical(all.ut, "list")) {
47-
if (all_data.frame(x)) ## list(data.table(...), data.table(...))
48-
return(rbindlist(x))
49-
if (equal.lengths(x)) ## same length lists: list(list(1:2, 1:2), list(2:3, 2:3))
50-
return(rbindlist(x))
51113
}
52-
## not simplified, return as is
53-
# not length stable
54-
# not type stable
114+
## not simplified, return as is, see #7317
55115
# NULL, closure, special, builtin, environment, S4, ...
56116
x
57117
}
@@ -65,13 +125,6 @@ fixselfref = function(x) {
65125
x
66126
}
67127

68-
all_atomic = function(x) all(vapply_1b(x, is.atomic, use.names=FALSE))
69-
all_data.frame = function(x) all(vapply_1b(x, is.data.frame, use.names=FALSE))
70-
all_list = function(x) all(vapply_1b(x, is.list, use.names=FALSE))
71-
equal.lengths = function(x) length(unique(lengths(x))) <= 1L
72-
equal.nrows = function(x) length(unique(vapply(x, nrow, 0L))) <= 1L
73-
anyNAneg = function(x) anyNA(x) || any(x < 0L)
74-
75128
frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right","left","center"), adaptive=FALSE, partial=FALSE, give.names=FALSE, simplify=TRUE, x, n) {
76129
if (!missing(x)) {
77130
warningf("'x' is deprecated in frollapply, use 'X' instead")
@@ -159,7 +212,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right","
159212
nnam = names(N) ## used for give.names
160213
if (!is.integer(N))
161214
N = as.integer(N)
162-
if (anyNAneg(N))
215+
if (any_NA_neg(N))
163216
stopf("'N' must be non-negative integer values (>= 0)")
164217
nn = length(N) ## top level loop for vectorized n
165218
} else {
@@ -170,7 +223,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right","
170223
stopf("length of integer vector(s) provided as list to 'N' argument must be equal to number of observations provided in 'X'")
171224
if (!is.integer(N))
172225
N = as.integer(N)
173-
if (anyNAneg(N))
226+
if (any_NA_neg(N))
174227
stopf("'N' must be non-negative integer values (>= 0)")
175228
nn = 1L
176229
N = list(N)
@@ -184,7 +237,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right","
184237
stopf("'N' must be an integer vector or list of integer vectors")
185238
if (!all(vapply_1b(N, is.integer, use.names=FALSE)))
186239
N = lapply(N, as.integer)
187-
if (any(vapply_1b(N, anyNAneg, use.names=FALSE)))
240+
if (any(vapply_1b(N, any_NA_neg, use.names=FALSE)))
188241
stopf("'N' must be non-negative integer values (>= 0)")
189242
nn = length(N)
190243
nnam = names(N)

0 commit comments

Comments
 (0)