diff --git a/R/frollapply.R b/R/frollapply.R index 9105250427..e547b1f6a9 100644 --- a/R/frollapply.R +++ b/R/frollapply.R @@ -1,49 +1,58 @@ ## ansmask is to handle leading values from fill to match type of the ans simplifylist = function(x, fill, ansmask) { - l = lengths(x) - ul = unique(l) - if (length(ul)!=1L) ## different lenghts - return(x) - t = vapply_1c(x, typeof, use.names=FALSE) - ut = unique(t) - if (length(ut)==2L) { - all.ut = ut - t = vapply_1c(x[ansmask], typeof, use.names=FALSE) - ut = unique(t) - if (length(ut)!=1L) - return(x) ## different typeof even excluding fill, a FUN was not type stable - if (!(ut=="integer"||ut=="logical"||ut=="double"||ut=="complex"||ut=="character"||ut=="raw")) - return(x) ## ans is not atomic - if (identical(fill, NA)) { ## different typeof, try to handle fill=NA logical type - filli = which(!ansmask) - ans1 = x[[which.first(ansmask)]] - x[filli] = rep_len(list(ans1[NA]), length(filli)) ## this will recycle to length of ans1 - } else if (all(c("integer","double") %in% all.ut)) { ## typeof numeric and int, fill is coerced to the type FUN - filli = which(!ansmask) - cast = if (ut=="double") as.numeric else as.integer - x[filli] = rep_len(list(cast(fill)), length(filli)) - } else { ## length == 2L but no easy way to match type - return(x) + all.t = vapply_1c(x, typeof, use.names=FALSE) + all.ut = unique(all.t) + if (length(all.ut) > 1L) { + ans.t = vapply_1c(x[ansmask], typeof, use.names=FALSE) + ans.ut = unique(ans.t) + ## ans postprocessing to match types + if ((length(ans.ut) == 2L) && all(ans.ut %in% c("double","integer","logical"))) { ## align numeric and integer when function is not type stable: median #7313 + if ("double" %in% ans.ut) { + if ("integer" %in% ans.ut) + x[ansmask & all.t=="integer"] = lapply(x[ansmask & all.t=="integer"], as.numeric) ## coerce integer to double + if ("logical" %in% ans.ut) + x[ansmask & all.t=="logical"] = lapply(x[ansmask & all.t=="logical"], as.numeric) ## coerce logical to double + ans.ut = "double" + } else if ("integer" %in% ans.ut) { + if ("logical" %in% ans.ut) + x[ansmask & all.t=="logical"] = lapply(x[ansmask & all.t=="logical"], as.integer) ## coerce logical to integer + else + internal_error("simplifylist aligning return types, at that place there should have been some logical types in the answer") # nocov + ans.ut = "integer" + } + } + ## file postprocessing to match types + if (length(ans.ut) == 1L && equal.lengths(x[ansmask])) { + if (identical(fill, NA)) { ## different typeof of ans and default fill=NA and lengths of ans equal + filli = which(!ansmask) + ans1 = x[[which.first(ansmask)]] ## first ans from full window + x[filli] = rep_len(list(ans1[NA]), length(filli)) ## this will make NA of matching type to ans1 and recycle for all filli + all.ut = ans.ut + } else if (typeof(fill) != ans.ut && all(c(typeof(fill), ans.ut) %in% c("double","integer","logical"))) { ## fill=-2, ans=1L + filli = which(!ansmask) + cast = switch(ans.ut, double = as.numeric, integer = as.integer, logical = as.logical) + x[filli] = rep_len(list(cast(fill)), length(filli)) + all.ut = ans.ut + } } - } else if (length(ut)>2L) { ## unique typeof length > 2L - return(x) } - if (ut=="integer"||ut=="logical"||ut=="double"||ut=="complex"||ut=="character"||ut=="raw") { - if (ul==1L) ## length 1 + all.ut = unique(vapply_1c(x, typeof, use.names=FALSE)) + if ((length(all.ut) == 1L) && all(all.ut %in% c("integer","logical","double","complex","character","raw"))) { + if (identical(unique(lengths(x)), 1L)) { ## length 1 return(unlist(x, recursive=FALSE, use.names=FALSE)) - else ## length 2+ + } else if (equal.lengths(x)) { ## length 2+ and equal return(rbindlist(lapply(x, as.list))) - } else if (ut=="list") { - if (all(vapply_1b(x, is.data.frame, use.names=FALSE))) ## list(data.table(...), data.table(...)) - return(rbindlist(x)) - ll = lapply(x, lengths) ## length of each column of each x - ull = unique(ll) - if (length(ull)==1L) ## list(list(1:2, 1:2), list(2:3, 2:3)) + } + } else if (identical(all.ut, "list")) { + if (all_data.frame(x)) ## list(data.table(...), data.table(...)) return(rbindlist(x)) - lu = function(x) length(unique(x)) - if (all(vapply_1i(ull, lu, use.names=FALSE)==1L)) ## within each x column lengths the same, each could be DF: list(list(1, 2), list(1:2, 2:3)) + if (equal.lengths(x)) ## same length lists: list(list(1:2, 1:2), list(2:3, 2:3)) return(rbindlist(x)) - } ## else NULL, closure, special, builtin, environment, S4, ... + } + ## not simplified, return as is + # not length stable + # not type stable + # NULL, closure, special, builtin, environment, S4, ... x } @@ -86,6 +95,8 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right"," align = match.arg(align) FUN = match.fun(FUN) verbose = getOption("datatable.verbose") + if (!length(X)) + return(vector(mode=typeof(X), length=0L)) if (give.names) orig = list(N=N, adaptive=adaptive) @@ -284,26 +295,31 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right"," cat("frollapply running on single CPU thread\n") ans = vector("list", nx*nn) ## vectorized x + for (i in seq_len(nx)) { thisx = X[[i]] thislen = len[i] - if (!thislen) - next - if (!use.fork0) { - use.fork = use.fork0 - } else { - # throttle - DTths = getDTthreadsC(thislen, TRUE) - use.fork = DTths > 1L - if (verbose) { - if (DTths < DTths0) - catf("frollapply run on %d CPU threads throttled to %d threads, input length %d\n", DTths0, DTths, thislen) - else - catf("frollapply running on %d CPU threads\n", DTths) + if (thislen) { + if (!use.fork0) { + use.fork = use.fork0 + } else { + # throttle + DTths = getDTthreadsC(thislen, TRUE) + use.fork = DTths > 1L + if (verbose) { + if (DTths < DTths0) + catf("frollapply run on %d CPU threads throttled to %d threads, input length %d\n", DTths0, DTths, thislen) + else + catf("frollapply running on %d CPU threads\n", DTths) + } } } ## vectorized n for (j in seq_len(nn)) { + if (!thislen) { + ans[[(i-1L)*nn+j]] = vector(mode=typeof(thisx), length=0L) + next + } thisn = N[[j]] w = allocWindow(thisx, thisn) ## prepare window, handles adaptive ansmask = ansMask(thislen, thisn) diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index 88711e0c7b..a248f28331 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -1565,21 +1565,24 @@ if (getDTthreads()>1L) { ## check for consistency } #### corner cases from examples - handled properly after frollapply rewrite to R -test(6010.101, frollapply(1:5, 3, function(x) head(x, 2)), list(NA, NA, 1:2, 2:3, 3:4)) +test(6010.101, frollapply(1:5, 3, function(x) head(x, 2)), data.table(V1 = c(NA, NA, 1L, 2L, 3L), V2 = c(NA, NA, 2L, 3L, 4L))) f = function(x) { - n = length(x) - # length 1 will be returned only for first iteration where we check length - if (n==x[n]) x[1L] else range(x) # range(x)[2L] is silently ignored + n = length(x) # length 1 will be returned only for first iteration + if (n==x[n]) x[1L] else range(x) } test(6010.102, frollapply(1:5, 3, f), list(NA,NA,1L,c(2L,4L),c(3L,5L))) +f = function(x) { + n = length(x) # length 1 will be returned only for last iteration + if (n==x[n]) range(x) else x[1L] +} +test(6010.1021, frollapply(1:5, 3, f), list(NA,NA,c(1L,3L),2L,3L)) test(6010.103, frollapply(c(1,2,1,1,1,2,3,2), 3, uniqueN), c(NA,NA,2L,2L,1L,2L,3L,2L)) test(6010.104, frollapply(c(1,2,1,1,NA,2,NA,2), 3, anyNA), c(NA,NA,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE)) f = function(x) { - n = length(x) - # double type will be returned only for first iteration where we check type - if (n==x[n]) 1 else NA # NA logical turns into garbage without coercion to double + n = length(x) # double type will be returned only for first iteration + if (n==x[n]) 1 else NA # NA logical coerced properly } -test(6010.105, head(frollapply(1:5, 3, f), 3L), list(NA, NA, 1)) +test(6010.105, frollapply(1:5, 3, f), c(NA,NA,1,NA,NA)) ## partial x = 1:6/2 @@ -1615,7 +1618,7 @@ test(6010.2014, frollapply(1:5, rep(3, 5), toString, adaptive=TRUE), c(NA,NA,"1, test(6010.2015, frollapply(1:2, 1:2, mean, adaptive=TRUE, align="right"), c(1, 1.5)) test(6010.2016, frollapply(1:2, 1:2, mean, adaptive=TRUE, align="center"), error="using adaptive TRUE and align 'center' is not implemented") test(6010.2017, frollapply(list(1:2, 1:3), list(1:2), mean, adaptive=TRUE), error="adaptive rolling function can only process 'X' having equal length of elements; If you want to call rolling function on list having variable length of elements call it for each field separately") -test(6010.2018, frollapply(1:5, rep(3, 5), function(x) head(x, 2), adaptive=TRUE), list(NA, NA, 1:2, 2:3, 3:4)) +test(6010.2018, frollapply(1:5, rep(3, 5), function(x) head(x, 2), adaptive=TRUE), data.table(V1 = c(NA, NA, 1L, 2L, 3L), V2 = c(NA, NA, 2L, 3L, 4L))) test(6010.2019, frollapply(1:10, list(1:5), mean, adaptive=TRUE), error="length of integer vector(s) provided as list to 'N' argument must be equal to number of observations provided in 'X'") test(6010.202, frollapply(1:10, 1:5, mean, adaptive=TRUE), error="length of integer vector(s) provided as list to 'N' argument must be equal to number of observations provided in 'X'") options(datatable.verbose=TRUE) @@ -1652,7 +1655,9 @@ test(6010.522, frollapply(c(1:3,NA,5:6), 4L, sum), rep(NA_integer_,6)) test(6010.523, frollapply(c(1:3,NA,5:6), 4L, sum, na.rm=TRUE), c(NA,NA,NA,6L,10L,14L)) test(6010.524, frollapply(c(1,2,3,NA,NA,NA,NA), 3L, mean), c(NA,NA,2,NA,NA,NA,NA)) test(6010.525, frollapply(c(1,2,3,NA,NA,NA,NA), 3L, mean, na.rm=TRUE), c(NA,NA,2,2.5,3,NaN,NaN)) -test(6010.526, frollapply(numeric(), 3L, sum), list()) +test(6010.526, frollapply(numeric(), 3L, sum), numeric()) +test(6010.5261, frollapply(integer(), 3L, sum), integer()) +test(6010.5262, frollapply(logical(), 3L, sum), logical()) test(6010.527, frollapply(1:5, 3L, toString), c(NA, NA, "1, 2, 3", "2, 3, 4", "3, 4, 5")) ma = function(x, n, na.rm=FALSE) { ans = rep(NA_real_, nx<-length(x)) @@ -1705,9 +1710,9 @@ test(6010.6062, frollapply(as.list(x), rep(3,5), function(x) c(length(x[[1L]]), test(6010.607, frollapply(list(), 3, identity, by.column=FALSE), list()) test(6010.608, frollapply(list(numeric(), numeric()), 3, identity, by.column=FALSE), list()) test(6010.609, frollapply(list(numeric(), 1:3), 3, identity, by.column=FALSE), error="all vectors must have equal lengths") -test(6010.610, frollapply(numeric(), 3, identity), list()) -test(6010.611, frollapply(list(numeric(), numeric()), 3, identity), list(NULL,NULL)) -test(6010.612, frollapply(list(numeric(), 1:3), 3, identity), list(NULL, list(NA,NA,1:3))) +test(6010.610, frollapply(numeric(), 3, identity), numeric()) +test(6010.611, frollapply(list(numeric(), numeric()), 3, identity), list(numeric(),numeric())) +test(6010.612, frollapply(list(numeric(), 1:3), 3, identity), list(numeric(), data.table(c(NA,NA,1L),c(NA,NA,2L),c(NA,NA,3L)))) ## codecov memcpy calls #7304 x = data.table(v1=1:2, v2=c(1,2), v3=c("a","b"), v4=list(1,2), v5=as.complex(1:2), v6=as.raw(1:2)) @@ -1763,7 +1768,7 @@ rm(X, ans, n) test(6010.701, frollapply(1:5, 2, sum), c(NA,3L,5L,7L,9L)) test(6010.702, frollapply(1:5, 2, sum, simplify=unlist), c(NA,3L,5L,7L,9L)) test(6010.703, frollapply(1:5, 2, sum, simplify=FALSE), list(NA,3L,5L,7L,9L)) -test(6010.704, frollapply(1:5, 2, range), list(NA,1:2,2:3,3:4,4:5)) ## fill=NA could possibly be recycled to length of FUN results +test(6010.704, frollapply(1:5, 2, range), data.table(c(NA,1:4),c(NA,2:5))) ## fill=NA could possibly be recycled to length of FUN results, it is now #7313 test(6010.705, frollapply(1:5, 2, range, simplify=FALSE), list(NA,1:2,2:3,3:4,4:5)) test(6010.706, frollapply(1:5, 2, range, fill=c(NA_integer_,NA_integer_)), data.table(V1=c(NA,1:4), V2=c(NA,2:5))) test(6010.707, frollapply(1:5, 2, range, fill=c(min=NA_integer_, max=NA_integer_)), data.table(min=c(NA,1:4), max=c(NA,2:5))) @@ -1773,6 +1778,15 @@ test(6010.710, frollapply(1:5, 2, function(x) as.list(range(x)), fill=list(min=N test(6010.711, frollapply(1:5, 2, function(x) as.list(range(x)), fill=list(NA_integer_, NA_integer_), simplify=FALSE), list(list(NA_integer_, NA_integer_), as.list(1:2), as.list(2:3), as.list(3:4), as.list(4:5))) test(6010.712, as.null(frollapply(1:3, 1, function(x) if (x==1L) sum else if (x==2L) mean else `[`, simplify=TRUE)), NULL) ## as.null as we are only interested in codecov here test(6010.713, as.null(frollapply(1:3, 1, function(x) `[`, simplify = TRUE)), NULL) ## as.null as we are only interested in codecov here +# frollapply simplifylist could be more smart about median results #7313 +test(6010.751, frollapply(FUN=median, adaptive=TRUE, list(1:3,2:4), list(c(2,0,2), c(0,2,0))), list(c(NA,NA_real_,2.5), c(NA_real_,1.5,NA_real_), c(NA,NA_real_,3.5), c(NA_real_,2.5,NA_real_))) +test(6010.752, frollapply(FUN=median, adaptive=TRUE, 1:3, c(2,0,2), fill=99), c(99,NA_real_,2.5)) +test(6010.753, frollapply(FUN=median, adaptive=TRUE, c(1L,2L,4L), c(2,0,2), fill=99L), c(99,NA_real_,3)) +test(6010.754, frollapply(FUN=median, adaptive=TRUE, c(1L,2L,3L), c(2,0,2), fill=99), c(99,NA_real_,2.5)) +test(6010.755, frollapply(1:2, 1, function(i) if (i==1L) 1L else FALSE), c(1L,0L)) +test(6010.756, frollapply(1:3, 2, fill=9, function(i) if (i[1L]==1L) 1L else FALSE), c(9L,1L,0L)) ## matches fun answer +test(6010.757, frollapply(1:3, 2, fill=9L, function(i) if (i[1L]==1L) 1 else FALSE), c(9,1,0)) ## matches fun answer +test(6010.758, frollapply(1:3, 2, fill=0, function(i) TRUE), c(FALSE,TRUE,TRUE)) ## matches fun answer #### mutlithreading throttle caveats from manual: copy, fixing .internal.selfref use.fork = .Platform$OS.type!="windows" && getDTthreads()>1L @@ -1785,8 +1799,8 @@ if (use.fork) { test(6010.763, frollapply(c(1, 9), N=1L, FUN=identity), c(1,9)) ## good only because threads >= input test(6010.764, frollapply(c(1, 5, 9), N=1L, FUN=identity), c(5,5,9)) ## unexpected again is.ok = function(x) {stopifnot(is.data.table(x)); capture.output(print(attr(x, ".internal.selfref", TRUE)))!=""} - ans = frollapply(1:2, 2, data.table) ## default: fill=NA - test(6010.770, is.ok(ans[[2L]])) ## mismatch of 'fill' type so simplify=TRUE did not run rbindlist but frollapply detected DT and fixed + ans = frollapply(1:2, 2, data.table, simplify=FALSE) ## default: fill=NA + test(6010.770, is.ok(ans[[2L]])) ## frollapply detected DT and fixed ans = frollapply(1:2, 2, data.table, fill=data.table(NA)) ## fill type match test(6010.771, is.ok(ans)) ## simplify=TRUE did run rbindlist, but frollapply fixed anyway ans = frollapply(1:2, 2, data.table, fill=data.table(NA), simplify=FALSE) @@ -1892,8 +1906,8 @@ test(6010.9968, frollapply(FUN=sum, list(c(1,2,3)), list(n1=c(2,2,2)), adaptive= test(6010.9969, frollapply(FUN=sum, list(x1=c(1,2,3)), list(n1=c(2,2,2)), adaptive=TRUE, partial=TRUE, give.names=TRUE), list(x1_n1=c(1,3,5))) # frollapply doesn't handle zero-length output #7054 -test(6010.9991, frollapply(list(integer()), 0, function(x) 1), list(NULL)) -test(6010.9992, frollapply(list(integer()), list(integer()), str, adaptive=TRUE), list(NULL)) +test(6010.9991, frollapply(list(integer()), 0, function(x) 1), list(integer())) +test(6010.9992, frollapply(list(integer()), list(integer()), str, adaptive=TRUE), list(integer())) ## frolladapt test(6015.000, frolladapt(1:3, 2, align="center"), error="'align' other than 'right' has not yet been implemented")