diff --git a/.Rhistory b/.Rhistory index fd2266e0..670f1c82 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,512 +1,512 @@ -if (getOption("mosaic:parallelMessage", TRUE)) { -message("Using parallel package.\n", -" * Set seed with set.rseed().\n", -" * Disable this message with options(`mosaic:parallelMessage` = FALSE)\n") -} -parallel::mclapply( integer(n), function(...) { cull(lazyeval::f_eval(e2_lazy)) }, mc.cores = 1 ) +#' +#' @examples +#' # These should give identical results, even if the `parallel' package is loaded. +#' set.rseed(123); do(3) * resample(1:10, 2) +#' set.rseed(123); do(3) * resample(1:10, 2) +#' @export +set.rseed <- function(seed) { +if ("package:parallel" %in% search()) { +set.seed(seed, kind = "L'Ecuyer-CMRG") +parallel::mc.reset.stream() } else { -lapply( integer(n), function(...) { cull(lazyeval::f_eval(e2_lazy)) } ) +set.seed(seed) } -if (out.mode=='default') { # is there any reason to be fancier? -out.mode = 'data.frame' } -result <- switch(out.mode, -"list" = resultsList, -"data.frame" = .list2tidy.data.frame( resultsList ), -"matrix" = as.matrix( do.call( rbind, resultsList) ), -"vector" = unlist(resultsList) -) -class(result) <- c(paste('do', class(result)[1], sep="."), class(result)) -if (inherits( result, "data.frame")) { -# we get mutliple parts here if expression involves, for example, :: -# just grab last part. (paste()ing would be out of order -alt_name <- tryCatch( -tail(as.character(rhs(e2_lazy)[[1]]), 1), -error = function(e) "result" -) -names(result) <- mosaicCore::nice_names(names(result)) -names(result)[names(result) == "..result.."] <- -if(mosaicCore::nice_names(alt_name) == alt_name) alt_name else "result" +#' Do Things Repeatedly +#' +#' `do()` provides a natural syntax for repetition tuned to assist +#' with replication and resampling methods. +#' +#' @rdname do +#' @param n number of times to repeat +#' +#' @param object an object +#' +#' @param cull function for culling output of objects being repeated. If NULL, +#' a default culling function is used. The default culling function is +#' currently aware of objects of types +#' `lme`, +#' `lm`, +#' `htest`, +#' `table`, +#' `cointoss`, and +#' `matrix`. +#' +#' @param mode target mode for value returned +#' +#' @param algorithm a number used to select the algorithm used. Currently numbers below 1 +#' use an older algorithm and numbers >=1 use a newer algorithm which is faster in some +#' situations. +#' @param parallel a logical indicating whether parallel computation should be attempted +#' using the \pkg{parallel} package (if it is installed and loaded). +#' +#' @param e1 an object (in cases documented here, the result of running `do`) +#' @param e2 an object (in cases documented here, an expression to be repeated) +#' @param ... additional arguments +#' +#' @note `do` is a thin wrapper around `Do` to avoid collision with +#' [dplyr::do()] from the \pkg{dplyr} package. +#' @return `do` returns an object of class `repeater` which is only useful in +#' the context of the operator `*`. See the examples. +#' @author Daniel Kaplan (\email{kaplan@@macalaster.edu}) +#' and Randall Pruim (\email{rpruim@@calvin.edu}) +#' +#' @section Naming: +#' The names used in the object returned from `do()` are inferred from the +#' objects created in each replication. Roughly, this the strategy employed. +#' +#' * If the objects have names, those names are inherited, if possible. +#' * If the objects do not have names, but `do()` is used with a simple +#' function call, the name of that function is used. +#' Example: `do(3) * mean(~height, data = Galton)` produces a data frame with +#' a variable named `mean`. +#' * In cases where names are not easily inferred and a single result is produced, +#' it is named `result`. +#' +#' To get different names, one can rename the objects as they are created, or +#' rename the result returned from `do()`. Example of the former: +#' `do(3) * c(mean_height = mean(~height, data = resample(Galton)))`. +#' +#' @seealso [replicate()], [set.rseed()] +#' +#' @examples +#' do(3) * rnorm(1) +#' do(3) * "hello" +#' do(3) * 1:4 +#' do(3) * mean(rnorm(25)) +#' do(3) * lm(shuffle(height) ~ sex + mother, Galton) +#' do(3) * anova(lm(shuffle(height) ~ sex + mother, Galton)) +#' do(3) * c(sample.mean = mean(rnorm(25))) +#' # change the names on the fly +#' do(3) * mean(~height, data = resample(Galton)) +#' do(3) * c(mean_height = mean(~height, data = resample(Galton))) +#' set.rseed(1234) +#' do(3) * tally( ~sex|treat, data=resample(HELPrct)) +#' set.rseed(1234) # re-using seed gives same results again +#' do(3) * tally( ~sex|treat, data=resample(HELPrct)) +#' @keywords iteration +#' @export +do <- function(object, ...) { +UseMethod("do") } -attr(result, "lazy") <- e2_lazy -if (out.mode == "data.frame") attr(result, "culler") <- cull -return(result) -}) -do(5000) * c(mean_height = mean(~height, data = resample(Galton))) -}) -utils::globalVariables(c('.')) -#' @importFrom mosaicCore mosaic_formula_q mosaic_formula -# evaluate a lazy object and return the unevaluated expression if the expression -# doesn't evaluate to an existing object. -safe_eval <- function(x) { -tryCatch(eval_tidy(x), -error = function(e) x$expr) -} -.fetchFromDots <- function( dots, name, class='data.frame', n=1, default=NULL ) { -result <- dots[[name]] -if (is.null(result)) { -if (length(result) < n) return(default) -result <- dots[[n]] -if (! inherits(result, 'class') ) result <- default +#' @rdname do +#' @export +do.numeric <- function(object, ...) { +Do(n=object, ...) } +#' @rdname do +#' @export +do.default <- function(object, ...) { +dplyr::do(object, ...) +} +#' @rdname do +#' @export +Do <- function(n=1L, cull=NULL, mode='default', algorithm=1.0, parallel=TRUE) { +new( 'repeater', n=n, cull=cull, mode=mode, algorithm=algorithm, parallel=parallel) +} +#' @rdname mosaic-internal +#' @keywords internal +#' @details +#' `.make.data.frame` converts things to a data frame +#' @param x object to be converted +#' @return a data frame +.make.data.frame <- function( x ) { +if (is.data.frame(x)) return(x) +if (is.vector(x)) { +nn <- names(x) +result <- as.data.frame( matrix(x, nrow=1) ) +if (! is.null(nn) ) names(result) <- nn return(result) } -#' Check if formula +return(as.data.frame(x)) +} +null2na <- function(x) if (is.null(x)) NA else x +#' Repeater objects #' -#' @param x an object -#' @return TRUE for a formula, FALSE otherwise, even if evaluation throws an error +#' Repeater objects can be used with the `*` operator to repeat +#' things multiple time using a different syntax and different output +#' format from that used by, for example, [replicate()]. #' +#' @rdname repeater-class +#' @name repeater-class +#' @seealso [do()] +#' @section Slots: +#' \describe{ +#' \item{`n`:}{Object of class `"numeric"` indicating how many times to repeat something.} +#' \item{`cull`:}{Object of class `"function"` that culls the output from each repetition.} +#' \item{`mode`:}{Object of class `"character"` indicating the output mode +#' ('default', 'data.frame', 'matrix', 'vector', or 'list'). For most purposes 'default' (the default) +#' should suffice.} +#' \item{`algorithm`:}{an algorithm number.} +#' \item{`parallel`:}{a logical indicating whether to attempt parallel execution.} +#' } +#' @exportClass repeater +setClass('repeater', +representation = representation(n='numeric', cull='ANY', mode='character', +algorithm='numeric', parallel='logical'), +prototype = prototype(n=1, cull=NULL, mode="default", algorithm=1, parallel=TRUE) +) +# old version +if(FALSE) { +.merge_data_frames <- function(a, b) { +a <- .make.data.frame(a) +b <- .make.data.frame(b) +if (nrow(b) < 1) return (a) +if (nrow(a) < 1) return (b) +a$mosaic_merge_id <- paste('A',1:nrow(a)) +b$mosaic_merge_id <- paste('B',1:nrow(b)) +result <- merge(a,b,all=TRUE) +w <- which(names(result) == 'mosaic_merge_id') +result <- result[, -w] +return(result) +} +} #' @rdname mosaic-internal #' @keywords internal -.is.formula <- function(x) -tryCatch( inherits(x, 'formula'), error = function(e) {FALSE} ) -#' Check for simple formula +#' @details `.merge_data_frames` is a wrapper around merge #' -#' @param x a formula -#' -#' @return TRUE if formula has no left-hand side or a simple right-hand side -#' (e.g., `NULL`, ., 1, or 0) +#' @param a a data frame +#' @param b a data frame #' +#' @return a data frame +.merge_data_frames = function(a,b) { +a <- .make.data.frame(a) +b <- .make.data.frame(b) +if (nrow(b) < 1) return (a) +if (nrow(a) < 1) return (b) +missing.from.b = setdiff(names(a),names(b)) +missing.from.a = setdiff(names(b),names(a)) +for (var in missing.from.b) b[[var]] = NA +for (var in missing.from.a) a[[var]] = NA +dplyr::bind_rows(a,b) +} #' @rdname mosaic-internal #' @keywords internal -.is.simple.formula <- function(x){ -inherits(x, "formula") && -(length(x) == 2 || is.null(x[[3]]) || -(length(x[[3]]) == 1 && -((is.numeric(x[[3]]) && (x[[3]] == 0 || x[[3]] == 1)) || -(all.names(x[[3]]) %in% c("."))))) -} -# This could use a better name and a better description -#' Extract simple part from formula +#' @details +#' `.squash_names` squashes names of a data frame into a single string #' -#' @param x a formula +#' @param object an object +#' @param sep a character #' -#' @return simple part of formula or NULL if formula is not simple -#' -#' @rdname mosaic-internal -#' @keywords internal -.simple.part <- function(x) { -if (! .is.simple.formula(x) ) { -return(NULL) -} else { -return(x[[2]]) +#' @return a character vector +.squash_names <- function(object,sep=":") { +if ( ncol(object) < 1 ) {return(rep("",nrow(object)))} +result <- object[,1] +if ( ncol(object) < 2 ) {return(as.character(result))} +for (c in 2:ncol(object)) { +result <- paste(result, as.character(object[,c]), sep=sep) } +return(result) } -.flatten <- function(x) { -result <- c() -for (item in x) result <- c(result, item) +#' @rdname do +#' @param x an object created by `do`. +#' @export +print.repeater <- function(x, ...) +{ +message(paste('This repeats a command',x@n,'times. Use with *.')) +return(invisible(x)) +} +.list2tidy.data.frame <- function (l) { +# see if we really just have a vector +ul <- unlist( l ) +if ( length(ul) == length(l) ) { +result <- data.frame(..result.. = as.vector(ul)) +row.names(result) <- NULL +if( !is.null(names(l[[1]])) ) names(result) <- names(l[[1]]) +return(result) +} +# if each element is a data frame, combine them with bind_rows +if ( all( sapply( l, is.data.frame ) ) ) { +return( +parallel::mclapply(l, function(x) {mutate(x, .row= 1:n())}) %>% +dplyr::bind_rows() %>% +mutate(.index = c(1, 1 + cumsum( diff(.row) != 1 ))) +) +} +# If rbind() works, do it +tryCatch( +return ( as.data.frame( do.call( rbind, l) ) ), +error=function(e) {} +) +if (all (sapply(l, length) ) == length(l[[1]]) ) { +result <- as.data.frame( matrix( ul, nrow=length(l) ) ) +names(result) <- names(l[[1]]) return(result) } -#' Aggregate for mosaic +# nothing worked. Just return the list as is. +return( l ) +} +#' Cull objects used with do() #' -#' Compute function on subsets of a variable in a data frame. +#' The [do()] function facilitates easy replication for +#' randomization tests and bootstrapping (among other things). Part of what +#' makes this particularly useful is the ability to cull from the objects +#' produced those elements that are useful for subsequent analysis. +#' `cull_for_do` does this culling. It is generic, and users +#' can add new methods to either change behavior or to handle additional +#' classes of objects. #' -#' @rdname aggregatingAux -#' @return a vector -#' @param formula a formula. Left side provides variable to be summarized. Right side and condition -#' describe subsets. If the left side is empty, right side and condition are -#' shifted over as a convenience. -#' @param data a data frame. -#' Note that the default is `data=parent.frame()`. This makes it convenient to -#' use this function interactively by treating the working environment as if it were -#' a data frame. But this may not be appropriate for programming uses. -#' When programming, it is best to use an explicit `data` argument -#' -- ideally supplying a data frame that contains the variables mentioned -#' in `formula`. -#' @param FUN a function to apply to each subset -#' @param groups grouping variable that will be folded into the formula (if there is room for it). -#' This offers some additional flexibility in how formulas can be specified. -#' @param subset a logical indicating a subset of `data` to be processed. -#' @param drop a logical indicating whether unused levels should be dropped. -#' @param \dots additional arguments passed to `FUN` -#' @param .format format used for aggregation. `"default"` and `"flat"` are equivalent. -# #' @param format format used for aggregation. \code{"default"} and \code{"flat"} are equivalent. -# #' Ignored if \code{.format} is not \code{NULL}. -#' @param .overall currently unused -#' @param .name a name used for the resulting object -# #' @param name a name used for the resulting object. Ignored if \code{.format} is not \code{NULL}. -#' @param .envir an environment in which to evaluate expressions -# #' @param envir an environment in which to evaluate expressions. -# #' Ignored if \code{.envir} is not \code{NULL}. -#' @param .multiple a logical indicating whether FUN returns multiple values -# #' @param multiple a logical indicating whether FUN returns multiple values -#' Ignored if `.multiple` is not `NULL`. +#' @param object an object to be culled +#' @param ... additional arguments (currently ignored) #' -#' @examples -#' if (require(mosaicData)) { -#' maggregate( cesd ~ sex, HELPrct, FUN=mean ) -#' # using groups instead -#' maggregate( ~ cesd, groups = sex, HELPrct, FUN=sd ) -#' # the next four all do the same thing -#' maggregate( cesd ~ sex + homeless, HELPrct, FUN=mean ) -#' maggregate( cesd ~ sex | homeless, HELPrct, FUN=sd ) -#' maggregate( ~ cesd | sex , groups= homeless, HELPrct, FUN=sd ) -#' maggregate( cesd ~ sex, groups = homeless, HELPrct, FUN=sd ) -#' # this is unusual, but also works. -#' maggregate( cesd ~ NULL , groups = sex, HELPrct, FUN=sd ) -#' } +#' @details When `do(n) * expression` is evaluated, `expression` +#' is evaluated `n` times to produce a list of `n` result objects. +#' `cull_for_do` is then applied to each element of this list to +#' extract from it the information that should be stored. For example, +#' when applied to a object of class `"lm"`, +#' the default `cull_for_do` extracts the coefficients, coefficient +#' of determinism, an the estimate for the variance, etc. #' #' @export -maggregate <- -function( -formula, -data=parent.frame(), -FUN, -groups=NULL, -subset, -drop=FALSE, -..., -.format = c('default', 'table', 'flat'), -.overall = mosaic.par.get("aggregate.overall"), -# .multiple = NULL, -.multiple=FALSE, -# .name = NULL, -.name = deparse(substitute(FUN)), -# .envir = NULL, -.envir = parent.frame () -) { -# if (inherits(formula, c("environment", "data.frame")) && -# inherits(data, "formula")) { -# temp <- formula -# formula <- data -# data <- temp -# } -# -if (! inherits(data, c("environment", "data.frame")) ) { -if (inherits(data, c("tbl"))) -stop ("Your tbl is not a data.frame. Perhaps you need dplyr functions here.", -call. = FALSE) -else -stop("data must be an environment or data.frame.", call. = FALSE) -} -formula <- mosaicCore::mosaic_formula_q(formula, groups = groups, envir = .envir) -if (length(formula) == 2) { -return(FUN( eval(formula[[2]], data, .envir), ...)) -} -dots <- list(...) -groupName <- ".group" # gets changed to something better later when possible. -.format <- match.arg(.format, c('default', 'table', 'flat')) -evalF <- mosaicCore::evalFormula(formula, data=data) -if (!missing(subset)) { -subset <- eval(substitute(subset), data, environment(formula)) -if (!is.null(evalF$left)) evalF$left <- evalF$left[subset,] -if (!is.null(evalF$right)) evalF$right <- evalF$right[subset,] -if (!is.null(evalF$condition)) evalF$condition <- evalF$condition[subset,] -} -# this should now be standardized by the call to mosaic_formula_q() above. -# if ( is.null( evalF$left ) ) { -# evalF$left <- evalF$right -# evalF$right <- evalF$condition -# evalF$condition <- NULL -# } -if ( is.null(evalF$left) || ncol(evalF$left) < 1 ) { -if (ncol(evalF$right) > 1) warning("Too many variables in rhs; ignoring all but first.") -if (.format=="table") { -if (.multiple) stop ("table view unavailable for this function.") -ldata <- evalF$right[,1,drop=FALSE] -gdata <- group_by(data) -res <- as.data.frame( -dplyr::do(gdata, foo = FUN( as.data.frame(.)[, 1], ...) ) ) -names(res)[ncol(res)] <- gsub(".*::", "", .name) -return(res) -return(evalF$right[,1,drop=FALSE] %>% -group_by() %>% -dplyr::do( do.call(FUN, list(evalF$right[,1], ...)) ) %>% -as.data.frame() -) -# return(plyr::ddply(evalF$right[,1,drop=FALSE], names(NULL), -# function(x) do.call(FUN, list(evalF$right[,1], ...)) -# )[,-1]) # remove the .id column since it is uninteresting here. +#' @examples +#' cull_for_do(lm(length ~ width, data = KidsFeet)) +#' do(1) * lm(length ~ width, data = KidsFeet) +cull_for_do <- function(object, ...) { +UseMethod("cull_for_do") } -return( do.call(FUN, alist(evalF$right[,1], ...) ) ) -} else { -if (ncol(evalF$left) > 1) warning("Too many variables in lhs; ignoring all but first.") -if (.format=='table') { -if (.multiple) stop ("table view unavailable for this function.") -ldata <- mosaicCore::joinFrames(evalF$left[,1,drop=FALSE], evalF$right, evalF$condition) -ldata$.var <- ldata[, 1] -gdata <- do.call( group_by, c(list(ldata), -lapply(union(names(evalF$right), names(evalF$condition)), -as.name )) ) -res <- as.data.frame( -dplyr::do(gdata, foo = FUN( as.data.frame(.)[, 1], ...) ) ) -names(res)[ncol(res)] <- gsub(".*::", "", .name) -# res <- plyr::ddply( -# mosaicCore::joinFrames(evalF$left[,1,drop=FALSE], evalF$right, evalF$condition), -# union(names(evalF$right), names(evalF$condition)), -# function(x) do.call(FUN, list(x[,1], ...)) -# ) -} else { -res <- lapply( split( evalF$left[, 1], -mosaicCore::joinFrames(evalF$right, evalF$condition), -drop=drop), -function(x) { do.call(FUN, alist(x, ...) ) } -) -groupName <- paste(c(names(evalF$right), names(evalF$condition)), collapse=".") -if (! .multiple ) res <- unlist(res) -if (! is.null(evalF$condition) ) { -if (ncol(evalF$left) > 1) message("Too many variables in lhs; ignoring all but first.") -res2 <- lapply( split( evalF$left[, 1], evalF$condition, drop=drop), -function(x) { do.call(FUN, alist(x, ...) ) } -) -groupName <- paste(names(evalF$condition), collapse=".") -if (!.multiple) { -res <- c( res, unlist(res2) ) -} else { -res <- c(res, res2) +#' @export +cull_for_do.default <- function(object, ...) { +object } +#' @export +cull_for_do.fitdistr <- function(object, ...) { +est <- object$estimate +names(est) <- paste0(names(est), ".est") +se <- object$sd +names(se) <- paste0(names(se), ".se") +c(est, se) } -if (.multiple) { -result <- res -res <- result[[1]] -for (item in result[-1]) { -res <- as.data.frame(rbind(res,item)) +#' @export +cull_for_do.aov <- function(object, ...) { +cull_for_do(anova(object)) } -if ( nrow(res) == length(names(result)) ) { -res[groupName] <- names(result) -} else { -res[groupName] <- rep(names(result), each=nrow(res) / length(names(result)) ) +#' @export +cull_for_do.anova <- function(object, ...) { +res <- as.data.frame(object) +res <- cbind (data.frame(source=row.names(res)), res) +names(res)[names(res) == "Df"] <- "df" +names(res)[names(res) == "Sum Sq"] <- "SS" +names(res)[names(res) == "Mean Sq"] <- "MS" +names(res)[names(res) == "F value"] <- "F" +names(res)[names(res) == "Pr(>F)"] <- "pval" +names(res)[names(res) == "Sum of Sq"] <- "diff.SS" +names(res)[names(res) == "Res.Df"] <- "res.df" +return(res) +return( data.frame( +SSTotal= sum(object$`Sum Sq`), +SSModel= object$`Sum Sq`[1], +SSError= object$`Sum Sq`[2], +MSTotal= sum(object$`Sum Sq`), +MSModel= object$`Mean Sq`[1], +MSError= object$`Mean Sq`[2], +F=object$`F value`[1], +dfModel=object$Df[1], +dfError=object$Df[2], +dfTotal=sum(object$Df) +) ) } -res <- res[, c(ncol(res), 1:(ncol(res) -1))] +#' @export +cull_for_do.table <- function(object, ...) { +result <- data.frame(object) +res <- result[[ncol(result)]] +nms <- as.character(result[[1]]) +if (ncol(result) > 2) { +for (k in 2:(ncol(result)-1)) { +nms <- paste(nms, result[[k]],sep=".") } } +names(res) <- nms +return(res) } -w <- grep("V[[:digit:]]+", names(res)) -if (length(w) == 1) { -names(res)[w] <- gsub(".*:{2,3}", "", .name) -} else { -names(res)[w] <- paste0( gsub(".*:{2,3}", "", .name), 1:length(w) ) -} -row.names(res) <- NULL -return( res ) -} -# for handling functions of two inputs -# under construction still -maggregate2 <- function(formula, data=parent.frame(), FUN, subset, -overall=mosaic.par.get("aggregate.overall"), -.format=c('default', 'table', 'flat'), drop=FALSE, -.multiple=FALSE, -groups=NULL, -.name = deparse(substitute(FUN)), -...) { -dots <- list(...) -formula <- mosaicCore::mosaic_formula_q(formula, groups=groups, as.environment(data)) -.format <- match.arg(.format) -evalF <- mosaicCore::evalFormula(formula, data=data) -if (!missing(subset)) { -subset <- eval(substitute(subset), data, environment(formula)) -if (!is.null(evalF$left)) evalF$left <- evalF$left[subset,] -if (!is.null(evalF$right)) evalF$right <- evalF$right[subset,] -if (!is.null(evalF$condition)) evalF$condition <- evalF$condition[subset,] -} -# this should now be standardized by the call to mosaic_formula_q() above. -# if ( is.null( evalF$left ) ) { -# evalF$left <- evalF$right -# evalF$right <- evalF$condition -# evalF$condition <- NULL -# } -if ( is.null(evalF$left) || ncol(evalF$left) < 1 ) -stop("formula must have lhs.") -if (ncol(evalF$left) > 1) stop("Too many variables in lhs.") -if (.format=='table') { -if (.multiple) stop ("table view unavailable.") -ldata <- mosaicCore::joinFrames(evalF$left[,1,drop=FALSE], evalF$right, evalF$condition) -ldata$.var <- ldata[, 1] -gdata <- do.call( group_by, c(list(ldata), as.name(names(evalF$condition)) ) ) -res <- as.data.frame( dplyr::do(gdata, foo = FUN( as.data.frame(.)[,1], as.data.frame(.)[,2], ...) ) ) -names(res)[ncol(res)] <- gsub(".*::", "", .name) +#' @export +cull_for_do.aggregated.stat <- function(object, ...) { +result <- object +res <- as.vector(result[, "S"]) # ncol(result)] +names(res) <- +paste( attr(object, 'stat.name'), +.squash_names(object[,1:(ncol(object)-3),drop=FALSE]), sep=".") +return(res) +} +#' @export +cull_for_do.lme <- function(object, ...) { +result <- object +names(result) <- mosaicCore::nice_names(names(result)) +return( object$coef$fixed ) +} +#' @export +cull_for_do.lm <- function(object, ...) { +sobject <- summary(object) +Fstat <- sobject$fstatistic[1] +DFE <- sobject$fstatistic["dendf"] +DFM <- sobject$fstatistic["numdf"] +if (!is.null(Fstat)) { +names(Fstat) <- "F" +result <- c(coef(object), sigma=sobject$sigma, +r.squared = sobject$r.squared, +Fstat, +DFM, +DFE) } else { -res <- lapply( split( evalF$left[,1], -mosaicCore::joinFrames(evalF$right, evalF$condition), -drop=drop), -function(x) { do.call(FUN, alist(x, ...) ) } +result <- c(coef(object), sigma=sobject$sigma, +r.squared = sobject$r.squared ) -groupName <- paste(c(names(evalF$right), names(evalF$condition)), collapse=".") -if (! .multiple ) res <- unlist(res) -if (! is.null(evalF$condition) ) { -if (ncol(evalF$left) > 1) message("Too many variables in lhs; ignoring all but first.") -res2 <- lapply( split( evalF$left[,1], evalF$condition, drop=drop), -function(x) { do.call(FUN, alist(x, ...) ) } +} +mosaicCore::vector2df(result, nice_names = TRUE) +} +# @export +# cull_for_do.groupwiseModel <- function(object, ...) { +# sobject <- summary(object) +# Fstat <- sobject$fstatistic[1] +# DFE <- sobject$fstatistic["dendf"] +# DFM <- sobject$fstatistic["numdf"] +# if (!is.null(Fstat)) { +# names(Fstat) <- "F" +# result <- c(coef(object), sigma=sobject$sigma, +# r.squared = sobject$r.squared, +# Fstat, +# DFM, +# DFE) +# } else { +# result <- c(coef(object), sigma=sobject$sigma, +# r.squared = sobject$r.squared +# ) +# } +# names(result) <- nice_names(names(result)) +# return(result) +# } +# +#' @export +cull_for_do.htest <- function(object, ...) { +if (is.null(object$conf.int)) { +result <- data.frame( +statistic = null2na(object$statistic), +parameter = null2na(object$parameter), +p.value = null2na(object$p.value), +method = null2na(object$method), +alternative = null2na(object$alternative), +data = null2na(object$data.name) ) -groupName <- paste(names(evalF$condition), collapse=".") -if (!.multiple) { -res <- c( res , unlist(res2) ) } else { -res <- c(res, res2) +result <- data.frame( +statistic = null2na(object$statistic), +parameter = null2na(object$parameter), +p.value = null2na(object$p.value), +conf.level = attr(object$conf.int,"conf.level"), +lower = object$conf.int[1], +upper = object$conf.int[2], +method = null2na(object$method), +alternative = null2na(object$alternative), +data = null2na(object$data.name) +) } +if ( !is.null(names(object$statistic)) ) +names(result)[1] <- names(object$statistic) +if ( !is.null(names(object$parameter)) ) +names(result)[2] <- names(object$parameter) +return(result) } -if (.multiple) { -result <- res -res <- result[[1]] -for (item in result[-1]) { -res <- as.data.frame(rbind(res,item)) +# if (inherits(object, 'table') ) { +# nm <- names(object) +# result <- as.vector(object) +# names(result) <- nm +# return(result) +# } +#' @export +cull_for_do.cointoss <- function(object, ...) { +return( c(n=attr(object,'n'), +heads=sum(attr(object,'sequence')=='H'), +tails=sum(attr(object,'sequence')=='T'), +prop=sum(attr(object,'sequence')=="H") / attr(object,'n') +) ) } -if ( nrow(res) == length(names(result)) ) { -res[groupName] <- names(result) +#' @export +cull_for_do.matrix <- function(object, ...) { +if (ncol(object) == 1) { +nn <- rownames(object) +object <- as.vector(object) +if (is.null(nn)) { +names(object) <- paste('v',1:length(object),sep="") } else { -res[groupName] <- rep(names(result), each=nrow(res) / length(names(result)) ) +names(object) <- nn +} +return(object) } -res <- res[, c(ncol(res), 1:(ncol(res) -1))] +if (nrow(object) > 1) { +res <- as.data.frame(object) +res[[".row"]] <- row.names(object) +return(res) } +# if we get here, we have a 1-row or empty matrix +row.names(object) <- NULL +object +} +#' @rdname do +#' @aliases *,repeater,ANY-method +#' @export +setMethod( +"*", +signature(e1 = "repeater", e2="ANY"), +function (e1, e2) +{ +e2_lazy <- rlang::enquo(e2) +# e2unevaluated = substitute(e2) +# if ( ! is.function(e2) ) { +# frame <- parent.frame() +# e2 = function(){eval(e2unevaluated, envir=frame) } +# } +n = e1@n +cull = e1@cull +if (is.null(cull)) { +cull <- cull_for_do +} +out.mode <- if (!is.null(e1@mode)) e1@mode else 'default' +resultsList <- if( e1@parallel && "package:parallel" %in% search() ) { +if (getOption("mosaic:parallelMessage", TRUE)) { +message("Using parallel package.\n", +" * Set seed with set.rseed().\n", +" * Disable this message with options(`mosaic:parallelMessage` = FALSE)\n") } -w <- grep("V[[:digit:]]+", names(res)) -if (length(w) == 1) { -names(res)[w] <- gsub(".*:{2,3}", "", .name) +parallel::mclapply( integer(n), function(...) { cull(rlang::eval_tidy(e2_lazy)) }, mc.cores = 4 ) } else { -names(res)[w] <- paste0( gsub(".*:{2,3}", "", .name), 1:length(w) ) -} -row.names(res) <- NULL -return( res ) -} -safe_eval(9) -?rlang::lifecycle -covr::report() -library("rlang", lib.loc="~/R/win-library/3.5") -install.packages("rlang") -install_github("https://github.com/r-lib/rlang.git") -install_github.packages("https://github.com/r-lib/rlang.git") -install_github.packages(r-lib/rlang) -install_github(r-lib/rlang) -library(devtools) -library("devtools") -library("devtools", lib.loc="~/R/win-library/3.5") -remove.packages(c("git2r", "devtools", "rversions")) -install.packages("devtools") -install_github("r-lib/rlang") -devtools::install_github("r-lib/rlang") -install.packages("rlang") -library("rlang", lib.loc="~/R/win-library/3.5") -?parent.frame -install.packages("rlang") -install.packages("rlang") -library(mosaic) -mplot(KidsFeet) -install.packages("manipulate") -mplot(KidsFeet) -methods("mplot") -mplot(lm(length ~ width, data = KidsFeet)) -freqpolygon( ~ Sepal.Length, data = iris) -historam( ~ Sepal.Length, data = iris) -histogram( ~ Sepal.Length, data = iris) -freqpolygon(~Sepal.Length | Species, data = iris) -example(freqpolygon) -library(mosaic) -mplot(KidsFeet) -2 -source('~/GitHub/ExampleRPackage/mosaic/R/mPlots.R') -mplot(KidsFeet) -detach(mosaic) -source('~/GitHub/ExampleRPackage/mosaic/R/mPlots.R') -mplot(KidsFeet) -source('~/GitHub/ExampleRPackage/mosaic/R/mPlots.R') -source('~/GitHub/ExampleRPackage/mosaic/R/mPlots.R') -mplot(KidsFeet) -source('~/GitHub/ExampleRPackage/mosaic/R/mplot.R') -mplot(KidsFeet) -install.packages("manipulate") -mplot(kidsFeet) -install.packages("manipulate") -install.packages("manipulate") -library(mosaicData) -mplot(KidsFeet) -source('~/GitHub/ExampleRPackage/mosaic/R/mplot.R') -mplot(KidsFeet) -source('~/GitHub/ExampleRPackage/mosaic/R/mPlots.R') -mplot(KidsFeet) -source('~/GitHub/ExampleRPackage/mosaic/R/mPlots.R') -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library("manipulate", lib.loc="~/R/win-library/3.5") -mplot(KidsFeet) -source('~/GitHub/ExampleRPackage/mosaic/R/utils.R') -mplot(KidsFeet) -source('~/GitHub/ExampleRPackage/mosaic/R/mplot.R') -source('~/GitHub/ExampleRPackage/mosaic/R/mPlots.R') -source('~/GitHub/ExampleRPackage/mosaic/R/mPlots.R') -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -source('~/GitHub/ExampleRPackage/mosaic/R/mPlots.R') -mplot(KidsFeet) -library("manipulate", lib.loc="~/R/win-library/3.5") -mplot(KidsFeet) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library("mosaic", lib.loc="~/R/win-library/3.5") -mplot(KidsFeet) -mosaic::mplot(KidsFeet) -library(ggformula) -mplot(KidsFeet) -library("ggformula", lib.loc="~/R/win-library/3.5") -library("ggplot2", lib.loc="~/R/win-library/3.5") -mplot(KidsFeet) -debug(mplot(KidsFeet)) -debugonce(mplot(KidsFeet)) -debugonce(mplot) -KidsFeet -debug() -debug({ mplot(KidsFeet) }) -devtools::load_all(".") -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library("manipulate", lib.loc="~/R/win-library/3.5") -mplot(KidsFeet) -library("ggplot2", lib.loc="~/R/win-library/3.5") -library("ggformula", lib.loc="~/R/win-library/3.5") -mplot(KidsFeet) -mplot(KidsFeet) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -mplot(KidsFeet) -library(mosaic) -library(mosaic) -library(mosaic) -library(mosaic) -library(mosaic) -library(mosaic) -library(mosaic) -bootstrap <- do(500) * diffmean( age ~ sex, data = resample(HELPrct) ) -confint(bootstrap) -library(mosaic) -library(mosaic) -bootstrap <- do(500) * diffmean( age ~ sex, data = resample(HELPrct) ) -confint(bootstrap) -library(mosaic) -bootstrap <- do(500) * diffmean( age ~ sex, data = resample(HELPrct) ) -confint(bootstrap) -library(mosaic) -bootstrap <- do(500) * diffmean( age ~ sex, data = resample(HELPrct) ) -confint(bootstrap) -confint(bootstrap, method ="stderr") -library(mosaic) -confint(bootstrap, method ="stderr") %>% dput() -library(mosaic) -confint(bootstrap, method ="stderr") %>% dput() -library(mosaic) -bootstrap <- do(500) * diffmean( age ~ sex, data = resample(HELPrct) ) %>% dput() -bootstrap <- do(500) * diffmean( age ~ sex, data = resample(HELPrct) ) -bootstrap <- do(500) * diffmean( age ~ sex, data = resample(HELPrct) ) %>% dput() -bootstrap <- do(50) * diffmean( age ~ sex, data = resample(HELPrct) ) %>% dput() +lapply( integer(n), function(...) { cull(rlang::eval_tidy(e2_lazy)) } ) +} +if (out.mode=='default') { # is there any reason to be fancier? +out.mode = 'data.frame' +} +result <- switch(out.mode, +"list" = resultsList, +"data.frame" = .list2tidy.data.frame( resultsList ), +"matrix" = as.matrix( do.call( rbind, resultsList) ), +"vector" = unlist(resultsList) +) +class(result) <- c(paste('do', class(result)[1], sep="."), class(result)) +if (inherits( result, "data.frame")) { +# we get mutliple parts here if expression involves, for example, :: +# just grab last part. (paste()ing would be out of order +alt_name <- tryCatch( +tail(as.character(rhs(e2_lazy)[[1]]), 1), +error = function(e) "result" +) +names(result) <- mosaicCore::nice_names(names(result)) +names(result)[names(result) == "..result.."] <- +if(mosaicCore::nice_names(alt_name) == alt_name) alt_name else "result" +} +attr(result, "lazy") <- e2_lazy +if (out.mode == "data.frame") attr(result, "culler") <- cull +return(result) +}) +do(3000) * rnorm(1) +do(3000) * "hello" +do(3000) * 1:4 +do(3000) * mean(rnorm(25)) +do(3000) * lm(shuffle(height) ~ sex + mother, Galton) +do(3000) * anova(lm(shuffle(height) ~ sex + mother, Galton)) +do(3000) * c(sample.mean = mean(rnorm(25))) +# change the names on the fly +do(3000) * mean(~height, data = resample(Galton)) +do(3000) * c(mean_height = mean(~height, data = resample(Galton))) +set.rseed(1234) +do(3000) * tally( ~sex|treat, data=resample(HELPrct)) +set.rseed(1234) # re-using seed gives same results again +do(3000) * tally( ~sex|treat, data=resample(HELPrct)) +}) diff --git a/R/binom.test.R b/R/binom.test.R index ef8b13fc..7feeb9ff 100644 --- a/R/binom.test.R +++ b/R/binom.test.R @@ -175,7 +175,7 @@ setMethod( { if (is.null(data)) { if (! is.null(n)) stop("Improper `n'; did you forget `data = ' perhaps?", call. = FALSE) - data <- rlang::fn_env(x) + data <- environment(x) } formula <- mosaic_formula_q(x, groups = NULL, max.slots = 1) diff --git a/R/do.R b/R/do.R index 35ec5032..39d6c823 100644 --- a/R/do.R +++ b/R/do.R @@ -6,7 +6,6 @@ require(compiler) require(rlang) parallel::detectCores() NA - #' Set seed in parallel compatible way #' @@ -270,7 +269,7 @@ print.repeater <- function(x, ...) # if each element is a data frame, combine them with bind_rows if ( all( sapply( l, is.data.frame ) ) ) { return( - lapply(l, function(x) {mutate(x, .row= 1:n())}) %>% + parallel::mclapply(l, function(x) {mutate(x, .row= 1:n())}) %>% dplyr::bind_rows() %>% mutate(.index = c(1, 1 + cumsum( diff(.row) != 1 ))) ) @@ -573,4 +572,4 @@ setMethod( attr(result, "lazy") <- e2_lazy if (out.mode == "data.frame") attr(result, "culler") <- cull return(result) - }) + }) \ No newline at end of file diff --git a/RBuildIgnore/Do Profile with Speedup.Rprofvis b/RBuildIgnore/Do Profile with Speedup.Rprofvis new file mode 100644 index 00000000..6da77419 --- /dev/null +++ b/RBuildIgnore/Do Profile with Speedup.Rprofvis @@ -0,0 +1,24 @@ + + +
+ +