diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..863d843 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +**DS_Store** +**Rcheck** +**tar.gz +**Rapp.history +**.pdf +**.RData +**.o +**.so +forLater/josh/** \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..f8073fb --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "C-software"] + path = C-software + url = https://github.com/selective-inference/C-software.git diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..76ab515 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,21 @@ +language: r +cache: packages +sudo: false +r: + - oldrel + - release + - devel +addons: + apt: + packages: libmpfr-dev +warnings_are_errors: true +before_install: + - tlmgr install index # for texlive and vignette? + - R -e 'install.packages(c("Rcpp", "intervals", "adaptMCMC", "glmnet"), repos="http://cloud.r-project.org")' + - cd C-software + - git submodule init + - git submodule update + - cd .. + - make src + - make Rcpp + - cd selectiveInference diff --git a/C-software b/C-software new file mode 160000 index 0000000..563bf1a --- /dev/null +++ b/C-software @@ -0,0 +1 @@ +Subproject commit 563bf1aa370b55f8343693224717047f1df0d0c3 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..671099f --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +Rcpp: + - rm -f selectiveInference/src/RcppExports.cpp + - rm -f selectiveInference/R/RcppExports.R + Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" + +install: Rcpp src + R CMD INSTALL selectiveInference + +build: src + R CMD build selectiveInference + +src: + cp C-software/src/* selectiveInference/src + +check: Rcpp build + R CMD build selectiveInference + R CMD check selectiveInference_1.2.2.tar.gz # fix this to be a script variable \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0a60855 --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ +# R-software +R software for [selective inference](http://cran.r-project.org/web/packages/selectiveInference/). +Authors: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid +Maintainer: Rob Tibshirani + +New tools for inference after selection, for use with forward stepwise regression, least angle regression, the lasso, and the many means problem. The package is available on [CRAN](http://cran.r-project.org/web/packages/selectiveInference/). See [this paper](http://www.pnas.org/content/112/25/7629.full) for a high level introduction to selective inference. + +Code is in the directory selectiveInference/R. +* funs.common.R: Basic functions used by many other functions, such as standardization. +* funs.fixed.R: Inference for LASSO at a fixed, deterministic value of lambda. +* funs.fs.R: Inference for forward stepwise. +* funs.groupfs.R: Inference for forward stepwise with groups of variables, e.g. factors. +* funs.inf.R: Common functions for inference with fixed, fs, lar, and manymeans (but not group). +* funs.lar.R: Inference for least angle regression. +* funs.max.R: Some numerical approximations. Deprecated? + +## Installation +The latest release of the package can be installed through CRAN: + +```R +install.packages("selectiveInference") +``` +Code in repo is under development and may be unstable. 
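+
+The development code here can also be built and installed from source. The
+C code lives in the C-software submodule, so the submodule must be fetched
+before building; the steps below mirror .travis.yml and the Makefile (see
+also the next section). A minimal sketch, assuming the repository clone URL
+(it is not stated in this README) and that R and the Rcpp package are
+already available:
+
+```
+git clone https://github.com/selective-inference/R-software.git   # URL assumed
+cd R-software
+git submodule init
+git submodule update                 # fetch the C-software sources
+make src                             # copy C sources into selectiveInference/src
+make Rcpp                            # regenerate RcppExports via Rcpp::compileAttributes
+R CMD INSTALL selectiveInference     # or: make install
+```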
+ +## For development + +You will have to run + +``` +make Rcpp +``` \ No newline at end of file diff --git a/forLater/estimateLambda.Rd b/forLater/estimateLambda.Rd new file mode 100644 index 0000000..9cb1cdb --- /dev/null +++ b/forLater/estimateLambda.Rd @@ -0,0 +1,70 @@ +\name{estimateLambda} +\alias{estimateLambda} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +Estimates the lasso tuning parameter lambda. +} +\description{ +Estimates the lasso tuning parameter lambda, for use in the selectiveInference +package. +} +\usage{ +estimateLambda(x, sigma, nsamp=1000) +} +\arguments{ + \item{x}{ +Matrix of predictors (n by p) +} + \item{sigma}{ +Estimate of error standard deviation +} +\item{nsamp}{Number of Monte Carlo samples used for the estimation.} +} +\details{ +This function estimates the lasso tuning parameter lambda, using the estimate +2*E(||X^T eps||_infty), where eps ~ N(0, sigma^2 I) is a vector of length n. +This estimate was proposed by Negahban et al. (2012). +} +\value{ +The estimated value of the lasso tuning parameter lambda. +} +\references{ +Negahban, S. N., +Ravikumar, P., +Wainwright, M. J. +and Yu, B. +(2012). A unified +framework for high-dimensional analysis of +M-estimators with decomposable regularizers. +Statistical Science, vol. 27, pages 538-557. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +#NOT RUN +#set.seed(43) +#n=50 +#p=10 +#sigma=.7 +#x=matrix(rnorm(n*p),n,p) +#x=scale(x,T,F) +#beta=c(3,2,0,0,rep(0,p-4)) +#y=x%*%beta+sigma*rnorm(n) +#y=y-mean(y) +# +#estimate lambda using the known value of sigma +#lamhat=estimateLambda(x,sigma=.7) +# +#first estimate sigma +#sigmahat=estimateSigma(x,y)$sigmahat +#lamhat=estimateLambda(x,sigma=sigmahat) + +#compare to estimate from cv + +#out=cv.glmnet(x,y) +#out$lambda.min*n #remember that value from glmnet must be + # multiplied by n, to make it comparable. +} + diff --git a/forLater/fixedLassoPoly.Rd b/forLater/fixedLassoPoly.Rd new file mode 100644 index 0000000..88c5446 --- /dev/null +++ b/forLater/fixedLassoPoly.Rd @@ -0,0 +1,81 @@ +\name{fixedLassoPoly} +\alias{fixedLassoPoly} + +\title{ +Compute polyhedral constraints for a LASSO problem with +a fixed value of lambda. +} +\description{ +Compute the polyhedral representation of the selection region of Lee et al. (2016). +By construction, y should satisfy A %*% y elementwise less than or equal to b. +} +\usage{ +fixedLassoPoly(X, y, lambda, beta, active, inactive = FALSE) +} +\arguments{ +\item{X}{ +Design matrix of LASSO problem. +} +\item{y}{ +Response of LASSO problem. +} +\item{lambda}{ +Value of regularization parameter. +} +\item{beta}{ +Solution of LASSO problem with regularization parameter set to lambda. +} +\item{active}{ +Active set of the LASSO problem as a boolean vector. Should correspond +to the non-zeros of beta. +} +\item{inactive}{ +Form the inactive constraints as well? +} +} +\details{ +This function computes +the polyhedral representation of the selection region of Lee et al. (2016). +} + +\value{ +\item{A}{Linear part of the affine inequalities.} +\item{b}{RHS offset of the affine inequalities.} +} + +\references{ +Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2016). +Exact post-selection inference, with application to the lasso. Annals of Statistics, 44(3), 907-927. +​
+Jonathan Taylor and Robert Tibshirani (2017). Post-selection inference for L1-penalized likelihood models. +Canadian Journal of Statistics, xx, 1-21.
(Volume still not posted) +} +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ + +set.seed(43) +n = 50 +p = 10 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x = scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# first run glmnet +gfit = glmnet(x,y,standardize=FALSE) + +# extract coef for a given lambda; note the 1/n factor! +# (and we don't save the intercept term) +lambda = .8 +beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +active = (beta != 0) + +fixedLassoPoly(x, y, lambda, beta, active) +fixedLassoPoly(x, y, lambda, beta, active, inactive=TRUE) + +} + \ No newline at end of file diff --git a/forLater/funs.fixed.R b/forLater/funs.fixed.R new file mode 100644 index 0000000..ac00545 --- /dev/null +++ b/forLater/funs.fixed.R @@ -0,0 +1,198 @@ +# Lasso inference function (for fixed lambda). Note: here we are providing inference +# for the solution of +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 + +fixedLassoInf <- function(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE) { + + this.call = match.call() + type = match.arg(type) + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (length(beta) != p) stop("beta must have length equal to ncol(x)") + + # If glmnet was run with an intercept term, center x and y + if (intercept==TRUE) { + obj = standardize(x,y,TRUE,FALSE) + x = obj$x + y = obj$y + } + + # Check the KKT conditions + g = t(x)%*%(y-x%*%beta) / lambda + if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + vars = which(abs(beta) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Get lasso polyhedral region, of form Gy >= u + out = fixedLasso.poly(x,y,beta,lambda,vars) + G = out$G + u = out$u + + # Check polyhedral region + tol.poly = 0.01 + if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + k = length(vars) + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + sign = numeric(k) + + if (type=="full" & p > n) + warning(paste("type='full' does not make sense when p > n;", + "switching to type='partial'")) + + if (type=="partial" || p > n) { + xa = x[,vars,drop=F] + M = pinv(crossprod(xa)) %*% t(xa) + } + else { + M = pinv(crossprod(x)) %*% t(x) + M = M[vars,,drop=F] + } + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + a = poly.pval(y,G,u,vj,sigma,bits) + pv[j] = a$pv + vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj # Unstandardize (mult by norm of vj) + + a = poly.int(y,G,u,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea + } + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + call=this.call) + class(out) = "fixedLassoInf" + return(out) +} + +############################## + +fixedLasso.poly <- function(x, y, beta, lambda, a) { + xa = x[,a,drop=F] + xac = x[,!a,drop=F] + xai = pinv(crossprod(xa)) + xap = xai %*% t(xa) + za = sign(beta[a]) + if (length(za)>1) dz = diag(za) + if (length(za)==1) dz = matrix(za,1,1) + + P = diag(1,nrow(xa)) - xa %*% xap + G = -rbind(1/lambda * t(xac) %*% P, + -1/lambda * t(xac) %*% P, + -dz %*% xap) + u = -c(1 - t(xac) %*% t(xap) %*% za, + 1 + t(xac) %*% t(xap) %*% za, + -lambda * dz %*% xai %*% za) + + return(list(G=G,u=u)) +} + +# Moore-Penrose pseudo inverse for symmetric matrices + +pinv <- function(A, tol=.Machine$double.eps) { + e = eigen(A) + v = Re(e$vec) + d = Re(e$val) + d[d > tol] = 1/d[d > tol] + d[d < tol] = 0 + if (length(d)==1) return(v*d*v) + else return(v %*% diag(d) %*% t(v)) +} + +############################## + +print.fixedLassoInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + +#estimateLambda <- function(x, sigma, nsamp=1000){ +# checkargs.xy(x,rep(0,nrow(x))) +# if(nsamp < 10) stop("More Monte Carlo samples required for estimation") +# if (length(sigma)!=1) stop("sigma should be a number > 0") + # if (sigma<=0) stop("sigma should be a number > 0") + + # n = nrow(x) + # eps = sigma*matrix(rnorm(nsamp*n),n,nsamp) + # lambda = 2*mean(apply(t(x)%*%eps,2,max)) + # return(lambda) +#} + diff --git a/forLater/funs.fs.R b/forLater/funs.fs.R new file mode 100644 index 0000000..b75923d --- /dev/null +++ b/forLater/funs.fs.R @@ -0,0 +1,744 @@ +# We compute the forward stepwise regression (FS) path given +# a response vector y and predictor matrix x. We assume +# that x has columns in general position. + +fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, + verbose=FALSE) { + + this.call = match.call() + checkargs.xy(x=x,y=y) + + # Center and scale, etc. + obj = standardize(x,y,intercept,normalize) + x = obj$x + y = obj$y + bx = obj$bx + by = obj$by + sx = obj$sx + n = nrow(x) + p = ncol(x) + + ##### + # To keep consistent with the lar function, we parametrize + # so that the first step has all zero coefficients, + # Also, an interesting note: the effective "lambda" (maximal + # correlation with the residual) may increase with stepwise! + # So we don't keep track of it + + ##### + # Find the first variable to enter and its sign + working_scale = sqrt(colSums(x^2)) + working_x = scale(x,center=F,scale=working_scale) + working_score = t(working_x)%*%y + i_hit = which.max(abs(working_score)) # Hitting coordinate + sign_hit = Sign(working_score[i_hit]) # Sign + signs = sign_hit # later signs will be appended to `signs` + + if (verbose) { + cat(sprintf("1. Adding variable %i, |A|=%i...",i_hit,1)) + } + + # Now iteratively find the new FS estimates + + # Things to keep track of, and return at the end + # JT: I guess the "buf" just saves us from making huge + # matrices we don't need? 
+ + buf = min(maxsteps,500) + action = numeric(buf) # Actions taken + df = numeric(buf) # Degrees of freedom + beta = matrix(0,p,buf) # FS estimates + + # Buffered objects for selective maxZ test + + offset_pos_maxZ = matrix(Inf, p, buf) # upper bounds for selective maxZ + offset_neg_maxZ = matrix(Inf, p, buf) # lower bounds for selective maxZ + scale_maxZ = matrix(0, p, buf) # lower bounds for selective maxZ + realized_maxZ = numeric(buf) # lower bounds for selective maxZ + + action[1] = i_hit + df[1] = 0 + beta[,1] = 0 + + ##### + # Variables needed to compute truncation limits for + # selective maxZ test + + realized_maxZ[1] = c(sign_hit * working_score[i_hit]) + offset_pos_maxZ[,1] = Inf + offset_neg_maxZ[,1] = Inf + scale_maxZ[,1] = working_scale + working_resid_maxZ = y - x %*% beta[,1] + + # Gamma matrix! + gbuf = max(2*p*3,2000) # Space for 3 steps, at least + gi = 0 # index into rows of Gamma matrix + zi = 0 # index into rows of Gamma_maxZ matrix + + Gamma = matrix(0,gbuf,n) + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+1,] = t(sign_hit*working_x[,i_hit]); gi = gi+1 + + # Gamma_maxZ is the rbind + # of residualized X_inactive's + + Gamma_maxZ = matrix(0,gbuf,n) + Gamma_maxZ[zi+Seq(1,p),] = t(x); zi = zi+p + + # nconstraint + nconstraint = numeric(buf) + vreg = matrix(0,buf,n) + nconstraint[1] = gi + vreg[1,] = sign_hit*x[,i_hit] / sum(x[,i_hit]^2) + + # Other things to keep track of, but not return + r = 1 # Size of active set + A = i_hit # Active set -- JT: isn't this basically the same as action? + I = Seq(1,p)[-i_hit] # Inactive set + X_active = x[,i_hit,drop=FALSE] # Matrix X[,A] + X_inactive = x[,-i_hit,drop=FALSE] # Matrix X[,I] + k = 2 # What step are we at? + # JT Why keep track of r and k instead of just saying k=r+1? + + # Compute a skinny QR decomposition of X_active + # JT: obs was used as variable name above -- this is something different, no? + # changed it to qr_X + + qr_X = qr(X_active) + Q = qr.Q(qr_X,complete=TRUE) + Q_active = Q[,1,drop=FALSE]; + Q_inactive = Q[,-1,drop=FALSE] + R = qr.R(qr_X) + + # Throughout the algorithm, we will maintain + # the decomposition X_active = Q_active*R. 
Dimensions: + # X_active: n x r + # Q_active: n x r + # Q_inactive: n x (n-r) + # R: r x r + + while (k<=maxsteps) { + ########## + # Check if we've reached the end of the buffer + if (k > length(action)) { + buf = length(action) + action = c(action,numeric(buf)) + df = c(df,numeric(buf)) + beta = cbind(beta,matrix(0,p,buf)) + nconstraint = c(nconstraint,numeric(buf)) + vreg = rbind(vreg,matrix(0,buf,n)) + + offset_pos_maxZ = cbind(offset_pos_maxZ, matrix(0, p, buf)) + offset_neg_maxZ = cbind(offset_neg_maxZ, matrix(0, p, buf)) + scale_maxZ = cbind(scale_maxZ, matrix(0, p, buf)) + realized_maxZ = c(realized_maxZ, numeric(buf)) + } + + # Key quantities for the next entry + + keepLs=backsolve(R,t(Q_active)%*%X_inactive) + + prev_scale = working_scale[-i_hit] # this variable used later for maxZ + X_inactive_resid = X_inactive - X_active %*% keepLs + working_scale = sqrt(colSums(X_inactive_resid^2)) # this variable used later for maxZ + working_x = scale(X_inactive_resid,center=F,scale=working_scale) + working_score = as.numeric(t(working_x)%*%y) + + beta_cur = backsolve(R,t(Q_active)%*%y) # must be computed before the break + # so we have it if we have + # completed the path + + # If the inactive set is empty, nothing will hit + if (r==min(n-intercept,p)) break + + # Otherwise find the next hitting time + else { + sign_score = Sign(working_score) + abs_score = sign_score * working_score + i_hit = which.max(abs_score) + sign_hit = sign_score[i_hit] + # keep track of necessary quantities for selective maxZ + + offset_shift = t(X_inactive) %*% (y - working_resid_maxZ) + realized_Z_scaled = realized_maxZ[k-1] * prev_scale + offset_pos_maxZ[I,k] = realized_Z_scaled + offset_shift + offset_neg_maxZ[I,k] = realized_Z_scaled - offset_shift + scale_maxZ[I,k] = working_scale + + working_resid_maxZ = y - X_active %*% beta_cur + } + + # Record the solution + # what is the difference between "action" and "A"? + + action[k] = I[i_hit] + df[k] = r + beta[A,k] = beta_cur + + # store the X_inactive_resid in Gamma_maxZ + + if (gi + p-r > nrow(Gamma_maxZ)) Gamma_maxZ = rbind(Gamma_maxZ,matrix(0,p-r,n)) + Gamma_maxZ[zi+Seq(1,p-r),] = t(X_inactive_resid); zi = zi+p-r + + # update maxZ variable + realized_maxZ[k] = sign_hit * working_score[i_hit] + + # Gamma matrix! + + if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) + working_x = t(sign_score*t(working_x)) + #Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+1,] = t(working_x[,i_hit]); gi = gi+1 + + # nconstraint, regression contrast + nconstraint[k] = gi + vreg[k,] = sign_hit*X_inactive_resid[,i_hit] / sum(X_inactive_resid[,i_hit]^2) + + # Update all of the variables + r = r+1 + A = c(A,I[i_hit]) + I = I[-i_hit] + signs = c(signs,sign_hit) + X_active = cbind(X_active,X_inactive[,i_hit]) + X_inactive = X_inactive[,-i_hit,drop=FALSE] + + # Update the QR decomposition + updated_qr = updateQR(Q_active,Q_inactive,R,X_active[,r]) + Q_active = updated_qr$Q1 + + # JT: why do we store Q_inactive? Doesn't seem to be used. + Q_inactive = updated_qr$Q2 + R = updated_qr$R + + if (verbose) { + cat(sprintf("\n%i. 
Adding variable %i, |A|=%i...",k,A[r],r)) + } + + # Step counter + k = k+1 + } + + # Trim + action = action[Seq(1,k-1)] + df = df[Seq(1,k-1),drop=FALSE] + beta = beta[,Seq(1,k-1),drop=FALSE] + Gamma = Gamma[Seq(1,gi),,drop=FALSE] + nconstraint = nconstraint[Seq(1,k-1)] + vreg = vreg[Seq(1,k-1),,drop=FALSE] + + offset_pos_maxZ = offset_pos_maxZ[,Seq(1,k-1),drop=FALSE] + offset_neg_maxZ = offset_neg_maxZ[,Seq(1,k-1),drop=FALSE] + scale_maxZ = scale_maxZ[,Seq(1,k-1),drop=FALSE] + Gamma_maxZ = Gamma_maxZ[Seq(1,zi),,drop=FALSE] + + # If we reached the maximum number of steps + if (k>maxsteps) { + if (verbose) { + cat(sprintf("\nReached the maximum number of steps (%i),",maxsteps)) + cat(" skipping the rest of the path.") + } + completepath = FALSE + bls = NULL + } + + # Otherwise, note that we completed the path + else { + completepath = TRUE + + # Record the least squares solution. Note that + # we have already computed this + bls = rep(0,p) + if(length(keepLs)>0) bls[A] = keepLs + + } + + if (verbose) cat("\n") + + # Adjust for the effect of centering and scaling + if (intercept) df = df+1 + if (normalize) beta = beta/sx + if (normalize && completepath) bls = bls/sx + + # Assign column names + colnames(beta) = as.character(Seq(1,k-1)) + + out = list(action=action,sign=signs,df=df,beta=beta, + completepath=completepath,bls=bls, + Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, + intercept=intercept,normalize=normalize,call=this.call, + offset_pos_maxZ=offset_pos_maxZ,offset_neg_maxZ=offset_neg_maxZ, + scale_maxZ=scale_maxZ,Gamma_maxZ=Gamma_maxZ,realized_maxZ=realized_maxZ) + class(out) = "fs" + return(out) +} + +############################## + +# Coefficient function for fs + +coef.fs <- function(object, s, ...) { + if (object$completepath) { + k = length(object$action)+1 + beta = cbind(object$beta,object$bls) + } else { + k = length(object$action) + beta = object$beta + } + + if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k)) + knots = 1:k + dec = FALSE + return(coef.interpolate(beta,s,knots,dec)) +} + +# Prediction function for fs + +predict.fs <- function(object, newx, s, ...) 
{ + beta = coef.fs(object,s) + if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) + else newx = scale(newx,object$bx,FALSE) + return(newx %*% beta + object$by) +} + +############################## + +# FS inference function + +fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { + + this.call = match.call() + type = match.arg(type) + checkargs.misc(sigma=sigma,alpha=alpha,k=k, + gridrange=gridrange,mult=mult,ntimes=ntimes) + if (class(obj) != "fs") stop("obj must be an object of class fs") + if (is.null(k) && type=="active") k = length(obj$action) + if (is.null(k) && type=="all") stop("k must be specified when type = all") + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + k = min(k,length(obj$action)) # Round to last step + x = obj$x + y = obj$y + p = ncol(x) + n = nrow(x) + G = obj$Gamma + nconstraint = obj$nconstraint + sx = obj$sx + + if (is.null(sigma)) { + if (n >= 2*p) { + oo = obj$intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + khat = NULL + + if (type == "active") { + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + vreg = obj$vreg[1:k,,drop=FALSE] + sign = obj$sign[1:k] + vars = obj$action[1:k] + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + Gj = G[1:nconstraint[j],] + uj = rep(0,nconstraint[j]) + vj = vreg[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + + khat = forwardStop(pv,alpha) + } + + else { + if (type == "aic") { + out = aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) + khat = out$khat + m = out$stopped * ntimes + G = rbind(out$G,G[1:nconstraint[khat+m],]) # Take ntimes more steps past khat + u = c(out$u,rep(0,nconstraint[khat+m])) # (if we need to) + kk = khat + } + else { + G = G[1:nconstraint[k],] + u = rep(0,nconstraint[k]) + kk = k + } + + pv = vlo = vup = numeric(kk) + vmat = matrix(0,kk,n) + ci = tailarea = matrix(0,kk,2) + sign = numeric(kk) + vars = obj$action[1:kk] + xa = x[,vars] + M = pinv(crossprod(xa)) %*% t(xa) + + for (j in 1:kk) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + Gj = rbind(G,vj) + uj = c(u,0) + + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + 
flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + } + + # JT: why do we output vup, vlo? Are they used somewhere else? + + out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + call=this.call) + class(out) = "fsInf" + return(out) +} + +############################## + +############################## + +# selected maxZ tests + +fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL, + ndraw=8000, burnin=2000, verbose=FALSE) { + + this.call = match.call() + + checkargs.misc(sigma=sigma,alpha=alpha,k=k) + + if (class(obj) != "fs") stop("obj must be an object of class fs") + + k = min(k,length(obj$action)) # Round to last step + x = obj$x + y = obj$y + p = ncol(x) + n = nrow(x) + pv = c() + + if (is.null(sigma)) { + # TODO we need a sampler on a unit sphere + if (n >= 2*p) { + oo = obj$intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + khat = NULL + + vars = obj$action[1:k] + zi = 0 + for (j in 1:k) { + if(verbose) cat(c("Step=",j),fill=T) + # the inactive set here does not + # include the variable at the j-th step + # so, at j==1, the inactive set is every variable + # at j==2, the inactive set is everything but the first one + + if (j > 1) { + active = vars[1:(j-1)] + inactive = (1:p)[-active] + } else { + inactive = 1:p + } + + collapsed_pos = apply(obj$offset_pos_maxZ[inactive,1:j,drop=FALSE], 1, min) + collapsed_neg = apply(obj$offset_neg_maxZ[inactive,1:j,drop=FALSE], 1, min) + cur_scale = obj$scale_maxZ[,j][inactive] + + # the matrix cur_adjusted_Xt is used to compute (always as length(y) columns) + # the maxZ or maxT for the sampled variables + # + cur_adjusted_Xt = obj$Gamma_maxZ[zi + Seq(1,p-j+1),,drop=FALSE]; zi = zi+p-j+1 # Xt for transpose + + # cur_X is used to enforce conditioning on + # the ever_active sufficient_statistics + + cur_X = obj$x[,inactive,drop=FALSE] + + # now we condition on solution up to now + # this is equivalent to finding vector of + # fitted values up to now and appropriately + # adjusting the box limits + + if (j > 1) { + cur_fitted = predict(obj, s=j) + cur_fitted = cur_fitted - mean(cur_fitted) + cur_offset = as.numeric(t(cur_X) %*% cur_fitted) + } + else { + cur_fitted = rep(0, length(y)) + cur_offset = rep(0, length(inactive)) + } + + final_upper = collapsed_pos - cur_offset + final_lower = -(collapsed_neg + cur_offset) + + # now, we sample from Y_star, a centered Gaussian with covariance sigma^2 I + # subject to the constraint + # t(cur_adjusted_Xt) %*% Y_star < final_upper + # -t(cur_adjusted_Xt) %*% Y_star < -final_lower + + # really, we want the covariance of Y_star to be \sigma^2 (I - cur_P) + # where P is projection on the j-1 previous variables + # but this doesn't matter as everything we do with the samples + # will be a function of (I - cur_P) Y_star and the constraints are + # expressible in terms of (I - cur_P) Y_star because + # we have adjusted X + + # IMPORTANT: after sampling Y_star, we have to add back cur_fitted + + # if n >= p, we could actually just draw cur_adjusted_Xt %*% Y_star + # because this has a simple box constraint + # with a generically non-degenerate covariance + + if (nrow(cur_adjusted_Xt) > length(y)) { + linear_part = rbind(cur_adjusted_Xt, 
-cur_adjusted_Xt) + offset = c(final_upper, -final_lower) + covariance = diag(rep(sigma^2, length(y))) + mean_param = cur_fitted # rep(0, length(y)) + initial_point = y + + truncated_y = sample_from_constraints(linear_part, + offset, + mean_param, + covariance, + initial_point, + burnin=burnin, + ndraw=ndraw) + + truncated_noise = truncated_y %*% t(cur_adjusted_Xt) + sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) + } else { # sample from a smaller dimensional gaussian + if (nrow(cur_adjusted_Xt) > 1) { + linear_part = rbind(diag(rep(1, nrow(cur_adjusted_Xt))), + diag(rep(-1, nrow(cur_adjusted_Xt)))) + covariance = sigma^2 * (cur_adjusted_Xt %*% t(cur_adjusted_Xt)) + offset = c(final_upper, -final_lower) + mean_param = cur_adjusted_Xt %*% cur_fitted # rep(0, nrow(cur_adjusted_Xt)) + initial_point = cur_adjusted_Xt %*% y + } else { + mean_param = as.numeric(sum(as.numeric(cur_adjusted_Xt) * as.numeric(cur_fitted))) + covariance = matrix(sigma^2 * sum(cur_adjusted_Xt^2)) + linear_part = matrix(c(1,-1), 2, 1) + offset = c(final_upper, -final_lower) + initial_point = as.numeric(sum(as.numeric(cur_adjusted_Xt) * as.numeric(y))) + } + truncated_noise = sample_from_constraints(linear_part, + offset, + mean_param, + covariance, + initial_point, + burnin=burnin, + ndraw=ndraw) + sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) + + } + + observed_maxZ = obj$realized_maxZ[j] + pval = sum(sample_maxZ > observed_maxZ) / ndraw + pval = 2 * min(pval, 1 - pval) + pv = c(pv, pval) + } + + khat = forwardStop(pv,alpha) + + out = list(pv=pv, + k=k, + khat=khat, + sigma=sigma, + vars=vars, + sign=obj$sign, + alpha=alpha, + realized_maxZ=obj$realized_maxZ, + call=this.call) + class(out) = "fsInf_maxZ" + return(out) +} + +############################## +# +# Print methods +# +############################## + +print.fs <- function(x, ...) { + cat("\nCall:\n") + dput(x$call) + + cat("\nSequence of FS moves:\n") + nsteps = length(x$action) + tab = cbind(1:nsteps,x$action,x$sign) + colnames(tab) = c("Step","Var","Sign") + rownames(tab) = rep("",nrow(tab)) + print(tab) + invisible() +} + +print.fsInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + if (x$type == "active") { + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",x$alpha)) + tab = cbind(1:length(x$pv),x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", + "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",x$khat)) + } + + else if (x$type == "all") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$k,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + } + + else if (x$type == "aic") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$khat,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) + } + + invisible() +} + + +print.fsInf_maxZ <- function(obj) { + + cat("\nCall:\n") + dput(obj$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + obj$sigma)) + + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",obj$alpha)) + + tab = cbind(1:length(obj$pv), + obj$vars, + round(obj$sign*obj$realized_maxZ, 3), + round(obj$pv,3)) + colnames(tab) = c("Step", "Var", "Z-score", "P-value") + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",obj$khat)) + + invisible() +} + +############################## +# +# Plot methods +# +############################## + +plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { + if (x$completepath) { + k = length(x$action)+1 + beta = cbind(x$beta,x$bls) + } else { + k = length(x$action) + beta = x$beta + } + p = nrow(beta) + + xx = 1:k + xlab = "Step" + + if (omit.zeros) { + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA + } + + plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), + xlab=xlab,ylab="Coefficients",main="Forward stepwise path",...) 
+ abline(h=0,lwd=2) + matplot(xx,t(beta),type="l",lty=1,add=TRUE) + if (breaks) abline(v=xx,lty=2) + if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) + invisible() +} + diff --git a/forLater/josh/cv.R b/forLater/josh/cv.R deleted file mode 100644 index 2787990..0000000 --- a/forLater/josh/cv.R +++ /dev/null @@ -1,103 +0,0 @@ -# ------------------------------------------------ -# Cross-validation, preliminary - - -cv_make_folds <- function(x, nfolds = 10) { - #inds <- sample(1:nrow(x), replace=FALSE) - inds <- 1:nrow(x) - foldsize <- floor(nrow(x)/nfolds) - lapply(1:nfolds, function(f) return(inds[1:foldsize+(f-1)*foldsize])) -} - -cv_hat_matrix <- function(x, folds, active.sets) { - nfolds <- length(folds) - lapply(1:nfolds, function(f) { - fold <- folds[[f]] - active <- active.sets[[f]] - x_tr <- x[ -fold, active] - x_te <- x[fold, active] - hatm <- matrix(0, nrow=length(fold), ncol=nrow(x)) - hatm[, -fold] <- x_te %*% ginv(x_tr) - return(hatm) - }) -} - -product_cv_hat <- function(folds, inds, finds, ginds, hat_matrices) { - nfolds <- length(folds) - terms <- lapply(inds, function(h) { - t(hat_matrices[[h]][, finds]) %*% hat_matrices[[h]][, ginds] - }) - return(Reduce('+', terms)) -} - -cv_RSSquad <- function(x, folds, active.sets) { - hat_matrices <- cv_hat_matrix(x, folds, active.sets) - nfolds <- length(folds) - rows <- lapply(1:nfolds, function(f) { - do.call(cbind, lapply(1:nfolds, function(g) { - ginds <- folds[[g]] - finds <- folds[[f]] - if (f == g) { - return(product_cv_hat(folds, setdiff(1:nfolds, f), finds, ginds, hat_matrices)) - } else { - return( - product_cv_hat(folds, setdiff(1:nfolds, c(f,g)), finds, ginds, hat_matrices) - hat_matrices[[f]][, ginds] - t(hat_matrices[[g]][, finds])) - } - })) - }) - Q <- do.call(rbind, rows) - return(Q) -} - -cv_fs <- function(x, y, steps, nfolds = 10) { - - n <- nrow(x) - if (steps >= n*(1-1/nfolds)) stop("Too many steps") - - folds <- cv_make_folds(x, nfolds) - nfolds <- length(folds) - projections <- list() - active.sets <- list() - cv_perm <- sample(1:n) - Y <- y[cv_perm] - mean(y) - X <- x[cv_perm, ] - - for (f in 1:nfolds) { - fold <- folds[[f]] - fit <- groupfs(X[-fold,], Y[-fold], steps=steps) - path.projs <- fit$projections - path.projs <- lapply(path.projs, function(step.projs) { - lapply(step.projs, function(proj) { - expanded.proj <- matrix(0, n, n) - expanded.proj[-fold, -fold] <- proj - return(expanded.proj) - }) - }) - projections[[f]] <- path.projs - active.sets[[f]] <- fit$variable - } - projections <- do.call(c, projections) - - RSSquads <- list() - for (s in 1:steps) { - initial.active <- lapply(active.sets, function(a) a[1:s]) - RSSquads[[s]] <- cv_RSSquad(X, folds, initial.active) - } - - RSSs <- lapply(RSSquads, function(Q) t(Y) %*% Q %*% Y) - sstar <- which.min(RSSs) - quadstar <- RSSquads[sstar][[1]] - - RSSquads <- lapply(RSSquads, function(quad) quad - quadstar) - RSSquads[[sstar]] <- NULL # remove the all zeroes case - - fit <- groupfs(X, Y, steps=sstar) - fit$projections <- flatten(fit$projections) - fit$foldprojections <- flatten(projections) - fit$rssprojections <- flatten(RSSquads) - - fit$cvperm <- cv_perm - - invisible(fit) -} - diff --git a/forLater/josh/funs.sims.R b/forLater/josh/funs.sims.R new file mode 100644 index 0000000..53af73f --- /dev/null +++ b/forLater/josh/funs.sims.R @@ -0,0 +1,46 @@ +# Functions for simulation/testing + +randomGroupSizes <- function(G, lambda = 2) return(2 + rpois(G, lambda)) + +randomGroups <- function(G, lambda = 2) { + rles <- randomGroupSizes(G, lambda) + 
return(rep(1:G, rles)) +} + +randomIndexFixedP <- function(p, G) sort(c(sample(1:G), sample(1:G, size = p-G, replace=T))) + +randomFactorDesign <- function(n, G, lambda = 2) { + if (n < (1+lambda)*G) stop("Larger n required to avoid duplicate columns") + rles <- randomGroupSizes(G, lambda) + print(rles) + df <- data.frame(do.call(cbind, lapply(rles, function(g) { + sample(LETTERS[1:g], n, replace = TRUE, prob = runif(g)) + })), stringsAsFactors = TRUE) + if (any(apply(df, 2, function(col) length(unique(col))) == 1)) return(randomFactorDesign(n, G, lambda)) + fd <- factorDesign(df) + if (any(duplicated(fd$x, MARGIN = 2))) return(randomFactorDesign(n, G, lambda)) + return(list(df=df, fd=fd)) +} + +randomFactorsFixedP <- function(p, G) { +# index <- +} + +randomGaussianFixedP <- function(n, p, G = p, sparsity = 0, snr = 0, sigma = 1, rho = 0) { + index <- 1:p + if (G < p) index <- randomIndexFixedP(p, G) + x <- matrix(rnorm(n*p), nrow=n) + if (rho != 0) { + z <- matrix(rep(t(rnorm(n)), p), nrow = n) + x <- sqrt(1-rho)*x + sqrt(rho)*z + } + beta <- rep(0, p) + if (sparsity > 0 && snr > 0) { + for (j in 1:sparsity) { + inds <- which(index == j) + beta[inds] <- snr * sqrt(2*log(G)/(n*length(inds))) * sample(c(-1,1), length(inds), replace=T) + } + } + y <- x %*% beta + sigma * rnorm(n) + return(list(x=x, y=y, beta = beta, index=index, sigma = sigma)) +} diff --git a/forLater/josh/selectiveInference/R/cv.R b/forLater/josh/selectiveInference/R/cv.R new file mode 100644 index 0000000..046a462 --- /dev/null +++ b/forLater/josh/selectiveInference/R/cv.R @@ -0,0 +1,177 @@ +# ------------------------------------------------ +# Cross-validation, preliminary + +cvMakeFolds <- function(x, nfolds = 5) { + inds <- sample(1:nrow(x), replace=FALSE) + #inds <- 1:nrow(x) + foldsize <- floor(nrow(x)/nfolds) + folds <- lapply(1:nfolds, function(f) return(inds[1:foldsize+(f-1)*foldsize])) + if (nfolds*foldsize < nrow(x)) { + # remainder observations added to first several folds + for (i in 1:(nrow(x) - nfolds*foldsize)) { + folds[[i]] <- c(folds[[i]], inds[nfolds*foldsize + i]) + } + } + return(folds) +} + +############################################ +# Can this be optimized using svdu_thresh? 
# +############################################ +cvHatMatrix <- function(x, folds, active.sets) { + nfolds <- length(folds) + lapply(1:nfolds, function(f) { + fold <- folds[[f]] + active <- active.sets[[f]] + x_tr <- x[ -fold, active] + x_te <- x[fold, active] + hatm <- matrix(0, nrow=length(fold), ncol=nrow(x)) + svdtr <- svd(x_tr) + inds <- svdtr$d > .Machine$double.eps * svdtr$d[1] + xtrinv <- svdtr$v[, inds, drop = FALSE] %*% ((1/svdtr$d[inds]) * t(svdtr$u[, inds, drop = FALSE])) + hatm[, -fold] <- x_te %*% xtrinv + return(hatm) + }) +} + +cvProductHat <- function(folds, inds, finds, ginds, hat_matrices) { + nfolds <- length(folds) + terms <- lapply(inds, function(h) { + t(hat_matrices[[h]][, finds]) %*% hat_matrices[[h]][, ginds] + }) + return(Reduce('+', terms)) +} + +cvRSSquad <- function(x, folds, active.sets) { + hat_matrices <- cvHatMatrix(x, folds, active.sets) + nfolds <- length(folds) + rows <- lapply(1:nfolds, function(f) { + do.call(cbind, lapply(1:nfolds, function(g) { + ginds <- folds[[g]] + finds <- folds[[f]] + if (f == g) { + return(cvProductHat(folds, setdiff(1:nfolds, f), finds, ginds, hat_matrices)) + } else { + return( + cvProductHat(folds, setdiff(1:nfolds, c(f,g)), finds, ginds, hat_matrices) - hat_matrices[[f]][, ginds] - t(hat_matrices[[g]][, finds])) + } + })) + }) + Q <- do.call(rbind, rows) + return(Q) +} + +cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, intercept = TRUE, center = TRUE, normalize = TRUE, nfolds = 5) { + + n <- nrow(x) + if (maxsteps >= n*(1-1/nfolds)) { + maxsteps <- floor(n*(1-1/nfolds)) + warning(paste("maxsteps too large for training fold size, set to", maxsteps)) + } + + folds <- cvMakeFolds(x, nfolds) + nfolds <- length(folds) + projections <- list(1:nfolds) + maxprojs <- list(1:nfolds) + active.sets <- list(1:nfolds) + cvobj <- list(1:nfolds) + cv_perm <- sample(1:n) + Y <- y[cv_perm] + X <- x[cv_perm, ] + + # Initialize copies of data for loop + by <- mean(Y) + if (intercept) Y <- Y - by + + # Center and scale design matrix + xscaled <- scaleGroups(X, index, center, normalize) + xm <- xscaled$xm + xs <- xscaled$xs + X <- xscaled$x + + # Flatten list or something? + for (f in 1:nfolds) { + fold <- folds[[f]] + fit <- groupfs(X[-fold,], Y[-fold], index=index, maxsteps=maxsteps, sigma=sigma, intercept=FALSE, center=FALSE, normalize=FALSE) + fit$fold <- fold + # Why is this commented out? 
+ ## projections[[f]] <- lapply(fit$projections, function(step.projs) { + ## lapply(step.projs, function(proj) { + ## # Reduce from n by n matrix to svdu_thresh + ## expanded.proj <- matrix(0, n, ncol(proj)) + ## expanded.proj[-fold, ] <- proj + ## return(expanded.proj) + ## }) + ## }) + active.sets[[f]] <- fit$action + cvobj[[f]] <- fit + } + #projections <- do.call(c, projections) + + RSSquads <- list() + for (s in 1:maxsteps) { + initial.active <- lapply(active.sets, function(a) a[1:s]) + RSSquads[[s]] <- cvRSSquad(X, folds, initial.active) + } + + RSSs <- lapply(RSSquads, function(Q) t(Y) %*% Q %*% Y) + sstar <- which.min(RSSs) + quadstar <- RSSquads[sstar][[1]] + + RSSquads <- lapply(RSSquads, function(quad) quad - quadstar) + RSSquads[[sstar]] <- NULL # remove the all zeroes case + + fit <- groupfs(X, Y, index=index, maxsteps=sstar, sigma=sigma, intercept=intercept, center=center, normalize=normalize) + fit$cvobj <- cvobj + fit$cvquad <- RSSquads + + fit$cvperm <- cv_perm + + invisible(fit) +} + + +cvlar <- function(x, y) { # other args + folds <- cvMakeFolds(x) + models <- lapply(folds, function(fold) { + x.train <- X + y.train <- Y + x.train[fold,] <- 0 + y.train[fold] <- 0 + x.test <- X[fold,] + y.test <- Y[fold] + larpath.train <- lar(x.train, y.train, maxsteps = maxsteps, intercept = F, normalize = F) + return(lff) + }) + + active.sets <- lapply(models, function(model) model$action) + lambdas <- lapply(models, function(model) model$lambda) + lmin <- min(unlist(lambdas)) + +# Interpolate lambda grid or parametrize by steps? +# interpolation probably requires re-writing cvRSSquads for +# penalized fits in order to make sense + +# do steps for now just to have something that works? + + RSSquads <- list() + for (s in 1:maxsteps) { + initial.active <- lapply(active.sets, function(a) a[1:s]) + RSSquads[[s]] <- cvRSSquad(X, folds, initial.active) + } + + RSSs <- lapply(RSSquads, function(Q) t(Y) %*% Q %*% Y) + sstar <- which.min(RSSs) + quadstar <- RSSquads[sstar][[1]] + + RSSquads <- lapply(RSSquads, function(quad) quad - quadstar) + RSSquads[[sstar]] <- NULL # remove the all zeroes case + + fit <- lar(X, Y, maxsteps=sstar, intercept = F, normalize = F) + +# Very tall Gamma encoding all cv-model paths + Gamma <- do.call(rbind, lapply(models, function(model) return(model$Gamma))) + +# more to do here +} + diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R new file mode 100644 index 0000000..af150ec --- /dev/null +++ b/forLater/josh/sim.aicstop.R @@ -0,0 +1,54 @@ +library(intervals) +source("funs.sims.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +known <- FALSE +niters <- 500 +n <- 50 +p <- 150 +G <- 75 +maxsteps <- 10 +sparsity <- 4 +snr <- 3 +rho <- 0 +aicstop <- 1 + +instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { + simd <- randomGaussianFixedP(n, p, G, sparsity, snr, sigma = 1, rho) + x <- simd$x + y <- simd$y + index <- simd$index + if (known) { + fit <- groupfs(x, y, index, maxsteps, sigma = 1, k = 2*log(G), aicstop = aicstop, verbose = T) + } else { + fit <- groupfs(x, y, index, maxsteps, k = 2*log(G), aicstop = aicstop, verbose = T) + } + pvals <- groupfsInf(fit, verbose=T) + return(list(variable = fit$action, pvals = pvals$pv, stopped = attr(fit, "stopped"))) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, G, sparsity, snr, rho, maxsteps, aicstop)) +}) + +stopped <- do.call(c, 
list(output[3,])) +pvals <- do.call(c, list(output[2,])) +vars <- do.call(c, list(output[1,])) + +save(pvals, vars, stopped, + file = paste0( + "results/aic", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_g", G, + "_rho", gsub(".", "pt", rho, fixed=T), + "_maxsteps", maxsteps, + "_sparsity", sparsity, + "_snr", round(snr), + ".RData")) + +print(time) diff --git a/forLater/josh/sim.carve.R b/forLater/josh/sim.carve.R new file mode 100644 index 0000000..7d1ae59 --- /dev/null +++ b/forLater/josh/sim.carve.R @@ -0,0 +1,82 @@ +library(intervals) +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +niters <- 400 +known <- FALSE +n <- 100 +p <- 50 +maxsteps <- 20 +sparsity <- 10 +snr <- 1 +rho <- 0.1 +ratio <- 0.75 +train <- 1:(ratio*n) +test <- setdiff(1:n, train) +index <- 1:p +nfolds <- 5 + +instance <- function(n, p, sparsity, snr, maxsteps, rho) { + + x <- matrix(rnorm(n*p), nrow=n) + if (rho != 0) { + z <- matrix(rep(t(rnorm(n)), p), nrow = n) + x <- sqrt(1-rho)*x + sqrt(rho)*z + } + y <- rnorm(n) + + if (sparsity > 0) { + beta <- rep(0, p) + beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) + y <- y + x %*% beta + } + + ytr <- y[train] + xtr <- x[train, ] + yte <- y[test] + xte <- x[test, ] + + if (known) { + trfit <- cvfs(xtr, ytr, maxsteps=maxsteps, sigma = 1, nfolds=nfolds) + fit <- cvfs(x, y, maxsteps=maxsteps, sigma = 1, nfolds=nfolds) + } else { + trfit <- cvfs(xtr, ytr, maxsteps=maxsteps, nfolds=nfolds) + fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) + } + + trcols <- which(1:p %in% trfit$action) + tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4] + names(tepv) <- as.character(sort(trfit$action)) + pv <- groupfsInf(fit) + trpv <- groupfsInf(trfit) + return(list(vars = fit$action, pvals = pv$pv, + splitvars = sort(trfit$action), splitpvals = tepv, + trpvals = trpv$pv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, rho)) +}) + +vars <- do.call(c, list(output[1,])) +pvals <- do.call(c, list(output[2,])) +splitvars <- do.call(c, list(output[3,])) +splitpvals <- do.call(c, list(output[4,])) +trpvals <- do.call(c, list(output[5,])) + +save(vars, pvals, splitvars, splitpvals, trpvals, + file = paste0("results/carvecv", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_rho", gsub(".", "pt", rho, fixed=T), + "_sparsity", sparsity, + "_ratio", gsub(".", "pt", round(ratio, 2), fixed=T), + "_snr", as.character(snr), + ".RData")) + +print(time) + diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R new file mode 100644 index 0000000..7d20e8c --- /dev/null +++ b/forLater/josh/sim.cv.R @@ -0,0 +1,73 @@ +library(intervals) +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +niters <- 50 +known <- FALSE +n <- 100 +p <- 50 +maxsteps <- 8 +sparsity <- 5 +snr <- 2 +rho <- 0.1 +nfolds <- 5 + +instance <- function(n, p, sparsity, snr, maxsteps, nfolds, rho) { + + x <- matrix(rnorm(n*p), nrow=n) + if (rho != 0) { + z <- matrix(rep(t(rnorm(n)), p), nrow = n) + x <- sqrt(1-rho)*x + sqrt(rho)*z + } + y <- rnorm(n) + + if (sparsity > 0) { + beta <- rep(0, p) + beta[1:sparsity] <- snr * sqrt(2*log(p)/n) * sample(c(-1,1), sparsity, replace=T) + y <- y + x %*% beta + } + + if (known) { + 
fit <- cvfs(x, y, maxsteps=maxsteps, sigma = 1, nfolds=nfolds) + } else { + fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) + } + vars <- fit$action + pvals <- groupfsInf(fit, verbose=T) + fit$cvobj <- NULL + nocvpv <- groupfsInf(fit, verbose=T) + Y <- y - mean(y) + cols <- which(1:p %in% vars) + noselpv <- summary(lm(Y~x[, cols]-1))$coefficients[,4] + names(noselpv) <- as.character(sort(vars)) + return(list(vars = vars, pvals = pvals$pv, + nocvvars = vars, nocvpvals = nocvpv$pv, + noselvars = sort(vars), noselpvals = noselpv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, nfolds, rho)) +}) + +vars <- do.call(c, list(output[1,])) +pvals <- do.call(c, list(output[2,])) +nocvvars <- do.call(c, list(output[3,])) +nocvpvals <- do.call(c, list(output[4,])) +noselvars <- do.call(c, list(output[5,])) +noselpvals <- do.call(c, list(output[6,])) + +save(vars, pvals, nocvvars, nocvpvals, noselvars, noselpvals, + file = paste0("results/cv", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_rho", gsub(".", "pt", rho, fixed=T), + "_sparsity", sparsity, + "_maxsteps", maxsteps, + "_snr", snr, + ".RData")) + +print(time) diff --git a/forLater/josh/sim.cvlar.R b/forLater/josh/sim.cvlar.R new file mode 100644 index 0000000..9169cc3 --- /dev/null +++ b/forLater/josh/sim.cvlar.R @@ -0,0 +1,58 @@ +# Choices + +# RSS: least-squares or penalized beta? +# depends on final model. Go with least-squares for now + +# fixed vs lar? (lar, apparently) +# fixed probably slower, but advantage of same lambda grid? +# is same lambda grid necessary? -- doesn't lar algorithm give all possible models anyway? +# i.e. for non-knot lambda just find where it is in lar path, take corresponding model + +# groups? later + +# TODO + +# copy larInf or groupfsInf? +# larInf: add CV quadratic constraints* & break/fix p-value computation +# -------- *but can we even use the ydecomp we use for quadratic? +# groupfsInf: some ugly rewriting, no cumprojs etc, but straightforward +# -------- downside: need to implement larInf basically + +# larInf +# [ ] is.null(sigma) don't estimate it + +# plan: +# expand Gamma for [-fold] indices? +# stack all the Gammas? or iterate through them? +# work backward from poly.pval <- larInf + + +# big picture / long term +# what OOP kind of design would lend itself to easily implementing more cv things? + +# Gamma: something x n +# Gamma %*% y >= 0 + +# pass 0-padded x[-fold] and y[-fold] to lar? 
+ +library(selectiveInference) +setwd("/Users/joftius/Dropbox/work/R-software/forLater/josh") +source("selectiveInference/R/cv.R") + +set.seed(1) +n <- 100 +p <- 50 +maxsteps <- 10 +sparsity <- 3 +snr <- 2 +rho <- 0.1 +nfolds <- 5 + +x <- matrix(rnorm(n*p), nrow=n) +y <- rnorm(n) +beta <- rep(0, p) +beta[1:sparsity] <- 2* sqrt(2*log(p)/n) * sample(c(-1,1), sparsity, replace=T) +y <- y + x %*% beta +my <- mean(y) +y <- y - my + diff --git a/forLater/josh/sim.datasplit.R b/forLater/josh/sim.datasplit.R new file mode 100644 index 0000000..0e19335 --- /dev/null +++ b/forLater/josh/sim.datasplit.R @@ -0,0 +1,97 @@ +library(intervals) +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(19) +niters <- 500 +known <- FALSE +n <- 50 +p <- 100 +maxsteps <- 8 +sparsity <- 5 +snr <- 2 +rho <- 0.1 +ratio <- 0.6 +ratio2 <- 0.8 +train <- 1:(ratio*n) +test <- setdiff(1:n, train) +train2 <- 1:(ratio2*n) +test2 <- setdiff(1:n, train2) +index <- 1:p + +x <- matrix(rnorm(n*p), nrow=n) + if (rho != 0) { + z <- matrix(rep(t(rnorm(n)), p), nrow = n) + x <- sqrt(1-rho)*x + sqrt(rho)*z + } + +instance <- function(n, p, sparsity, snr, maxsteps, rho) { + + y <- rnorm(n) + + if (sparsity > 0) { + beta <- rep(0, p) + beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) + y <- y + x %*% beta + } + + ytr <- y[train] + xtr <- x[train, ] + yte <- y[test] + xte <- x[test, ] + + ytr2 <- y[train2] + xtr2 <- x[train2, ] + yte2 <- y[test2] + xte2 <- x[test2, ] + + if (known) { + trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = log(length(train))) + fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = log(length(train2))) + } else { + trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = 2*log(p)) + fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, aicstop=1, k = 2*log(p)) + } + + trcols <- which(1:p %in% trfit$action) + tr2cols <- which(1:p %in% fit$action) + tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4] + tepv2 <- summary(lm(yte2~xte2[, tr2cols]-1))$coefficients[,4] + names(tepv) <- as.character(sort(trfit$action)) + names(tepv2) <- as.character(sort(fit$action)) + pv <- groupfsInf(fit) + trpv <- groupfsInf(trfit) + return(list(vars = fit$action, pvals = pv$pv, + splitvars = sort(trfit$action), splitpvals = tepv, + splitvars2 = sort(fit$action), splitpvals2 = tepv2, + trpvals = trpv$pv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, rho)) +}) + +vars <- do.call(c, list(output[1,])) +pvals <- do.call(c, list(output[2,])) +splitvars <- do.call(c, list(output[3,])) +splitpvals <- do.call(c, list(output[4,])) +splitvars2 <- do.call(c, list(output[5,])) +splitpvals2 <- do.call(c, list(output[6,])) +trpvals <- do.call(c, list(output[7,])) + +save(vars, pvals, splitvars, splitpvals, + splitvars2, splitpvals2, trpvals, + file = paste0("results/datasplit", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_rho", gsub(".", "pt", rho, fixed=T), + "_sparsity", sparsity, + "_ratio", gsub(".", "pt", round(ratio, 2), fixed=T), + "_snr", as.character(snr), + "_bic.RData")) + +print(time) + diff --git a/forLater/josh/sim.groupfs.R b/forLater/josh/sim.groupfs.R new file mode 100644 index 0000000..5f053e5 --- /dev/null +++ b/forLater/josh/sim.groupfs.R @@ -0,0 +1,52 @@ +library(intervals) +source("funs.sims.R") 
+#source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +known <- TRUE +niters <- 300 +n <- 50 +p <- 150 +G <- 75 +maxsteps <- 8 +sparsity <- 4 +snr <- 2 +rho <- 0 + +instance <- function(n, p, G, sparsity, snr, rho, maxsteps) { + simd <- randomGaussianFixedP(n, p, G, sparsity, snr, sigma = 1, rho) + x <- simd$x + y <- simd$y + index <- simd$index + if (known) { + fit <- groupfs(x, y, index, maxsteps, sigma = 1, k = log(n)) + } else { + fit <- groupfs(x, y, index, maxsteps, k = log(n)) + } + pvals <- groupfsInf(fit, verbose=T) + return(list(variable = fit$action, pvals = pvals$pv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, G, sparsity, snr, rho, maxsteps)) +}) + +pvals <- do.call(c, list(output[2,])) +vars <- do.call(c, list(output[1,])) + +save(pvals, vars, + file = paste0("results/", + ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_g", G, + "_rho", gsub(".", "pt", rho, fixed=T), + "_maxsteps", maxsteps, + "_sparsity", sparsity, + "_snr", round(snr), + ".RData")) + +print(time) diff --git a/forLater/josh/sim.selectedmodel.R b/forLater/josh/sim.selectedmodel.R new file mode 100644 index 0000000..c278e98 --- /dev/null +++ b/forLater/josh/sim.selectedmodel.R @@ -0,0 +1,60 @@ +library(selectiveInference) +library(intervals) +setwd("~/Dropbox/work/R-software/forLater/josh") +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") +source("../../selectiveInference/R/funs.fs.R") +source("../../selectiveInference/R/funs.lar.R") +source("../../selectiveInference/R/funs.inf.R") +library(MASS) +pinv = ginv + +set.seed(19) +niters <- 500 +known <- TRUE +n <- 50 +p <- 100 +maxsteps <- 8 +sparsity <- 5 +snr <- 2 +index <- 1:p + +x <- matrix(rnorm(n*p), nrow=n) + +instance <- function(n, p, sparsity, snr, maxsteps) { + y <- rnorm(n) + if (sparsity > 0) { + beta <- rep(0, p) + beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) + y <- y + x %*% beta + } + y <- y - mean(y) + fit <- groupfs(x, y, index, maxsteps=maxsteps, sigma=1, intercept=F, center=F, normalize=F) + fitfs <- fs(x, y, maxsteps=maxsteps, intercept=F, normalize=F) + if (any(fit$action != fitfs$action)) stop("Model paths did not agree") + pvfs <- fsInf(fitfs, sigma=1, k = maxsteps, type = "all") + pv <- groupfsInf(fit) + return(list(vars = fit$action, pvals = pv$pv, selpvals = pvfs$pv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps)) +}) + +vars <- do.call(c, list(output[1,])) +pvals <- do.call(c, list(output[2,])) +selpvals <- do.call(c, list(output[3,])) + +save(vars, pvals, selpvals, + file = paste0("results/selected", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_sparsity", sparsity, + "_snr", as.character(snr), + ".RData")) + +print(time) + diff --git a/forLater/josh/sim.splitcv.R b/forLater/josh/sim.splitcv.R new file mode 100644 index 0000000..cc5428e --- /dev/null +++ b/forLater/josh/sim.splitcv.R @@ -0,0 +1,77 @@ +library(intervals) +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +niters <- 500 +known <- FALSE +n <- 100 +p <- 200 +maxsteps <- 20 +sparsity <- 5 +snr <- 1 
+rho <- 0.1 +ratio <- 0.75 +train <- 1:(ratio*n) +test <- setdiff(1:n, train) +index <- 1:p + +instance <- function(n, p, sparsity, snr, maxsteps, rho) { + + x <- matrix(rnorm(n*p), nrow=n) + if (rho != 0) { + z <- matrix(rep(t(rnorm(n)), p), nrow = n) + x <- sqrt(1-rho)*x + sqrt(rho)*z + } + y <- rnorm(n) + + if (sparsity > 0) { + beta <- rep(0, p) + beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) + y <- y + x %*% beta + } + + ytr <- y[train] + xtr <- x[train, ] + yte <- y[test] + xte <- x[test, ] + + if (known) { + trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p)) + fit <- groupfs(x, y, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p)) + } else { + trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = log(length(train))) + fit <- groupfs(x, y, index, maxsteps=maxsteps, aicstop=1, k = log(n)) + } + + trcols <- which(1:p %in% trfit$action) + tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4] + names(tepv) <- as.character(sort(trfit$action)) +# pv <- groupfsInf(fit) +# trpv <- groupfsInf(trfit) + return(list(vars = fit$action, splitvars = sort(trfit$action), splitpvals = tepv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, rho)) +}) + +vars <- do.call(c, list(output[1,])) +splitvars <- do.call(c, list(output[2,])) +splitpvals <- do.call(c, list(output[3,])) + +save(vars, pvals, splitvars, splitpvals, trpvals, + file = paste0("results/datasplit", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_rho", gsub(".", "pt", rho, fixed=T), + "_sparsity", sparsity, + "_ratio", gsub(".", "pt", round(ratio, 2), fixed=T), + "_snr", as.character(snr), + "_bic.RData")) + +print(time) + diff --git a/forLater/josh/tests/robsNotes b/forLater/josh/tests/robsNotes deleted file mode 100644 index 8d7c3be..0000000 --- a/forLater/josh/tests/robsNotes +++ /dev/null @@ -1,37 +0,0 @@ -notes - -see bottom of test.groupfs.R - -1. with n=100, p=120,14 groups, groupfs(x, y,index) returned - - - Error in checkargs.groupfs(x, index, maxsteps) : - maxsteps is too large. If the largest groups are included the model will be saturated/overdetermined - -should have a sensible default so this doesn;t happen - - -2. a=groupfs(x,y,index,maxsteps=4) -groupfsInf(a) gives - - -Using sigma value: -Step 1 - computing p-value for group 14 -Step 2 - computing p-value for group 12 -Step 3 - computing p-value for group 1 -Step 4 - computing p-value for group 13 -Warning messages: -1: In groupfsInf(a) : - p > n/2, and sigmahat = 0.434 used as an estimate of sigma; you may want to use the estimateSigma function -2: In groupfsInf(a) : - P-value NaNs of the form 0/0 converted to 0. This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio. -> aa - -Standard deviation of noise (specified or estimated) sigma = 0.434 -Error in lapply(pvals$support, size) : object 'pvals' not found - - - -3. groupfs has a sigma arg listed in the Rd file, but it's not in the function?? 
- -you need to do a R CMD check to pick up this stuff diff --git a/forLater/josh/tests/test.cv.R b/forLater/josh/tests/test.cv.R new file mode 100644 index 0000000..28b3f44 --- /dev/null +++ b/forLater/josh/tests/test.cv.R @@ -0,0 +1,21 @@ +library(intervals) +source("../selectiveInference/R/cv.R") +source("../../../selectiveInference/R/funs.groupfs.R") +source("../../../selectiveInference/R/funs.quadratic.R") +source("../../../selectiveInference/R/funs.common.R") + +set.seed(1) +n <- 50 +p <- 100 +maxsteps <- 10 +sparsity <- 5 +snr <- 1 +nfolds <- 5 +x <- matrix(rnorm(n*p), nrow=n) +y <- rnorm(n) +beta <- rep(0, p) +beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) +y <- y + x %*% beta +fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) +pvals <- groupfsInf(fit, verbose=T) + diff --git a/forLater/maxZ/forLater b/forLater/maxZ/forLater new file mode 100644 index 0000000..89aa886 --- /dev/null +++ b/forLater/maxZ/forLater @@ -0,0 +1,86 @@ +\name{fsInf_maxZ} +\alias{fsInf_maxZ} +\title{ +Selective inference for forward stepwise regression +} +\description{ +Computes maxZ selective p-values and confidence intervals for forward +stepwise regression +} +\usage{ + +fsInf_maxZ(obj, sigma=NULL, alpha=0.1, k=NULL, ndraw=8000, burnin=2000,verbose=FALSE) + +} + +\arguments{ + \item{obj}{ +Object returned by \code{\link{fs}} function +} +\item{sigma}{ +Estimate of error standard deviation. If NULL (default), this is estimated +using the mean squared residual of the full least squares fit when n >= 2p, and +using the standard deviation of y when n < 2p. In the latter case, the user +should use \code{\link{estimateSigma}} function for a more accurate estimate +} +\item{alpha}{ +Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) +} +\item{k}{ +See "type" argument below. Default is NULL, in which case k is taken to be the +the number of steps computed in the forward stepwise path +} +\item{ndraw}{Number of Monte Carlo samples generated} +\item{burnin}{ +Number of samples discarded at the beginning of the chain +} +\item{verbose}{Print out progress along the way? Default is FALSE} +} + +\details{ +This function computes selective maxZ p-values +for forward stepwise regression. These p-values are independent the under null, +so that stopping via the forwardStop rule yields guaranteed FDR control +} + +\value{ +\item{pv}{P-values for each model in the sequence} +\item{k}{Value of k specified in call} +\item{khat}{When type is "active", this is an estimated stopping point +declared by \code{\link{forwardStop}}} +\item{sigma}{Value of error standard deviation (sigma) used} +\item{vars}{Variables in active set} +\item{sign}{Signs of active coefficients} +\item{alpha}{Desired coverage (alpha/2 in each tail)} +\item{realized_maxZ}{Value of maxZ statistic computed at each step} +\item{call}{The call to fsInf_maxZ} +} + +\references{ +Will Fithian, Jonathan Taylor, Ryan Tibshirani, and Rob Tibshirani (2015). +Selective sequential model selection. arXiv:1512.02565.. 
+ + +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\seealso{\code{\link{fs}}} + +\examples{ +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# compute sequential p-values a +# (sigma estimated from full model) +out.seq = fsInf_maxZ(fsfit) +out.seq +} diff --git a/forLater/maxZ/funs.constraints.R b/forLater/maxZ/funs.constraints.R new file mode 100644 index 0000000..a04e34e --- /dev/null +++ b/forLater/maxZ/funs.constraints.R @@ -0,0 +1,230 @@ +# +# Some utilities for affine constraints +# + +# +# compute the square-root and inverse square-root of a non-negative +# definite matrix +# + +#' Compute the square-root and inverse square-root of a non-negative definite matrix. +#' @param S matrix +#' @param rank rank of svd +#' +#' +factor_covariance = function(S, rank=NA) { + if (is.na(rank)) { + rank = nrow(S) + } + svd_X = svd(S, nu=rank, nv=rank) + sqrt_cov = t(sqrt(svd_X$d[1:rank]) * t(svd_X$u[,1:rank])) + sqrt_inv = t((1. / sqrt(svd_X$d[1:rank])) * t(svd_X$u[,1:rank])) + + return(list(sqrt_cov=sqrt_cov, sqrt_inv=sqrt_inv)) +} + +# +# from a constraint, return an equivalent +# constraint and a whitening and inverse +# whitening map +# + +# law is Z \sim N(mean_param, covariance) subject to constraints linear_part %*% Z \leq offset + +#' Transform non-iid problem into iid problem +#' @param linear_part matrix, linear part of constraints +#' @param offset vector, bias of constraints +#' @param mean_param vector of unconditional means +#' @param covariance vector of unconditional covariance +#' @return new \code{linear_part} and \code{offset} for 0-mean iid covariance problem, +#' and functions that map between the two problems. +whiten_constraint = function(linear_part, offset, mean_param, covariance) { + + factor_cov = factor_covariance(as.matrix(covariance)) + sqrt_cov = factor_cov$sqrt_cov + sqrt_inv = factor_cov$sqrt_inv + + new_A = linear_part %*% sqrt_cov + new_b = offset - linear_part %*% mean_param + + # rescale rows to have length 1 + + scaling = sqrt(apply(new_A^2, 1, sum)) + new_A = new_A / scaling + new_b = new_b / scaling + + + inverse_map = function(Z) { + # broadcasting here + # the columns of Z are same length as mean_param + return(sqrt_cov %*% Z + as.numeric(mean_param)) + } + + forward_map = function(W) { + return(sqrt_inv %*% (W - mean_param)) + } + + return(list(linear_part=new_A, + offset=new_b, + inverse_map=inverse_map, + forward_map=forward_map)) +} + +#' Sample from multivariate normal distribution under affine restrictions +#' @description +#' \code{sample_from_constraints} returns a sample from the conditional +#' multivariate normal Z~ N(mean,covariance) s.t. 
A*Z <= B +#' +#' @param linear_part r x d matrix for r restrictions and d dimension of Z +#' @param offset r-dim vector of offsets +#' @param mean_param d-dim mean vector of the unconditional normal +#' @param covariance d x d covariance matrix of unconditional normal +#' @param initial_point d-dim vector that initializes the sampler (must meet restrictions) +#' @param ndraw size of sample +#' @param burnin samples to throw away before storing +#' @return Z ndraw x d matrix of samples +#' @export +#' @examples +#' +#' truncatedNorm = function(1000, c(0,0,0), identity(3), lower = -1, +#' upper = c(2,1,2), start.value = c(0,0,0)) +#' +#' constr = thresh2constraints(3, lower = c(1,1,1)) +#' +#' samp = sample_from_constraints(linear_part = constr$linear_part, +#' offset= constr$offset, +#' mean_param = c(0,0,0), +#' covariance = diag(3), +#' initial_point = c(1.5,1.5,1.5), +#' ndraw=100, +#' burnin=2000) +#' + +sample_from_constraints = function(linear_part, + offset, + mean_param, + covariance, + initial_point, + ndraw=8000, + burnin=2000) +{ + + whitened_con = whiten_constraint(linear_part, + offset, + mean_param, + covariance) + white_initial = whitened_con$forward_map(initial_point) + + + white_linear = whitened_con$linear_part + white_offset = whitened_con$offset + + # Inf cannot be used in C code + # In theory, these rows can be dropped + + rows_to_keep = white_offset < Inf + white_linear = white_linear[rows_to_keep,,drop=FALSE] + white_offset = white_offset[rows_to_keep] + + nstate = length(white_initial) + if (sum(rows_to_keep) > 0) { + if (ncol(white_linear) > 1) { + nconstraint = nrow(white_linear) + + directions = rbind(diag(rep(1, nstate)), + matrix(rnorm(nstate^2), nstate, nstate)) + + # normalize rows to have length 1 + + scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) + directions = directions / scaling + ndirection = nrow(directions) + + alphas = directions %*% t(white_linear) + U = white_linear %*% white_initial - white_offset + Z_sample = matrix(rep(0, nstate * ndraw), nstate, ndraw) + + result = .C("sample_truncnorm_white", + as.numeric(white_initial), + as.numeric(U), + as.numeric(t(directions)), + as.numeric(t(alphas)), + output=Z_sample, + as.integer(nconstraint), + as.integer(ndirection), + as.integer(nstate), + as.integer(burnin), + as.integer(ndraw), + package="selectiveInference") + Z_sample = result$output + } else { # the distribution is univariate + # we can just work out upper and lower limits + + white_linear = as.numeric(white_linear) + pos = (white_linear * white_offset) >= 0 + neg = (white_linear * white_offset) <= 0 + if (sum(pos) > 0) { + U = min((white_offset / white_linear)[pos]) + } else { + U = Inf + } + if (sum(neg) < 0) { + L = max((white_offset / white_linear)[neg]) + } else { + L = -Inf + } + Z_sample = matrix(qnorm((pnorm(U) - pnorm(L)) * runif(ndraw) + pnorm(L)), 1, ndraw) + } + } else { + Z_sample = matrix(rnorm(nstate * ndraw), nstate, ndraw) + } + + + Z = t(whitened_con$inverse_map(Z_sample)) + return(Z) +} + +#' Translate between coordinate thresholds and affine constraints +#' @description +#' \code{thresh2constraints} translates lower and upper constraints +#' on coordinates into linear and offset constraints (A*Z <= B). +#' lower and upper can have -Inf or Inf coordinates. 
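+#'
+#' For example (illustrative): \code{thresh2constraints(2, lower = c(0, -Inf))}
+#' yields a single constraint row \code{c(-1, 0)} with offset \code{0},
+#' encoding -Z[1] <= 0, i.e. Z[1] >= 0; an upper bound instead contributes a
+#' row with a +1 entry and offset equal to the bound.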
+#' @param d dimension of vector +#' @param lower 1 or d-dim lower constraints +#' @param upper 1 or d-dim upper constraints +#' @export +thresh2constraints = function(d, lower = rep(-Inf, d), upper = rep(Inf,d)){ + stopifnot(is.element(length(lower),c(1,d))) + stopifnot(is.element(length(upper),c(1,d))) + + if (length(lower) == 1){ + lower = rep(lower, d) + } + if (length(upper) == 1){ + upper = rep(upper, d) + } + + + linear_part = matrix(ncol = d, nrow = 0) + offset = numeric(0) + lower_constraints = which(lower > -Inf) + for (l in lower_constraints){ + new_vec = rep(0,d) + new_vec[l] = -1 + linear_part = rbind(linear_part, new_vec) + offset = c(offset, -lower[l]) + } + upper_constraints = which(upper < Inf) + for (u in upper_constraints){ + new_vec = rep(0,d) + new_vec[u] = 1 + linear_part = rbind(linear_part, new_vec) + offset = c(offset, upper[u]) + } + + constraints = list(linear_part = linear_part, offset = offset) + return(constraints) +} + + + diff --git a/selectiveInference-currentCRAN/DESCRIPTION b/selectiveInference-currentCRAN/DESCRIPTION new file mode 100644 index 0000000..f7af810 --- /dev/null +++ b/selectiveInference-currentCRAN/DESCRIPTION @@ -0,0 +1,18 @@ +Package: selectiveInference +Type: Package +Title: Tools for Post-Selection Inference +Version: 1.2.4 +Date: 2017-09-18 +Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, + Joshua Loftus, Stephen Reid +Maintainer: Rob Tibshirani +Depends: glmnet, intervals, survival, R (>= 3.4.0) +Suggests: Rmpfr +Description: New tools for post-selection inference, for use + with forward stepwise regression, least angle regression, the + lasso, and the many means problem. The lasso function implements Gaussian, logistic and Cox survival models. +License: GPL-2 +NeedsCompilation: yes +Packaged: 2017-09-19 23:49:11 UTC; tibs +Repository: CRAN +Date/Publication: 2017-09-20 03:14:10 UTC diff --git a/selectiveInference-currentCRAN/MD5 b/selectiveInference-currentCRAN/MD5 new file mode 100644 index 0000000..6675546 --- /dev/null +++ b/selectiveInference-currentCRAN/MD5 @@ -0,0 +1,35 @@ +89792835188231f03f117ab143c2fe46 *DESCRIPTION +5b8e448cf043849e190d2b71898eaad9 *NAMESPACE +9c5c032cb17908e6dea15a0b89d649a9 *R/funs.common.R +cf1d1199cf6cacb1d54fce08388d20cc *R/funs.fixed.R +faf5eed09c13d3e80270d82305f0b348 *R/funs.fixedCox.R +25e9f2957b4cbac8b11a283c69533f64 *R/funs.fixedLogit.R +c7af51c32236ef56a6ed0a525f52dce4 *R/funs.fs.R +fc41d0af77330bde0395f438c117c7d8 *R/funs.groupfs.R +632c61c8fc3da59cde6b337f7d4341a4 *R/funs.inf.R +dba7bfb08c9184569d97c14a0575c5a1 *R/funs.lar.R +ed45e9aa5e6383ff9888b35af9b30e9e *R/funs.manymeans.R +bd535e32d32e9cd0e723a5f9f00d9eef *R/funs.max.R +6daca48218e58720c1570784706c199a *R/funs.quadratic.R +d1db3866e82ad6e33baef9da4d994833 *man/estimateSigma.Rd +1747e0899ef985469ae560fb828755cb *man/factorDesign.Rd +1028942deac2fd45aaf2e49d94aa6dac *man/fixedLassoInf.Rd +60e2065f446f1d6dc11c77a5534580bc *man/forwardStop.Rd +2e6f87cd38e1f4b4cb60bfc8299dc1f4 *man/fs.Rd +1483067f07f71b2d996138877e4f48ef *man/fsInf.Rd +7d5ca8ce0ff81cf5f0e87cadffa29229 *man/groupfs.Rd +5ccec019c69b4832438b79830649e730 *man/groupfsInf.Rd +61bdaa3e5ac7bbe02d55f42530edf956 *man/lar.Rd +b25bc2d93c0b266dbec45d82a5d05004 *man/larInf.Rd +4da84515659e7a70fb7375dc2c791b4b *man/manyMeans.Rd +c7c96850986be5e1203cca414a410a32 *man/plot.fs.Rd +3dc4100747d7e72276a75c8e6beba37c *man/plot.lar.Rd +192e0031a10ace23df79a314cf90c648 *man/predict.fs.Rd +588230513bd05fd139c75d45f94a7cd6 *man/predict.groupfs.Rd +0b477548ac30e902eca27163a947e2ca 
*man/predict.lar.Rd +b275e61a2976d14595dc9dfea646675e *man/scaleGroups.Rd +0c21e5414145f4841c3897c995dad4c2 *man/selectiveInference-internal.Rd +b0bbe4ffe6e958215a85bb15fc43ab01 *man/selectiveInference.Rd +4313aa781953d7f1f6e75383e938e1c7 *src/matrixcomps.c +d7f4c478a9de5716b2da338ae6da2ea5 *src/selinf_init.c +11b2e6c34bc1ed181b407fc658a3b0af *src/truncnorm.c diff --git a/selectiveInference-currentCRAN/NAMESPACE b/selectiveInference-currentCRAN/NAMESPACE new file mode 100644 index 0000000..cf2f3b8 --- /dev/null +++ b/selectiveInference-currentCRAN/NAMESPACE @@ -0,0 +1,45 @@ +export(lar,fs, + larInf,fsInf, + coef.lar,coef.fs, + predict.lar,predict.fs, + print.lar,print.fs, + print.larInf,print.fsInf, + plot.lar,plot.fs, + fixedLassoInf,print.fixedLassoInf, +# fixedLogitLassoInf,print.fixedLogitLassoInf, +# fixedCoxLassoInf,print.fixedCoxLassoInf, + forwardStop, + estimateSigma, + manyMeans,print.manyMeans, + groupfs,groupfsInf, + scaleGroups,factorDesign + ) + +S3method("coef", "lar") +S3method("predict", "lar") +S3method("print", "lar") +S3method("plot", "lar") +S3method("print", "larInf") +S3method("coef", "fs") +S3method("predict", "fs") +S3method("print", "fs") +S3method("plot", "fs") +S3method("print", "fsInf") +S3method("print", "fixedLassoInf") +S3method("print", "fixedLogitLassoInf") +S3method("print", "fixedCoxLassoInf") +S3method("print", "manyMeans") +S3method("print", "groupfs") +S3method("print", "groupfsInf") + +useDynLib("selectiveInference",.registration=TRUE) +import(glmnet) +import(intervals) +import(survival) +importFrom("graphics", abline, axis, matplot) +importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, + qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) +importFrom("stats", "coef", "df", "lm", "pf") +importFrom("stats", "glm", "residuals", "vcov") + + diff --git a/selectiveInference-currentCRAN/R/funs.common.R b/selectiveInference-currentCRAN/R/funs.common.R new file mode 100644 index 0000000..5945700 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.common.R @@ -0,0 +1,186 @@ +# Special linear time order function, works only when x +# is a scrambled vector of integers. + +Order <- function(x) { + n = length(x) + o = numeric(n) + o[x] = Seq(1,n) + return(o) +} + +# Returns a sequence of integers from a to b if a <= b, +# otherwise nothing. You have no idea how important this +# function is... + +Seq <- function(a, b, ...) { + if (a<=b) return(seq(a,b,...)) + else return(numeric(0)) +} + +# Returns the sign of x, with Sign(0)=1. 
+ +Sign <- function(x) { + return(-1+2*(x>=0)) +} + +############################## + +# Centering and scaling convenience function + +standardize <- function(x, y, intercept, normalize) { + x = as.matrix(x) + y = as.numeric(y) + n = nrow(x) + p = ncol(x) + + if (intercept) { + bx = colMeans(x) + by = mean(y) + x = scale(x,bx,FALSE) + y = y-mean(y) + } else { + bx = rep(0,p) + by = 0 + } + if (normalize) { + sx = sqrt(colSums(x^2)) + x = scale(x,FALSE,sx) + } else { + sx = rep(1,p) + } + + return(list(x=x,y=y,bx=bx,by=by,sx=sx)) +} + +############################## + +# Interpolation function to get coefficients + +coef.interpolate <- function(betas, s, knots, dec=TRUE) { + # Sort the s values + o = order(s,dec=dec) + s = s[o] + + k = length(s) + mat = matrix(rep(knots,each=k),nrow=k) + if (dec) b = s >= mat + else b = s <= mat + blo = max.col(b,ties.method="first") + bhi = pmax(blo-1,1) + + i = bhi==blo + p = numeric(k) + p[i] = 0 + p[!i] = ((s-knots[blo])/(knots[bhi]-knots[blo]))[!i] + + beta = t((1-p)*t(betas[,blo,drop=FALSE]) + p*t(betas[,bhi,drop=FALSE])) + colnames(beta) = as.character(round(s,3)) + rownames(beta) = NULL + + # Return in original order + o = order(o) + return(beta[,o,drop=FALSE]) +} + +############################## + +checkargs.xy <- function(x, y) { + if (missing(x)) stop("x is missing") + if (is.null(x) || !is.matrix(x)) stop("x must be a matrix") + if (missing(y)) stop("y is missing") + if (is.null(y) || !is.numeric(y)) stop("y must be numeric") + if (ncol(x) == 0) stop("There must be at least one predictor [must have ncol(x) > 0]") + if (checkcols(x)) stop("x cannot have duplicate columns") + if (length(y) == 0) stop("There must be at least one data point [must have length(y) > 0]") + if (length(y)!=nrow(x)) stop("Dimensions don't match [length(y) != nrow(x)]") +} + +checkargs.misc <- function(sigma=NULL, alpha=NULL, k=NULL, + gridrange=NULL, gridpts=NULL, griddepth=NULL, + mult=NULL, ntimes=NULL, + beta=NULL, lambda=NULL, tol.beta=NULL, tol.kkt=NULL, + bh.q=NULL) { + + if (!is.null(sigma) && sigma <= 0) stop("sigma must be > 0") + if (!is.null(lambda) && lambda < 0) stop("lambda must be >= 0") + if (!is.null(alpha) && (alpha <= 0 || alpha >= 1)) stop("alpha must be between 0 and 1") + if (!is.null(k) && length(k) != 1) stop("k must be a single number") + if (!is.null(k) && (k < 1 || k != floor(k))) stop("k must be an integer >= 1") + if (!is.null(gridrange) && (length(gridrange) != 2 || gridrange[1] > gridrange[2])) + stop("gridrange must be an interval of the form c(a,b) with a <= b") + if (!is.null(gridpts) && (gridpts < 20 || gridpts != round(gridpts))) + stop("gridpts must be an integer >= 20") + if (!is.null(griddepth) && (griddepth > 10 || griddepth != round(griddepth))) + stop("griddepth must be an integer <= 10") + if (!is.null(mult) && mult < 0) stop("mult must be >= 0") + if (!is.null(ntimes) && (ntimes <= 0 || ntimes != round(ntimes))) + stop("ntimes must be an integer > 0") + if (!is.null(beta) && sum(beta!=0)==0) stop("Value of lambda too large, beta is zero") + if (!is.null(lambda) && length(lambda) != 1) stop("lambda must be a single number") + if (!is.null(lambda) && lambda < 0) stop("lambda must be >=0") + if (!is.null(tol.beta) && tol.beta <= 0) stop("tol.beta must be > 0") + if (!is.null(tol.kkt) && tol.kkt <= 0) stop("tol.kkt must be > 0") +} + +# Make sure that no two columms of A are the same +# (this works with probability one). 
+ +checkcols <- function(A) { + b = rnorm(nrow(A)) + a = sort(t(A)%*%b) + if (any(diff(a)==0)) return(TRUE) + return(FALSE) +} + +estimateSigma <- function(x, y, intercept=TRUE, standardize=TRUE) { + checkargs.xy(x,rep(0,nrow(x))) + if(nrow(x)<10) stop("Number of observations must be at least 10 to run estimateSigma") + cvfit=cv.glmnet(x,y,intercept=intercept,standardize=standardize) + lamhat=cvfit$lambda.min + fit=glmnet(x,y,standardize=standardize) + yhat=predict(fit,x,s=lamhat) + nz=sum(predict(fit,s=lamhat, type="coef")!=0) + sigma=sqrt(sum((y-yhat)^2)/(length(y)-nz-1)) + return(list(sigmahat=sigma, df=nz)) +} + +# Update the QR factorization, after a column has been +# added. Here Q1 is m x n, Q2 is m x k, and R is n x n. + +updateQR <- function(Q1,Q2,R,col) { + m = nrow(Q1) + n = ncol(Q1) + k = ncol(Q2) + + a = .C("update1", + Q2=as.double(Q2), + w=as.double(t(Q2)%*%col), + m=as.integer(m), + k=as.integer(k), + dup=FALSE, + package="selectiveInference") + + Q2 = matrix(a$Q2,nrow=m) + w = c(t(Q1)%*%col,a$w) + + # Re-structure: delete a column from Q2, add one to + # Q1, and expand R + Q1 = cbind(Q1,Q2[,1]) + Q2 = Q2[,-1,drop=FALSE] + R = rbind(R,rep(0,n)) + R = cbind(R,w[Seq(1,n+1)]) + + return(list(Q1=Q1,Q2=Q2,R=R)) +} + +# Moore-Penrose pseudo inverse for symmetric matrices + +pinv <- function(A, tol=.Machine$double.eps) { + e = eigen(A) + v = Re(e$vec) + d = Re(e$val) + d[d > tol] = 1/d[d > tol] + d[d < tol] = 0 + if (length(d)==1) return(v*d*v) + else return(v %*% diag(d) %*% t(v)) +} diff --git a/selectiveInference-currentCRAN/R/funs.fixed.R b/selectiveInference-currentCRAN/R/funs.fixed.R new file mode 100644 index 0000000..b30d04c --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.fixed.R @@ -0,0 +1,218 @@ +# Lasso inference function (for fixed lambda). 
Note: here we are providing inference +# for the solution of +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 + +fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, status=NULL, +sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE) { + + family = match.arg(family) + this.call = match.call() + type = match.arg(type) + + if(family=="binomial") { + if(type!="partial") stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } +else if(family=="cox") { + if(type!="partial") stop("Only type= partial allowed with Cox family") + out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta, + tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + +else{ + + + + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (length(beta) != p) stop("Since family='gaussian', beta must have length equal to ncol(x)") + + # If glmnet was run with an intercept term, center x and y + if (intercept==TRUE) { + obj = standardize(x,y,TRUE,FALSE) + x = obj$x + y = obj$y + } + + # Check the KKT conditions + g = t(x)%*%(y-x%*%beta) / lambda + if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + vars = which(abs(beta) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Get lasso polyhedral region, of form Gy >= u + out = fixedLasso.poly(x,y,beta,lambda,vars) + G = out$G + u = out$u + + # Check polyhedral region + tol.poly = 0.01 + if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + k = length(vars) + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + sign = numeric(k) + + if (type=="full" & p > n) + warning(paste("type='full' does not make sense when p > n;", + "switching to type='partial'")) + + if (type=="partial" || p > n) { + xa = x[,vars,drop=F] + M = pinv(crossprod(xa)) %*% t(xa) + } + else { + M = pinv(crossprod(x)) %*% t(x) + M = M[vars,,drop=F] + } + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + a = poly.pval(y,G,u,vj,sigma,bits) + pv[j] = a$pv + vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) + + a = poly.int(y,G,u,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea + } + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, + call=this.call) + class(out) = "fixedLassoInf" + return(out) +} +} + +############################# + + +fixedLasso.poly= +function(x, y, beta, lambda, a) { + xa = x[,a,drop=F] + xac = x[,!a,drop=F] + xai = pinv(crossprod(xa)) + xap = xai %*% t(xa) + za = sign(beta[a]) + if (length(za)>1) dz = diag(za) + if (length(za)==1) dz = matrix(za,1,1) + +# P = diag(1,nrow(xa)) - xa %*% xap + #NOTE: inactive constraints not needed below! + + G = -rbind( + # 1/lambda * t(xac) %*% P, + # -1/lambda * t(xac) %*% P, + -dz %*% xap + ) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + # 1 - t(xac) %*% t(xap) %*% za, + # 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + + return(list(G=G,u=u)) +} + +############################## + +print.fixedLassoInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$coef0 / x$sd,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + +#estimateLambda <- function(x, sigma, nsamp=1000){ +# checkargs.xy(x,rep(0,nrow(x))) +# if(nsamp < 10) stop("More Monte Carlo samples required for estimation") +# if (length(sigma)!=1) stop("sigma should be a number > 0") + # if (sigma<=0) stop("sigma should be a number > 0") + + # n = nrow(x) + # eps = sigma*matrix(rnorm(nsamp*n),n,nsamp) + # lambda = 2*mean(apply(t(x)%*%eps,2,max)) + # return(lambda) +#} + diff --git a/selectiveInference-currentCRAN/R/funs.fixedCox.R b/selectiveInference-currentCRAN/R/funs.fixedCox.R new file mode 100644 index 0000000..ff778d9 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.fixedCox.R @@ -0,0 +1,120 @@ +fixedCoxLassoInf=function(x,y,status,beta,lambda,alpha=.1, type=c("partial"),tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ + + + checkargs.xy(x,y) + if(is.null(status)) stop("Must supply `status' argument") +if( sum(status==0)+sum(status==1)!=length(y)) stop("status vector must have values 0 or 1") + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + n=nrow(x) + p=ncol(x) + nvar=sum(beta!=0) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + + + m=beta!=0 +vars=which(m) +if(sum(m)>0){ + bhat=beta[beta!=0] #penalized coefs just for active variables + s2=sign(bhat) + + #check KKT + + aaa=coxph(Surv(y,status)~x[,m],init=bhat,iter.max=0) # this gives the Cox model at exactly bhat + # so when we compute gradient and score + # we are evaluating at the LASSO solution + # naming of variables could be improved... + res=residuals(aaa,type="score") +if(!is.matrix(res)) res=matrix(res,ncol=1) +scor=colSums(res) + g=(scor+lambda*s2)/(2*lambda) +# cat(c(g,lambda,tol.kkt),fill=T) + if (any(abs(g) > 1+tol.kkt) ) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + +# Hessian of partial likelihood at the LASSO solution +MM=vcov(aaa) + +bbar=(bhat+lambda*MM%*%s2) +A1=-(mydiag(s2)) +b1= -(mydiag(s2)%*%MM)%*%s2*lambda + + temp=max(A1%*%bbar-b1) + + +# compute p-values + +# JT: are we sure the signs of these are correctly handled? 
+# two sided p-values numerically agree with python but +# the one sided p-values are a bit off + + for(jj in 1:length(bbar)){ + vj=rep(0,length(bbar));vj[jj]=s2[jj] + + + junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) + + pv[jj] = junk$pv + vlo[jj]=junk$vlo + vup[jj]=junk$vup + sd[jj]=junk$sd + + junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha) + ci[jj,]=junk2$int + tailarea[jj,] = junk2$tailarea + + } + # JT: these don't seem to be the real one-step estimators + fit0=coxph(Surv(y,status)~x[,m]) + coef0=fit0$coef + se0=sqrt(diag(fit0$var)) + zscore0=coef0/se0 + + out = list(lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call) + class(out) = "fixedCoxLassoInf" +} +return(out) +} + + + +print.fixedCoxLassoInf <- function(x, tailarea=TRUE, ...) { + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + + diff --git a/selectiveInference-currentCRAN/R/funs.fixedLogit.R b/selectiveInference-currentCRAN/R/funs.fixedLogit.R new file mode 100644 index 0000000..5b67354 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.fixedLogit.R @@ -0,0 +1,151 @@ + +fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ + + + type = match.arg(type) + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + + n=length(y) + p=ncol(x) + # I assume that intcpt was used + if(length(beta)!=p+1) stop("Since family='binomial', beta must be of length ncol(x)+1, that is, it should include an intercept") + nvar=sum(beta[-1]!=0) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + +#do we need to worry about standardization? 
+ +# obj = standardize(x,y,TRUE,FALSE) + # x = obj$x + # y = obj$y + + m=beta[-1]!=0 #active set + + bhat=c(beta[1],beta[-1][beta[-1]!=0]) # intcpt plus active vars + s2=sign(bhat) + lam2m=diag(c(0,rep(lambda,sum(m)))) + + + xxm=cbind(1,x[,m]) + + etahat = xxm %*% bhat + prhat = as.vector(exp(etahat) / (1 + exp(etahat))) + ww=prhat*(1-prhat) + # w=diag(ww) + +#check KKT + z=etahat+(y-prhat)/ww + # g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda + g=scale(t(x),FALSE,1/ww)%*%(z-etahat)/lambda # negative gradient scaled by lambda + if (any(abs(g) > 1+tol.kkt) ) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + vars = which(abs(beta[-1]) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[-1][vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + #constraints for active variables + # MM=solve(t(xxm)%*%w%*%xxm) + MM=solve(scale(t(xxm),F,1/ww)%*%xxm) + gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized + # at exact LASSO solution it should be s2[-1] + dbeta = MM %*% gm + + # bbar=(bhat+lam2m%*%MM%*%s2) # JT: this is wrong, shouldn't use sign of intercept anywhere... + bbar = bhat - dbeta + + A1=-(mydiag(s2))[-1,] + b1= (s2 * dbeta)[-1] + + tol.poly = 0.01 + if (max((A1 %*% bbar) - b1) > tol.poly) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + + + for(jj in 1:sum(m)){ + vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] + # compute p-values + junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) + pv[jj] = junk$pv + + vlo[jj]=junk$vlo + vup[jj]=junk$vup + sd[jj]=junk$sd + # junk2=mypoly.int.lee(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) + junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=.1) + + ci[jj,]=junk2$int + tailarea[jj,] = junk2$tailarea + } + + # JT: these are not the one step estimators but they are close + fit0=glm(y~x[,m],family="binomial") + sfit0=summary(fit0) + coef0=bbar[-1] #fit0$coef[-1] + se0=sqrt(diag(MM)[-1]) # sfit0$cov.scaled)[-1]) + zscore0=coef0/se0 + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call, + info.matrix=MM) # info.matrix is output just for debugging purposes at the moment + class(out) = "fixedLogitLassoInf" + return(out) + + } + + + +print.fixedLogitLassoInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + + + diff --git a/selectiveInference-currentCRAN/R/funs.fs.R b/selectiveInference-currentCRAN/R/funs.fs.R new file mode 100644 index 0000000..b5ee511 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.fs.R @@ -0,0 +1,482 @@ +# We compute the forward stepwise regression (FS) path given +# a response vector y and predictor matrix x. We assume +# that x has columns in general position. + +fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, + verbose=FALSE) { + + this.call = match.call() + checkargs.xy(x=x,y=y) + + # Center and scale, etc. + obj = standardize(x,y,intercept,normalize) + x = obj$x + y = obj$y + bx = obj$bx + by = obj$by + sx = obj$sx + n = nrow(x) + p = ncol(x) + + ##### + # To keep consistent with the lar function, we parametrize + # so that the first step has all zero coefficients, + # Also, an interesting note: the effective "lambda" (maximal + # correlation with the residual) may increase with stepwise! + # So we don't keep track of it + + ##### + # Find the first variable to enter and its sign + working_x = scale(x,center=F,scale=sqrt(colSums(x^2))) + score = t(working_x)%*%y + i_hit = which.max(abs(score)) # Hitting coordinate + sign_hit = Sign(score[i_hit]) # Sign + signs = sign_hit # later signs will be appended to `signs` + + if (verbose) { + cat(sprintf("1. Adding variable %i, |A|=%i...",i_hit,1)) + } + + # Now iteratively find the new FS estimates + + # Things to keep track of, and return at the end + # JT: I guess the "buf" just saves us from making huge + # matrices we don't need? + + buf = min(maxsteps,500) + action = numeric(buf) # Actions taken + df = numeric(buf) # Degrees of freedom + beta = matrix(0,p,buf) # FS estimates + + action[1] = i_hit + df[1] = 0 + beta[,1] = 0 + + # Gamma matrix! + gbuf = max(2*p*3,2000) # Space for 3 steps, at least + gi = 0 # index into rows of Gamma matrix + + Gamma = matrix(0,gbuf,n) + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+1,] = t(sign_hit*working_x[,i_hit]); gi = gi+1 + + # nconstraint + nconstraint = numeric(buf) + vreg = matrix(0,buf,n) + nconstraint[1] = gi + vreg[1,] = sign_hit*x[,i_hit] / sum(x[,i_hit]^2) + + # Other things to keep track of, but not return + r = 1 # Size of active set + A = i_hit # Active set -- JT: isn't this basically the same as action? + I = Seq(1,p)[-i_hit] # Inactive set + X_active = x[,i_hit,drop=FALSE] # Matrix X[,A] + X_inactive = x[,-i_hit,drop=FALSE] # Matrix X[,I] + k = 2 # What step are we at? + # JT Why keep track of r and k instead of just saying k=r+1? + + # Compute a skinny QR decomposition of X_active + # JT: obs was used as variable name above -- this is something different, no? 
+ # changed it to qr_X + + qr_X = qr(X_active) + Q = qr.Q(qr_X,complete=TRUE) + Q_active = Q[,1,drop=FALSE]; + Q_inactive = Q[,-1,drop=FALSE] + R = qr.R(qr_X) + + # Throughout the algorithm, we will maintain + # the decomposition X_active = Q_active*R. Dimensions: + # X_active: n x r + # Q_active: n x r + # Q_inactive: n x (n-r) + # R: r x r + + while (k<=maxsteps) { + ########## + # Check if we've reached the end of the buffer + if (k > length(action)) { + buf = length(action) + action = c(action,numeric(buf)) + df = c(df,numeric(buf)) + beta = cbind(beta,matrix(0,p,buf)) + nconstraint = c(nconstraint,numeric(buf)) + vreg = rbind(vreg,matrix(0,buf,n)) + } + + # Key quantities for the next entry + keepLs=backsolve(R,t(Q_active)%*%X_inactive) + X_inactive_resid = X_inactive - X_active %*% keepLs + working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) + score = as.numeric(t(working_x)%*%y) + + # If the inactive set is empty, nothing will hit + if (r==min(n-intercept,p)) break + + # Otherwise find the next hitting time + else { + sign_score = Sign(score) + abs_score = sign_score * score + i_hit = which.max(abs_score) + sign_hit = sign_score[i_hit] + } + + # Record the solution + # what is the difference between "action" and "A"? + + action[k] = I[i_hit] + df[k] = r + beta[A,k] = backsolve(R,t(Q_active)%*%y) + + # Gamma matrix! + if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) + working_x = t(sign_score*t(working_x)) + Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+1,] = t(working_x[,i_hit]); gi = gi+1 + + # nconstraint, regression contrast + nconstraint[k] = gi + vreg[k,] = sign_hit*X_inactive_resid[,i_hit] / sum(X_inactive_resid[,i_hit]^2) + + # Update all of the variables + r = r+1 + A = c(A,I[i_hit]) + I = I[-i_hit] + signs = c(signs,sign_hit) + X_active = cbind(X_active,X_inactive[,i_hit]) + X_inactive = X_inactive[,-i_hit,drop=FALSE] + + # Update the QR decomposition + updated_qr = updateQR(Q_active,Q_inactive,R,X_active[,r]) + Q_active = updated_qr$Q1 + + # JT: why do we store Q_inactive? Doesn't seem to be used. + Q_inactive = updated_qr$Q2 + R = updated_qr$R + + if (verbose) { + cat(sprintf("\n%i. Adding variable %i, |A|=%i...",k,A[r],r)) + } + + # Step counter + k = k+1 + } + + # Trim + action = action[Seq(1,k-1)] + df = df[Seq(1,k-1),drop=FALSE] + beta = beta[,Seq(1,k-1),drop=FALSE] + Gamma = Gamma[Seq(1,gi),,drop=FALSE] + nconstraint = nconstraint[Seq(1,k-1)] + vreg = vreg[Seq(1,k-1),,drop=FALSE] + + # If we reached the maximum number of steps + if (k>maxsteps) { + if (verbose) { + cat(sprintf("\nReached the maximum number of steps (%i),",maxsteps)) + cat(" skipping the rest of the path.") + } + completepath = FALSE + bls = NULL + } + + # Otherwise, note that we completed the path + else { + completepath = TRUE + + # Record the least squares solution. 
Note that + # we have already computed this + bls = rep(0,p) + if(length(keepLs)>0) bls[A] = keepLs + } + + if (verbose) cat("\n") + + # Adjust for the effect of centering and scaling + if (intercept) df = df+1 + if (normalize) beta = beta/sx + if (normalize && completepath) bls = bls/sx + + # Assign column names + colnames(beta) = as.character(Seq(1,k-1)) + + out = list(action=action,sign=signs,df=df,beta=beta, + completepath=completepath,bls=bls, + Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, + intercept=intercept,normalize=normalize,call=this.call) + class(out) = "fs" + return(out) +} + +############################## + +# Coefficient function for fs + +coef.fs <- function(object, s, ...) { + if (object$completepath) { + k = length(object$action)+1 + beta = cbind(object$beta,object$bls) + } else { + k = length(object$action) + beta = object$beta + } + + if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k)) + knots = 1:k + dec = FALSE + return(coef.interpolate(beta,s,knots,dec)) +} + +# Prediction function for fs + +predict.fs <- function(object, newx, s, ...) { + beta = coef.fs(object,s) + if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) + else newx = scale(newx,object$bx,FALSE) + return(newx %*% beta + object$by) +} + +############################## + +# FS inference function + +fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { + + this.call = match.call() + type = match.arg(type) + checkargs.misc(sigma=sigma,alpha=alpha,k=k, + gridrange=gridrange,mult=mult,ntimes=ntimes) + if (class(obj) != "fs") stop("obj must be an object of class fs") + if (is.null(k) && type=="active") k = length(obj$action) + if (is.null(k) && type=="all") stop("k must be specified when type = all") + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + k = min(k,length(obj$action)) # Round to last step + x = obj$x + y = obj$y + p = ncol(x) + n = nrow(x) + G = obj$Gamma + nconstraint = obj$nconstraint + sx = obj$sx + + if (is.null(sigma)) { + if (n >= 2*p) { + oo = obj$intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + khat = NULL + + if (type == "active") { + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + vreg = obj$vreg[1:k,,drop=FALSE] + sign = obj$sign[1:k] + vars = obj$action[1:k] + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + Gj = G[1:nconstraint[j],] + uj = rep(0,nconstraint[j]) + vj = vreg[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + + khat = forwardStop(pv,alpha) + } + + else { + if (type == "aic") { + out = 
aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) + khat = out$khat + m = out$stopped * ntimes + G = rbind(out$G,G[1:nconstraint[khat+m],]) # Take ntimes more steps past khat + u = c(out$u,rep(0,nconstraint[khat+m])) # (if we need to) + kk = khat + } + else { + G = G[1:nconstraint[k],] + u = rep(0,nconstraint[k]) + kk = k + } + + pv = vlo = vup = numeric(kk) + vmat = matrix(0,kk,n) + ci = tailarea = matrix(0,kk,2) + sign = numeric(kk) + vars = obj$action[1:kk] + xa = x[,vars] + M = pinv(crossprod(xa)) %*% t(xa) + + for (j in 1:kk) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + Gj = rbind(G,vj) + uj = c(u,0) + + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + } + + # JT: why do we output vup, vlo? Are they used somewhere else? + + out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + call=this.call) + class(out) = "fsInf" + return(out) +} + +############################## + + +############################## + +print.fs <- function(x, ...) { + cat("\nCall:\n") + dput(x$call) + + cat("\nSequence of FS moves:\n") + nsteps = length(x$action) + tab = cbind(1:nsteps,x$action,x$sign) + colnames(tab) = c("Step","Var","Sign") + rownames(tab) = rep("",nrow(tab)) + print(tab) + invisible() +} + +print.fsInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + if (x$type == "active") { + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",x$alpha)) + tab = cbind(1:length(x$pv),x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", + "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",x$khat)) + } + + else if (x$type == "all") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$k,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + } + + else if (x$type == "aic") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$khat,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) + } + + invisible() +} + + +plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { + if (x$completepath) { + k = length(x$action)+1 + beta = cbind(x$beta,x$bls) + } else { + k = length(x$action) + beta = x$beta + } + p = nrow(beta) + + xx = 1:k + xlab = "Step" + + if (omit.zeros) { + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA + } + + plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), + xlab=xlab,ylab="Coefficients",main="Forward stepwise path",...) + abline(h=0,lwd=2) + matplot(xx,t(beta),type="l",lty=1,add=TRUE) + if (breaks) abline(v=xx,lty=2) + if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) + invisible() +} + diff --git a/selectiveInference-currentCRAN/R/funs.groupfs.R b/selectiveInference-currentCRAN/R/funs.groupfs.R new file mode 100644 index 0000000..b2c0447 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.groupfs.R @@ -0,0 +1,794 @@ +#' Select a model with forward stepwise. +#' +#' This function implements forward selection of linear models almost identically to \code{\link[stats]{step}} with \code{direction = "forward"}. The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. +#' +#' @param x Matrix of predictors (n by p). +#' @param y Vector of outcomes (length n). +#' @param index Group membership indicator of length p. 
Check that \code{sort(unique(index)) = 1:G} where \code{G} is the number of distinct groups. +#' @param maxsteps Maximum number of steps for forward stepwise. +#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. When NULL, \code{link{groupfsInf}} performs truncated F inference instead of truncated \eqn{\chi}. See \code{\link[stats]{extractAIC}} for details on the AIC criterion. +#' @param k Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = 2log(p)} for RIC (best for high dimensions, when \eqn{p > n}). If \eqn{G < p} then RIC may be too restrictive and it would be better to use \code{log(G) < k < 2log(p)}. +#' @param intercept Should an intercept be included in the model? Default is TRUE. Does not count as a step. +#' @param center Should the columns of the design matrix be centered? Default is TRUE. +#' @param normalize Should the design matrix be normalized? Default is TRUE. +#' @param aicstop Early stopping if AIC increases. Default is 0 corresponding to no early stopping. Positive integer values specify the number of times the AIC is allowed to increase in a row, e.g. with \code{aicstop = 2} the algorithm will stop if the AIC criterion increases for 2 steps in a row. The default of \code{\link[stats]{step}} corresponds to \code{aicstop = 1}. +#' @param verbose Print out progress along the way? Default is FALSE. +#' @return An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values. +#' @examples +#' x = matrix(rnorm(20*40), nrow=20) +#' index = sort(rep(1:20, 2)) +#' y = rnorm(20) + 2 * x[,1] - x[,4] +#' fit = groupfs(x, y, index, maxsteps = 5) +#' out = groupfsInf(fit) +#' out +#' @seealso \code{\link{groupfsInf}}, \code{\link{factorDesign}}. +groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, center = TRUE, normalize = TRUE, aicstop = 0, verbose = FALSE) { + + if (missing(index)) stop("Missing argument: index.") + p <- ncol(x) + n <- nrow(x) + + # Group labels + labels <- unique(index) + G <- length(labels) + inactive <- labels + active <- c() + + if (missing(maxsteps) || maxsteps >= min(n, G)) maxsteps <- min(n-1, G) + checkargs.xy(x=x, y=y) + checkargs.groupfs(x, index, maxsteps) + if (maxsteps > G) stop("maxsteps is larger than number of groups") + gsizes <- sort(rle(sort(index))$lengths, decreasing = TRUE) + if (sum(gsizes[1:maxsteps]) >= nrow(x)) { + maxsteps <- max(which(cumsum(gsizes) < nrow(x))) + warning(paste("If the largest groups are included the model will be saturated/overdetermined. 
To prevent this maxsteps has been changed to", maxsteps)) + } + + # Initialize copies of data for loop + by <- mean(y) + y.update <- y + if (intercept) y.update <- y - by + y.last <- y.update + + # Center and scale design matrix + xscaled <- scaleGroups(x, index, center, normalize) + xm <- xscaled$xm + xs <- xscaled$xs + x.update <- xscaled$x + + x.begin <- x.update + y.begin <- y.update + stopped <- FALSE + # Store all projections computed along the path + terms = projections = maxprojs = aicpens = maxpens = cumprojs = vector("list", maxsteps) + + # Store other information from each step + path.info <- data.frame(imax=integer(maxsteps), df=integer(maxsteps), AIC=numeric(maxsteps), RSS=numeric(maxsteps), RSSdrop=numeric(maxsteps), chisq=numeric(maxsteps)) + + modelrank <- as.numeric(intercept) + if (is.null(sigma)) { + modelrank <- modelrank + 1 + aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2))) + k * (n + modelrank) + } else { + aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + if (verbose) print(paste0("Start: AIC=", round(aic.begin, 3)), quote = FALSE) + + # Begin main loop + for (step in 1:maxsteps) { + + added <- add1.groupfs(x.update, y.update, index, labels, inactive, k, sigma) + + # Group to be added + imax <- added$imax + inactive <- setdiff(inactive, imax) + active <- union(active, imax) + inactive.inds <- which(!index %in% active) + + # Rank of group + modelrank <- modelrank + added$df + + # Stop without adding if model has become saturated + if (modelrank >= n) { + stop("Saturated model. Abandon ship!") + } + + # Regress added group out of y and inactive x + P.imax <- added$maxproj %*% t(added$maxproj) + P.imax <- diag(rep(1, n)) - P.imax + y.update <- P.imax %*% y.update + x.update[, inactive.inds] <- P.imax %*% x.update[, inactive.inds] + + # Compute AIC + if (is.null(sigma)) { + added$AIC <- n * log(added$maxterm/n) - k * added$df + n*log(2*pi) + k * (n + modelrank) + } else { + added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + + projections[[step]] <- added$projections + maxprojs[[step]] <- added$maxproj + aicpens[[step]] <- added$aicpens + maxpens[[step]] <- added$maxpen + if (step == 1) cumprojs[[step]] <- P.imax + if (step > 1) cumprojs[[step]] <- P.imax %*% cumprojs[[step-1]] + terms[[step]] <- added$terms + + # Compute RSS for unadjusted chisq p-values + added$RSS <- sum(y.update^2) + scale.chisq <- 1 + + added$RSSdrop <- sum((y.last - y.update)^2) + added$chisq <- pchisq(added$RSSdrop/scale.chisq, lower.tail=FALSE, df = added$df) + y.last <- y.update + + # Projections are stored separately + step.info <- data.frame(added[-c(3:(length(added)-4))]) + path.info[step, ] <- step.info + + if (verbose) print(round(step.info, 3)) + + if (aicstop > 0 && step < maxsteps && step >= aicstop && aic.last < added$AIC) { + if (all(diff(c(aic.begin, path.info$AIC)[(step+1-aicstop):(step+1)]) > 0)) { + + if (is.null(sigma)) { + added$AIC <- n * log(added$maxterm/n) - k * added$df + n + n*log(2*pi) + k * modelrank + } else { + added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + + path.info <- path.info[1:step, ] + projections[(step+1):maxsteps] <- NULL + maxprojs[(step+1):maxsteps] <- NULL + aicpens[(step+1):maxsteps] <- NULL + maxpens[(step+1):maxsteps] <- NULL + cumprojs[(step+1):maxsteps] <- NULL + terms[(step+1):maxsteps] <- NULL + maxsteps <- step + stopped <- TRUE + break + } + } + aic.last <- added$AIC + } + + # Is there a better way of doing this? + # Use some projections already computed? 
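# The coefficients reported below come from an unpenalized least-squares refit of
# the (already centered) response on the centered/scaled columns of every selected
# group; the "-1" in the formula drops the intercept because centering has handled it.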
+ beta <- coef(lm(y.begin ~ x.begin[,index %in% path.info$imax]-1)) + names(beta) <- index[index %in% path.info$imax] + + # Create output object + value <- list(action = path.info$imax, L = path.info$L, AIC = path.info$AIC, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, coefficients = beta, bx = xm, by = by, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms) + + class(value) <- "groupfs" + attr(value, "center") <- center + attr(value, "normalize") <- normalize + attr(value, "labels") <- labels + attr(value, "maxsteps") <- maxsteps + attr(value, "sigma") <- sigma + attr(value, "k") <- k + attr(value, "aicstop") <- aicstop + attr(value, "stopped") <- stopped + if (is.null(attr(x, "varnames"))) { + attr(value, "varnames") <- colnames(x) + } else { + attr(value, "varnames") <- attr(x, "varnames") + } + return(value) +} + +#' Add one group to the model in \code{groupfs}. +#' +#' For internal use by \code{\link{groupfs}}. +#' +#' @param xr Design matrix at current step. +#' @param yr Response vector residual at current step. +#' @param index Group membership indicator of length p. +#' @param labels The unique elements of \code{index}. +#' @param inactive Labels of inactive groups. +#' @param k Multiplier of model size penalty, use \code{k = 2} for AIC, \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC. +#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. See \code{\link{extractAIC}} for details. +#' @return Index \code{imax} of added group, value \code{L} of maximized negative AIC, lists of projection matrices defining quadratic model selection event. +add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { + + # Use characters to avoid issues where + # list() populates NULL lists in the positions + # of the active variables + ### Question for later: does this slow down lapply? + keys = as.character(inactive) + n <- nrow(xr) + + # Compute sums of squares to determine which group is added + # penalized by rank of group if k > 0 + projections = aicpens = terms = vector("list", length(keys)) + names(projections) = names(terms) = names(aicpens) = keys + for (key in keys) { + inds <- which(index == key) + xi <- xr[,inds] + ui <- svdu_thresh(xi) + dfi <- ncol(ui) + projections[[key]] <- ui + uy <- t(ui) %*% yr + if (is.null(sigma)) { + aicpens[[key]] <- exp(k*dfi/n) + terms[[key]] <- (sum(yr^2) - sum(uy^2)) * aicpens[[key]] + } else { + aicpens[[key]] <- sigma^2 * k * dfi + terms[[key]] <- (sum(yr^2) - sum(uy^2)) + aicpens[[key]] + } + } + + # Maximizer = group to be added + terms.optind <- which.min(terms) + imax <- inactive[terms.optind] + optkey <- which(keys == imax) + maxproj <- projections[[optkey]] + maxpen <- aicpens[[optkey]] + maxterm <- terms[[optkey]] + projections[[optkey]] <- NULL + aicpens[[optkey]] <- NULL + + return(list(imax=imax, df = ncol(maxproj), projections = projections, maxproj = maxproj, aicpens = aicpens, maxpen = maxpen, maxterm = maxterm, terms = terms)) +} + +# ----------------------------------------------------------- + +#' Compute selective p-values for a model fitted by \code{groupfs}. +#' +#' Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. 
These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. If the \code{sigma} to \code{\link{groupfs}} was NULL then groupfsInf uses truncated \eqn{F} statistics instead of truncated \eqn{\chi}. The \code{sigma} argument to groupfsInf allows users to override and use \eqn{\chi}, but this is not recommended unless \eqn{\sigma} can be estimated well (i.e. \eqn{n > p}). +#' +#' @param obj Object returned by \code{\link{groupfs}} function +#' @param sigma Estimate of error standard deviation. Default is NULL and in this case groupfsInf uses the value of sigma specified to \code{\link{groupfs}}. +#' @param verbose Print out progress along the way? Default is TRUE. +#' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. +#' +#' \describe{ +#' \item{vars}{Labels of the active groups in the order they were included.} +#' \item{pv}{Selective p-values computed from appropriate truncated distributions.} +#' \item{sigma}{Estimate of error variance used in computing p-values.} +#' \item{TC or TF}{Observed value of truncated \eqn{\chi} or \eqn{F}.} +#' \item{df}{Rank of group of variables when it was added to the model.} +#' \item{support}{List of intervals defining the truncation region of the corresponding statistic.} +#' } +groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { + + if (!is.null(obj$cvobj) && attr(obj, "stopped")) { + stop("Cross-validation and early stopping cannot be used simultaneously.") + # This shouldn't happen in the first place! + # (it wouldn't anyway unless someone tries to trick it) + } + + n <- nrow(obj$x) + p <- ncol(obj$x) + maxsteps <- attr(obj, "maxsteps") + k <- attr(obj, "k") + index <- obj$index + x <- obj$x + y <- obj$y + Ep <- sum(index %in% obj$action) + + pvals = dfs = dfs2 = Tstats = numeric(maxsteps) + supports <- list() + + if (!is.null(sigma)) { + type <- "TC" + if (!is.null(obj$sigma)) { + cat(paste("Using specified value", sigma, "for sigma in place of the value", obj$sigma, "used by groupfs()\n")) + } + } else { + if (is.null(obj$sigma)) { + type <- "TF" + Pf <- svdu_thresh(obj$x[,which(obj$index %in% obj$action), drop = FALSE]) + dffull <- ncol(Pf) + df2 <- n - dffull - obj$intercept - 1 + Pfull <- Pf %*% t(Pf) + } else { + type <- "TC" + sigma <- obj$sigma + } + } + + # Compute p-value for each active group + for (j in 1:maxsteps) { + i <- obj$action[j] + if (verbose) { + string <- paste0("Step ", j, "/", attr(obj, "maxsteps"), ": computing P-value for group ", i) + if (!is.null(obj$cvobj)) string <- paste0(string, ", including constraints from cross-validation") + if (attr(obj, "stopped")) string <- paste0(string, ", including constraints from AICstop") + cat(paste(string, "\n")) + } + + if (type == "TC") { + # Form projection onto active set minus i + # and project x_i orthogonally + x_i <- obj$x[,which(obj$index == i), drop = FALSE] + if (length(obj$action) > 1) { + minus_i <- setdiff(obj$action, i) + x_minus_i <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) + x_i <- x_i - x_minus_i %*% t(x_minus_i) %*% x_i + } + + # Project y onto what remains of x_i + Ugtilde <- svdu_thresh(x_i) + R <- t(Ugtilde) %*% obj$y + TC <- sqrt(sum(R^2)) + eta <- Ugtilde %*% R / TC + Z <- obj$y - eta * TC + dfi <- ncol(Ugtilde) + Tstats[j] <- TC + dfs[j] <- dfi + + ydecomp <- list(Z=Z, eta=eta) + + } else { + + if 
(length(obj$action) > 1) { + minus_i <- setdiff(obj$action, i) + Psub <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) + Z <- Psub %*% t(Psub) %*% obj$y + df1 <- dffull - ncol(Psub) + } else { + Z <- rep(0, n) + df1 <- dffull + obj$intercept + 1 + } + + C <- df1/df2 + R1 <- obj$y - Z + R2 <- obj$y - Pfull %*% obj$y + R1sq <- sum(R1^2) + R2sq <- sum(R2^2) + R <- sqrt(R1sq) + delta <- R1-R2 + Vdelta <- delta/sqrt(sum(delta^2)) + V2 <- R2/sqrt(R2sq) + TF <- (R1sq-R2sq)/(C*R2sq) + Tstats[j] <- TF + dfs[j] <- df1 + + ydecomp <- list(R=R, Z=Z, Vd=Vdelta, V2=V2, C=C) + + } + + intervallist <- truncationRegion(obj, ydecomp, type) + + # Additional constraints from cross-validation? + if (!is.null(obj$cvobj)) { + intervallist <- c(intervallist, do.call(c, + lapply(obj$cvobj, function(cvf) { + if (type == "TC") { + ydecomp <- list(R=R[-cvf$fold], eta=eta[-cvf$fold], Z=Z[-cvf$fold]) + } else { + ydecomp <- list(R=R, Z=Z[-cvf$fold], Vd=Vdelta[-cvf$fold], V2=V2[-cvf$fold], C=C) # C correct? + } + truncationRegion(cvf, ydecomp, type) + }))) + intervallist <- c(intervallist, + lapply(obj$cvquad, function(cvquad) { + if (type == "TC") { + etacvquad <- t(eta) %*% cvquad + A <- etacvquad %*% eta + B <- 2 * etacvquad %*% Z + C <- t(Z) %*% cvquad %*% Z + quadratic_roots(A, B, C, tol = 1e-15) + } else { + + zcvquad <- t(Z) %*% cvquad + vdcvquad <- t(Vdelta) %*% cvquad + v2cvquad <- t(V2) %*% cvquad + x0 <- zcvquad %*% Z + x1 <- 2*R*zcvquad %*% Vdelta + x2 <- 2*R*zcvquad %*% V2 + x12 <- 2*R^2*vdcvquad %*% V2 + x11 <- R^2*vdcvquad %*% Vdelta + x22 <- R^2*v2cvquad %*% V2 + TF_roots(R, C, coeffs = list(x0=x0, x1=x1, x2=x2, x12=x12, x11=x11, x22=x22)) + } + })) + } + + # Additional constraints from AIC stopping + if (attr(obj, "stopped")) { + aicintervals <- vector("list", maxsteps) + aicstop <- attr(obj, "aicstop") + if (type == "TC") { + pen0 <- k * obj$intercept + aic.begin <- aic.last <- sum(obj$y^2)/sigma^2 - n + k * obj$intercept + } else { + pen0 <- exp(k * (1+obj$intercept)/n) + aic.begin <- n*(log(2*pi) + log(mean(obj$y^2))) + k * (1 + n + obj$intercept) + } + AICs <- c(aic.begin, obj$AIC) + + ulist <- c(list(matrix(0, n, 1)), obj$maxprojs) + penlist <- c(pen0, obj$maxpens) + zlist <- vector("list", maxsteps+1) + zlist[[1]] <- zlist[[2]] <- Z + if (type == "TC") { + etalist <- vector("list", maxsteps+1) + etalist[[1]] <- etalist[[2]] <- eta + } else { + vdlist <- v2list <- vector("list", maxsteps+1) + vdlist[[1]] <- vdlist[[2]] <- Vdelta + v2list[[1]] <- v2list[[2]] <- V2 + } + if (maxsteps > 1) { + for (step in 1:(maxsteps-1)) { + cproj <- obj$cumprojs[[step]] + zlist[[step+2]] <- cproj %*% Z + if (type == "TC") { + etalist[[step+2]] <- cproj %*% eta + } else { + vdlist[[step+2]] <- cproj %*% Vdelta + v2list[[step+2]] <- cproj %*% V2 + } + } + } + + for (step in 1:maxsteps) { + # Compare AIC at s+1 to AIC at s + # roots() functions assume g indexes smaller AIC + # this is step+1 until the last step + peng <- penlist[[step+1]] + Ug <- ulist[[step+1]] + Uh <- ulist[[step]] + Zg <- zlist[[step+1]] + Zh <- zlist[[step]] + + if (type == "TC") { + penh <- 0 + etag <- etalist[[step+1]] + etah <- etalist[[step]] + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) + + if (AICs[step] < AICs[step+1]) { + coeffs <- lapply(coeffs, function(coeff) -coeff) + } + + intstep <- quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) + + } else { + penh <- 1 + Vdg <- vdlist[[step+1]] + Vdh <- vdlist[[step]] + V2g <- v2list[[step+1]] + V2h <- v2list[[step]] + coeffs <- 
TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) + + if (AICs[step] < AICs[step+1]) { + coeffs <- lapply(coeffs, function(coeff) -coeff) + } + + intstep <- TF_roots(R, C, coeffs) + } + + aicintervals[[step]] <- intstep + } + intervallist <- c(intervallist, aicintervals) + } + + # Compute intersection: + region <- do.call(interval_union, intervallist) + region <- interval_union(region, Intervals(c(-Inf,0))) + E <- interval_complement(region, check_valid = FALSE) + + if (length(E) == 0) { + stop(paste("Empty support at step", j)) + } + supports[[j]] <- E + + # E is now potentially a union of intervals + if (type == "TC") { + pvals[j] <- TC_surv(TC, sigma, dfi, E) + } else { + # write TF_surv function first + pvals[j] <- TF_surv(TF, df1, df2, E) + } + + } + + if (any(is.nan(pvals))) { + nanp <- which(is.nan(pvals)) + pvals[nanp] <- 0 + warning(paste0("P-value NaNs of the form 0/0 converted to 0 for group(s) ", paste(obj$action[nanp], collapse=","), ". This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.")) + } + + names(pvals) <- obj$action + out <- list(vars = obj$action, pv=pvals) + if (type == "TC") { + out$TC <- Tstats + out$sigma <- sigma + } else { + out$TF <- Tstats + out$df2 <- df2 + } + out$df <- dfs + out$support <- supports + class(out) <- "groupfsInf" + if (!is.null(attr(obj, "varnames"))) { + attr(out, "varnames") <- attr(obj, "varnames") + } + return(out) +} + +# ----------------------------------------------------------- + +TC_surv <- function(TC, sigma, df, E) { + if (length(E) == 0) { + stop("Empty TC support") + } + + # Sum truncated cdf over each part of E + denom <- do.call(sum, lapply(1:nrow(E), function(v) { + tchi_interval(E[v,1], E[v,2], sigma, df) + })) + + # Sum truncated cdf from observed value to max of + # truncation region + numer <- do.call(sum, lapply(1:nrow(E), function(v) { + lower <- E[v,1] + upper <- E[v,2] + if (upper > TC) { + # Observed value is left of this interval's right endpoint + if (lower < TC) { + # Observed value is in this interval + return(tchi_interval(TC, upper, sigma, df)) + } else { + # Observed value is not in this interval + return(tchi_interval(lower, upper, sigma, df)) + } + } else { + # Observed value is right of this entire interval + return(0) + } + })) + + # Survival function + value <- numer/denom + # Force p-value to lie in the [0,1] interval + # in case of numerical issues + value <- max(0, min(1, value)) + value +} + +tchi_interval <- function(lower, upper, sigma, df) { + a <- (lower/sigma)^2 + b <- (upper/sigma)^2 + if (b == Inf) { + integral <- pchisq(a, df, lower.tail = FALSE) + } else { + integral <- pchisq(b, df) - pchisq(a, df) + } + if ((integral < .Machine$double.eps) && (b < Inf)) { + integral <- num_int_chi(a, b, df) + } + return(integral) +} + +num_int_chi <- function(a, b, df, nsamp = 10000) { + grid <- seq(from=a, to=b, length.out=nsamp) + integrand <- dchisq(grid, df) + return((b-a)*mean(integrand)) +} + +TF_surv <- function(TF, df1, df2, E) { + if (length(E) == 0) { + stop("Empty TF support") + } + + # Sum truncated cdf over each part of E + denom <- do.call(sum, lapply(1:nrow(E), function(v) { + TF_interval(E[v,1], E[v,2], df1, df2) + })) + + # Sum truncated cdf from observed value to max of + # truncation region + numer <- do.call(sum, lapply(1:nrow(E), function(v) { + lower <- E[v,1] + upper <- E[v,2] + if (upper > TF) { + # Observed value is left of this interval's right endpoint + if (lower < TF) { + # Observed value is in this interval + 
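# so accumulate the F mass from the observed statistic out to this interval's right endpoint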
return(TF_interval(TF, upper, df1, df2)) + } else { + # Observed value is not in this interval + return(TF_interval(lower, upper, df1, df2)) + } + } else { + # Observed value is right of this entire interval + return(0) + } + })) + + # Survival function + value <- numer/denom + # Force p-value to lie in the [0,1] interval + # in case of numerical issues + #value <- max(0, min(1, value)) + value +} + +TF_interval <- function(lower, upper, df1, df2) { + a <- lower + b <- upper + if (b == Inf) { + integral <- pf(a, df1, df2, lower.tail = FALSE) + } else { + integral <- pf(b, df1, df2) - pf(a, df1, df2) + } + if ((integral < .Machine$double.eps) && (b < Inf)) { + integral <- num_int_F(a, b, df1, df2) + } + return(integral) +} + +num_int_F <- function(a, b, df1, df2, nsamp = 10000) { + grid <- seq(from=a, to=b, length.out=nsamp) + integrand <- df(grid, df1, df2) + return((b-a)*mean(integrand)) +} + +#' Center and scale design matrix by groups +#' +#' For internal use by \code{\link{groupfs}}. +#' +#' @param x Design matrix. +#' @param index Group membership indicator of length p. +#' @param center Center groups, default is TRUE. +#' @param normalize Scale groups by Frobenius norm, default is TRUE. +#' @return +#' \describe{ +#' \item{x}{Optionally centered/scaled design matrix.} +#' \item{xm}{Means of groups in original design matrix.} +#' \item{xs}{Frobenius norms of groups in original design matrix.} +#' } +scaleGroups <- function(x, index, center = TRUE, normalize = TRUE) { + keys <- unique(index) + xm <- rep(0, ncol(x)) + xs <- rep(1, ncol(x)) + + for (j in keys) { + inds <- which(index == j) + if (center) { + xmj <- mean(x[, inds]) + xm[inds] <- xmj + x[, inds] <- x[, inds] - xmj + } + normsq <- sum(x[, inds]^2) + xsj <- sqrt(normsq) + xs[inds] <- xsj + if (xsj > 0) { + if (normalize) x[, inds] <- x[, inds] / xsj + } else { + stop(paste("Design matrix contains identically zero group of variables:", j)) + } + } + return(list(x=x, xm=xm, xs=xs)) +} + +#' Expand a data frame with factors to form a design matrix with the full binary encoding of each factor. +#' +#' When using \code{\link{groupfs}} with factor variables call this function first to create a design matrix. +#' +#' @param df Data frame containing some columns which are \code{factors}. 
+#' @return List containing +#' \describe{ +#' \item{x}{Design matrix, the first columns contain any numeric variables from the original date frame.} +#' \item{index}{Group membership indicator for expanded matrix.} +#' } +#' @examples +#' \dontrun{ +#' fd = factorDesign(warpbreaks) +#' y = rnorm(nrow(fd$x)) +#' fit = groupfs(fd$x, y, fd$index, maxsteps=2, intercept=F) +#' pvals = groupfsInf(fit) +#' } +factorDesign <- function(df) { + factor.inds <- sapply(df[1,], is.factor) + factor.labels <- which(factor.inds) + nfacs <- sum(factor.inds) + nlevs <- sapply(df[1,factor.inds], function(fac) nlevels(fac)) + totnlevs <- sum(nlevs) + num.num = indcounter = ncol(df) - nfacs + x <- matrix(nrow=nrow(df), ncol = totnlevs + num.num) + colnames(x) <- 1:ncol(x) + index <- integer(ncol(x)) + varnames <- character(ncol(df)) + if (num.num > 0) { + x[,1:num.num] <- df[, !factor.inds] + varnames[1:num.num] = colnames(x)[1:num.num] <- colnames(df)[1:num.num] + index[1:num.num] <- 1:num.num + indcounter <- indcounter + num.num - 1 + } + for (j in 1:nfacs) { + submat <- model.matrix(~ df[, factor.labels[j]] - 1) + indcounter <- indcounter+1 + submatinds <- indcounter:(indcounter+nlevs[j]-1) + indcounter <- indcounter + nlevs[j] - 1 + colnames(x)[submatinds] <- paste0(colnames(df)[num.num + j], ":", 1:nlevs[j]) + varnames[num.num + j] <- colnames(df)[num.num + j] + x[,submatinds] <- submat + index[submatinds] <- num.num + j + } + attr(x, "varnames") <- varnames + return(list(x = x, index = index)) +} + +svdu_thresh <- function(x) { + svdx <- svd(x) + inds <- svdx$d > svdx$d[1] * sqrt(.Machine$double.eps) + return(svdx$u[, inds, drop = FALSE]) +} + +flatten <- function(L) { + if (is.list(L[[1]])) return(unlist(L, recursive=FALSE)) + return(L) +} + +print.groupfs <- function(x, ...) { + cat("\nSequence of added groups:\n") + nsteps = length(x$action) + action <- x$action + vnames <- attr(x, "varnames") + if (length(vnames) > 0) action <- vnames[action] + tab = data.frame(Group = action, Rank = x$log$df, RSS = round(x$log$RSS, 3), AIC = round(x$log$AIC, 3)) + rownames(tab) = 1:nsteps + print(tab) + cat("\nUse groupfsInf() to compute P-values\n") + invisible() +} + + +coef.groupfs <- function(object, ...) { + return(object$coefficients) +} + +#' @name predict.groupfs +#' @aliases predict.groupfs +#' @aliases coef.groupfs +#' +#' @title Prediction and coefficient functions for \code{\link{groupfs}}. +#' +#' Make predictions or extract coefficients from a groupfs forward stepwise object. +#' +#' @param object Object returned by a call to \code{\link{groupfs}}. +#' @param newx Matrix of x values at which the predictions are desired. If NULL, the x values from groupfs fitting are used. +#' @return A vector of predictions or a vector of coefficients. +predict.groupfs <- function(object, newx) { + beta <- coef.groupfs(object) + if (missing(newx)) { + newx = object$x + } else { + newx <- scaleGroups(newx, object$index, attr(object, "center"), attr(object, "normalize"))$x + } + return(newx[, object$index %in% object$action] %*% beta + ifelse(object$intercept, object$by, 0)) +} + +print.groupfsInf <- function(x, ...) 
{ + if (!is.null(x$sigma)) { + isTF <- FALSE + Tstat <- x$TC + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) + } else { + isTF <- TRUE + Tstat <- x$TF + } + action <- x$vars + vnames <- attr(x, "varnames") + if (length(vnames) > 0) action <- vnames[action] + tab = data.frame(Group = action, Pvalue = round(x$pv, 3), + TC = round(Tstat, 3), + df = x$df, Size = round(unlist(lapply(lapply(x$support, size), sum)), 3), + Ints = unlist(lapply(x$support, nrow)), Min =round(unlist(lapply(x$support, min)), 3), + Max = round(unlist(lapply(x$support, max)), 3)) + rownames(tab) = 1:length(x$vars) + if (isTF) names(tab)[3] <- "TF" + print(tab) + cat("\nInts is the number of intervals in the truncated chi selection region and Size is the sum of their lengths. Min and Max are the lowest and highest endpoints of the truncation region. No confidence intervals are reported by groupfsInf.\n") + invisible() +} + +checkargs.groupfs <- function(x, index, maxsteps) { + if (length(index) != ncol(x)) stop("Length of index does not match number of columns of x") + if ((round(maxsteps) != maxsteps) || (maxsteps <= 0)) stop("maxsteps must be an integer > 0") +} diff --git a/selectiveInference-currentCRAN/R/funs.inf.R b/selectiveInference-currentCRAN/R/funs.inf.R new file mode 100644 index 0000000..423b4c3 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.inf.R @@ -0,0 +1,299 @@ +# Main p-value function + +poly.pval <- function(y, G, u, v, sigma, bits=NULL) { + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + pv = tnorm.surv(z,0,sd,vlo,vup,bits) + return(list(pv=pv,vlo=vlo,vup=vup)) +} + +# Main confidence interval function + +poly.int <- function(y, G, u, v, sigma, alpha, gridrange=c(-100,100), + gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { + + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) + fun = function(x) { tnorm.surv(z,x,sd,vlo,vup,bits) } + + int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) + tailarea = c(fun(int[1]),1-fun(int[2])) + + if (flip) { + int = -int[2:1] + tailarea = tailarea[2:1] + } + + return(list(int=int,tailarea=tailarea)) +} + +############################## + +# Assuming that grid is in sorted order from smallest to largest, +# and vals are monotonically increasing function values over the +# grid, returns the grid end points such that the corresponding +# vals are approximately equal to {val1, val2} + +grid.search <- function(grid, fun, val1, val2, gridpts=100, griddepth=2) { + n = length(grid) + vals = fun(grid) + + ii = which(vals >= val1) + jj = which(vals <= val2) + if (length(ii)==0) return(c(grid[n],Inf)) # All vals < val1 + if (length(jj)==0) return(c(-Inf,grid[1])) # All vals > val2 + # RJT: the above logic is correct ... 
but for simplicity, instead, + # we could just return c(-Inf,Inf) + + i1 = min(ii); i2 = max(jj) + if (i1==1) lo = -Inf + else lo = grid.bsearch(grid[i1-1],grid[i1],fun,val1,gridpts, + griddepth-1,below=TRUE) + if (i2==n) hi = Inf + else hi = grid.bsearch(grid[i2],grid[i2+1],fun,val2,gridpts, + griddepth-1,below=FALSE) + return(c(lo,hi)) +} + +# Repeated bin search to find the point x in the interval [left, right] +# that satisfies f(x) approx equal to val. If below=TRUE, then we seek +# x such that the above holds and f(x) <= val; else we seek f(x) >= val. + +grid.bsearch <- function(left, right, fun, val, gridpts=100, griddepth=1, below=TRUE) { + n = gridpts + depth = 1 + + while (depth <= griddepth) { + grid = seq(left,right,length=n) + vals = fun(grid) + + if (below) { + ii = which(vals >= val) + if (length(ii)==0) return(grid[n]) # All vals < val (shouldn't happen) + if ((i0=min(ii))==1) return(grid[1]) # All vals > val (shouldn't happen) + left = grid[i0-1] + right = grid[i0] + } + + else { + ii = which(vals <= val) + if (length(ii)==0) return(grid[1]) # All vals > val (shouldn't happen) + if ((i0=max(ii))==n) return(grid[n]) # All vals < val (shouldn't happen) + left = grid[i0] + right = grid[i0+1] + } + + depth = depth+1 + } + + return(ifelse(below, left, right)) +} + +# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector + +tnorm.surv <- function(z, mean, sd, a, b, bits=NULL) { + z = max(min(z,b),a) + + # Check silly boundary cases + p = numeric(length(mean)) + p[mean==-Inf] = 0 + p[mean==Inf] = 1 + + # Try the multi precision floating point calculation first + o = is.finite(mean) + mm = mean[o] + pp = mpfr.tnorm.surv(z,mm,sd,a,b,bits) + + # If there are any NAs, then settle for an approximation + oo = is.na(pp) + if (any(oo)) pp[oo] = bryc.tnorm.surv(z,mm[oo],sd,a,b) + + p[o] = pp + return(p) +} + +# Returns Prob(Z>z | Z in [a,b]), where mean cane be a vector, using +# multi precision floating point calculations thanks to the Rmpfr package + +mpfr.tnorm.surv <- function(z, mean=0, sd=1, a, b, bits=NULL) { + # If bits is not NULL, then we are supposed to be using Rmpf + # (note that this was fail if Rmpfr is not installed; but + # by the time this function is being executed, this should + # have been properly checked at a higher level; and if Rmpfr + # is not installed, bits would have been previously set to NULL) + if (!is.null(bits)) { + z = Rmpfr::mpfr((z-mean)/sd, precBits=bits) + a = Rmpfr::mpfr((a-mean)/sd, precBits=bits) + b = Rmpfr::mpfr((b-mean)/sd, precBits=bits) + return(as.numeric((Rmpfr::pnorm(b)-Rmpfr::pnorm(z))/ + (Rmpfr::pnorm(b)-Rmpfr::pnorm(a)))) + } + + # Else, just use standard floating point calculations + z = (z-mean)/sd + a = (a-mean)/sd + b = (b-mean)/sd + return((pnorm(b)-pnorm(z))/(pnorm(b)-pnorm(a))) +} + +# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector, based on +# A UNIFORM APPROXIMATION TO THE RIGHT NORMAL TAIL INTEGRAL, W Bryc +# Applied Mathematics and Computation +# Volume 127, Issues 23, 15 April 2002, Pages 365--374 +# https://math.uc.edu/~brycw/preprint/z-tail/z-tail.pdf + +bryc.tnorm.surv <- function(z, mean=0, sd=1, a, b) { + z = (z-mean)/sd + a = (a-mean)/sd + b = (b-mean)/sd + n = length(mean) + + term1 = exp(z*z) + o = a > -Inf + term1[o] = ff(a[o])*exp(-(a[o]^2-z[o]^2)/2) + term2 = rep(0,n) + oo = b < Inf + term2[oo] = ff(b[oo])*exp(-(b[oo]^2-z[oo]^2)/2) + p = (ff(z)-term2)/(term1-term2) + + # Sometimes the approximation can give wacky p-values, + # outside of [0,1] .. 
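# so clamp the result to the unit interval rather than returning NA
# (the NA treatment is left below, commented out)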
+ #p[p<0 | p>1] = NA + p = pmin(1,pmax(0,p)) + return(p) +} + +ff <- function(z) { + return((z^2+5.575192695*z+12.7743632)/ + (z^3*sqrt(2*pi)+14.38718147*z*z+31.53531977*z+2*12.77436324)) +} + +# Return Prob(Z>z | Z in [a,b]), where mean can be a vector, based on +# Riemann approximation tricks, by Max G'Sell + +gsell.tnorm.surv <- function(z, mean=0, sd=1, a, b) { + return(max.approx.frac(a/sd,b/sd,z/sd,mean/sd)) +} + + +############################## + +forwardStop <- function(pv, alpha=.10){ + if (alpha<0 || alpha>1) stop("alpha must be in [0,1]") + if (min(pv,na.rm=T)<0 || max(pv,na.rm=T)>1) stop("pvalues must be in [0,1]") + val=-(1/(1:length(pv)))*cumsum(log(1-pv)) + oo = which(val <= alpha) + if (length(oo)==0) out=0 + else out = oo[length(oo)] + return(out) +} + +############################## + +aicStop <- function(x, y, action, df, sigma, mult=2, ntimes=2) { + n = length(y) + k = length(action) + aic = numeric(k) + G = matrix(0,nrow=0,ncol=n) + u = numeric(0) + count = 0 + + for (i in 1:k) { + A = action[1:i] + aic[i] = sum(lsfit(x[,A],y,intercept=F)$res^2) + mult*sigma^2*df[i] + + j = action[i] + if (i==1) xtil = x[,j] + else xtil = lsfit(x[,action[1:(i-1)]],x[,j],intercept=F)$res + s = sign(sum(xtil*y)) + + if (i==1 || aic[i] <= aic[i-1]) { + G = rbind(G,s*xtil/sqrt(sum(xtil^2))) + u = c(u,sqrt(mult)*sigma) + count = 0 + } + + else { + G = rbind(G,-s*xtil/sqrt(sum(xtil^2))) + u = c(u,-sqrt(mult)*sigma) + count = count+1 + if (count == ntimes) break + } + } + + if (i < k) { + khat = i - ntimes + aic = aic[1:i] + } + else khat = k + + return(list(khat=khat,G=G,u=u,aic=aic,stopped=(i0])) + sd=sqrt(vv) + pv = tnorm.surv(temp,0,sd,vlo,vup,bits) + return(list(pv=pv,vlo=vlo,vup=vup,sd=sd)) +} + + + +mypoly.int.lee= + function(y,eta,vlo,vup,sd, alpha, gridrange=c(-100,100),gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { + # compute sel intervals from poly lemmma, full version from Lee et al for full matrix Sigma + + temp = sum(eta*y) + + xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) + fun = function(x) { tnorm.surv(temp,x,sd,vlo,vup,bits) } + + int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) + tailarea = c(fun(int[1]),1-fun(int[2])) + + if (flip) { + int = -int[2:1] + tailarea = tailarea[2:1] + } + + return(list(int=int,tailarea=tailarea)) +} + + + +mydiag=function(x){ + if(length(x)==1) out=x + if(length(x)>1) out=diag(x) + return(out) + } + diff --git a/selectiveInference-currentCRAN/R/funs.lar.R b/selectiveInference-currentCRAN/R/funs.lar.R new file mode 100644 index 0000000..f01ee3d --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.lar.R @@ -0,0 +1,632 @@ +# We compute the least angle regression (LAR) path given +# a response vector y and predictor matrix x. We assume +# that x has columns in general position. + +# NOTE: the df estimates at each lambda_k can be thought of as the df +# for all solutions corresponding to lambda in (lambda_k,lambda_{k-1}), +# the open interval to the *right* of the current lambda_k. + +# NOTE: x having columns in general position implies that the +# centered x satisfies a modified version of the general position +# condition, where we replace k < min(n,p) by k < min(n-1,p) in +# the definition. This is still sufficient to imply the uniqueness +# of the lasso solution, on the centered x + +lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, + verbose=FALSE) { + + this.call = match.call() + checkargs.xy(x=x,y=y) + + # Center and scale, etc. 
+ obj = standardize(x,y,intercept,normalize) + x = obj$x + y = obj$y + bx = obj$bx + by = obj$by + sx = obj$sx + n = nrow(x) + p = ncol(x) + + ##### + # Find the first variable to enter and its sign + uhat = t(x)%*%y + ihit = which.max(abs(uhat)) # Hitting coordinate + hit = abs(uhat[ihit]) # Critical lambda + s = Sign(uhat[ihit]) # Sign + + if (verbose) { + cat(sprintf("1. lambda=%.3f, adding variable %i, |A|=%i...", + hit,ihit,1)) + } + + # Now iteratively find the new LAR estimate, and + # the next critical lambda + + # Things to keep track of, and return at the end + buf = min(maxsteps,500) + lambda = numeric(buf) # Critical lambdas + action = numeric(buf) # Actions taken + df = numeric(buf) # Degrees of freedom + beta = matrix(0,p,buf) # LAR estimates + + lambda[1] = hit + action[1] = ihit + df[1] = 0 + beta[,1] = 0 + + # Gamma matrix! + gbuf = max(2*p*3,2000) # Space for 3 steps, at least + gi = 0 + Gamma = matrix(0,gbuf,n) + Gamma[gi+Seq(1,p-1),] = t(s*x[,ihit]+x[,-ihit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(s*x[,ihit]-x[,-ihit]); gi = gi+p-1 + Gamma[gi+1,] = t(s*x[,ihit]); gi = gi+1 + + # nk, regression contrast, M plus + nk = mp = numeric(buf) + vreg = matrix(0,buf,n) + + nk[1] = gi + vreg[1,] = s*x[,ihit] / sum(x[,ihit]^2) + if (p > 1) { + c = t(as.numeric(Sign(t(x)%*%y)) * t(x)) + ratio = t(c[,-ihit])%*%c[,ihit]/sum(c[,ihit]^2) + ip = 1-ratio > 0 + crit = (t(c[,-ihit])%*%y - ratio*sum(c[,ihit]*y))/(1-ratio) + mp[1] = max(max(crit[ip]),0) + } + + # Other things to keep track of, but not return + r = 1 # Size of active set + A = ihit # Active set + I = Seq(1,p)[-ihit] # Inactive set + X1 = x[,ihit,drop=FALSE] # Matrix X[,A] + X2 = x[,-ihit,drop=FALSE] # Matrix X[,I] + k = 2 # What step are we at? + + # Compute a skinny QR decomposition of X1 + obj = qr(X1) + Q = qr.Q(obj,complete=TRUE) + Q1 = Q[,1,drop=FALSE]; + Q2 = Q[,-1,drop=FALSE] + R = qr.R(obj) + + # Throughout the algorithm, we will maintain + # the decomposition X1 = Q1*R. Dimenisons: + # X1: n x r + # Q1: n x r + # Q2: n x (n-r) + # R: r x r + + while (k<=maxsteps && lambda[k-1]>=minlam) { + ########## + # Check if we've reached the end of the buffer + if (k > length(lambda)) { + buf = length(lambda) + lambda = c(lambda,numeric(buf)) + action = c(action,numeric(buf)) + df = c(df,numeric(buf)) + beta = cbind(beta,matrix(0,p,buf)) + nk = c(nk,numeric(buf)) + mp = c(mp,numeric(buf)) + vreg = rbind(vreg,matrix(0,buf,n)) + } + + # Key quantities for the hitting times + a = backsolve(R,t(Q1)%*%y) + b = backsolve(R,backsolve(R,s,transpose=TRUE)) + aa = as.numeric(t(X2) %*% (y - X1 %*% a)) + bb = as.numeric(t(X2) %*% (X1 %*% b)) + + # If the inactive set is empty, nothing will hit + if (r==min(n-intercept,p)) hit = 0 + + # Otherwise find the next hitting time + else { + shits = Sign(aa) + hits = aa/(shits-bb) + + # Make sure none of the hitting times are larger + # than the current lambda + hits[hits>lambda[k-1]] = 0 + + ihit = which.max(hits) + hit = hits[ihit] + shit = shits[ihit] + } + + # Stop if the next critical point is negative + if (hit<=0) break + + # Record the critical lambda and solution + lambda[k] = hit + action[k] = I[ihit] + df[k] = r + beta[A,k] = a-hit*b + + # Gamma matrix! 
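# Grow the constraint buffer when fewer than 2*p free rows remain, then append the
# rows generated at this step. Gamma accumulates the linear constraints on y that
# record the selection event along the path (larInf later passes G[1:nk[j],] with
# u = 0 to poly.pval); nk[k] stores the cumulative row count after step k.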
+ if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) + X2perp = X2 - X1 %*% backsolve(R,t(Q1)%*%X2) + c = t(t(X2perp)/(shits-bb)) + Gamma[gi+Seq(1,p-r),] = shits*t(X2perp); gi = gi+p-r + Gamma[gi+Seq(1,p-r-1),] = t(c[,ihit]-c[,-ihit]); gi = gi+p-r-1 + Gamma[gi+1,] = t(c[,ihit]); gi = gi+1 + + # nk, regression contrast, M plus + nk[k] = gi + vreg[k,] = shit*X2perp[,ihit] / sum(X2perp[,ihit]^2) + if (ncol(c) > 1) { + ratio = t(c[,-ihit])%*%c[,ihit]/sum(c[,ihit]^2) + ip = 1-ratio > 0 + crit = (t(c[,-ihit])%*%y - ratio*sum(c[,ihit]*y))/(1-ratio) + mp[k] = max(max(crit[ip]),0) + } + + # Update all of the variables + r = r+1 + A = c(A,I[ihit]) + I = I[-ihit] + s = c(s,shit) + X1 = cbind(X1,X2[,ihit]) + X2 = X2[,-ihit,drop=FALSE] + + # Update the QR decomposition + obj = updateQR(Q1,Q2,R,X1[,r]) + Q1 = obj$Q1 + Q2 = obj$Q2 + R = obj$R + + if (verbose) { + cat(sprintf("\n%i. lambda=%.3f, adding variable %i, |A|=%i...", + k,hit,A[r],r)) + } + + # Step counter + k = k+1 + } + + # Trim + lambda = lambda[Seq(1,k-1)] + action = action[Seq(1,k-1)] + df = df[Seq(1,k-1),drop=FALSE] + beta = beta[,Seq(1,k-1),drop=FALSE] + Gamma = Gamma[Seq(1,gi),,drop=FALSE] + nk = nk[Seq(1,k-1)] + mp = mp[Seq(1,k-1)] + vreg = vreg[Seq(1,k-1),,drop=FALSE] + + # If we reached the maximum number of steps + if (k>maxsteps) { + if (verbose) { + cat(sprintf("\nReached the maximum number of steps (%i),",maxsteps)) + cat(" skipping the rest of the path.") + } + completepath = FALSE + bls = NULL + } + + # If we reached the minimum lambda + else if (lambda[k-1]k) stop(sprintf("s must be between 0 and %i",k)) + knots = 1:k + dec = FALSE + } else { + if (min(s)= %0.3f",min(lambda))) + knots = lambda + dec = TRUE + } + + return(coef.interpolate(beta,s,knots,dec)) +} + +# Prediction function for lar + +predict.lar <- function(object, newx, s, mode=c("step","lambda"), ...) 
{ + beta = coef.lar(object,s,mode) + if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) + else newx = scale(newx,object$bx,FALSE) + return(newx %*% beta + object$by) +} + +coef.lasso <- coef.lar +predict.lasso <- predict.lar + +############################## + +# Lar inference function + +larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { + + this.call = match.call() + type = match.arg(type) + checkargs.misc(sigma=sigma,alpha=alpha,k=k, + gridrange=gridrange,mult=mult,ntimes=ntimes) + if (class(obj) != "lar") stop("obj must be an object of class lar") + if (is.null(k) && type=="active") k = length(obj$action) + if (is.null(k) && type=="all") stop("k must be specified when type = all") + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + k = min(k,length(obj$action)) # Round to last step + x = obj$x + y = obj$y + p = ncol(x) + n = nrow(x) + G = obj$Gamma + nk = obj$nk + sx = obj$sx + + if (is.null(sigma)) { + if (n >= 2*p) { + oo = obj$intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + pv.spacing = pv.modspac = pv.covtest = khat = NULL + + if (type == "active") { + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + pv.spacing = pv.modspac = pv.covtest = numeric(k) + vreg = obj$vreg[1:k,,drop=FALSE] + sign = obj$sign[1:k] + vars = obj$action[1:k] + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + Gj = G[1:nk[j],] + uj = rep(0,nk[j]) + vj = vreg[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + + pv.spacing[j] = spacing.pval(obj,sigma,j) + pv.modspac[j] = modspac.pval(obj,sigma,j) + pv.covtest[j] = covtest.pval(obj,sigma,j) + } + + khat = forwardStop(pv,alpha) + } + + else { + if (type == "aic") { + out = aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) + khat = out$khat + m = out$stopped * ntimes + G = rbind(out$G,G[1:nk[khat+m],]) # Take ntimes more steps past khat + u = c(out$u,rep(0,nk[khat+m])) # (if we need to) + kk = khat + } + else { + G = G[1:nk[k],] + u = rep(0,nk[k]) + kk = k + } + + pv = vlo = vup = numeric(kk) + vmat = matrix(0,kk,n) + ci = tailarea = matrix(0,kk,2) + sign = numeric(kk) + vars = obj$action[1:kk] + xa = x[,vars] + M = pinv(crossprod(xa)) %*% t(xa) + + for (j in 1:kk) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + Gj = rbind(G,vj) + uj = c(u,0) + + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = 
a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + } + + out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + pv.spacing=pv.spacing,pv.modspac=pv.modspac,pv.covtest=pv.covtest, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + call=this.call) + class(out) = "larInf" + return(out) +} + +############################## + +spacing.pval <- function(obj, sigma, k) { + v = obj$Gamma[obj$nk[k],] + sd = sigma*sqrt(sum(v^2)) + a = obj$mp[k] + + if (k==1) b = Inf + else b = obj$lambda[k-1] + + return(tnorm.surv(obj$lambda[k],0,sd,a,b)) +} + +modspac.pval <- function(obj, sigma, k) { + v = obj$Gamma[obj$nk[k],] + sd = sigma*sqrt(sum(v^2)) + + if (k < length(obj$action)) a = obj$lambda[k+1] + else if (obj$completepath) a = 0 + else { + warning(sprintf("Modified spacing p-values at step %i require %i steps of the lar path",k,k+1)) + return(NA) + } + + if (k==1) b = Inf + else b = obj$lambda[k-1] + + return(tnorm.surv(obj$lambda[k],0,sd,a,b)) +} + +covtest.pval <- function(obj, sigma, k) { + A = which(obj$beta[,k]!=0) + sA = sign(obj$beta[A,k]) + lam1 = obj$lambda[k] + j = obj$action[k] + + if (k < length(obj$action)) { + lam2 = obj$lambda[k+1] + sj = sign(obj$beta[j,k+1]) + } else if (obj$completepath) { + lam2 = 0 + sj = sign(obj$bls[j]) + } else { + warning(sprintf("Cov test p-values at step %i require %i steps of the lar path",k,k+1)) + return(NA) + } + + x = obj$x + if (length(A)==0) term1 = 0 + else term1 = x[,A,drop=F] %*% solve(crossprod(x[,A,drop=F]),sA) + term2 = x[,c(A,j),drop=F] %*% solve(crossprod(x[,c(A,j),drop=F]),c(sA,sj)) + c = sum((term2 - term1)^2) + t = c * lam1 * (lam1-lam2) / sigma^2 + return(1-pexp(t)) +} + +############################## + +print.lar <- function(x, ...) { + cat("\nCall:\n") + dput(x$call) + + cat("\nSequence of LAR moves:\n") + nsteps = length(x$action) + tab = cbind(1:nsteps,x$action,x$sign) + colnames(tab) = c("Step","Var","Sign") + rownames(tab) = rep("",nrow(tab)) + print(tab) + invisible() +} + +print.larInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + if (x$type == "active") { + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",x$alpha)) + cat("",fill=T) + tab = cbind(1:length(x$pv),x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3),round(x$pv.spacing,3),round(x$pv.cov,3)) + colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", + "LowConfPt", "UpConfPt", "Spacing", "CovTest") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",x$khat)) + } + + else if (x$type == "all") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$k,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + } + + else if (x$type == "aic") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$khat,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) + } + + invisible() +} + +plot.lar <- function(x, xvar=c("norm","step","lambda"), breaks=TRUE, + omit.zeros=TRUE, var.labels=TRUE, ...) { + + if (x$completepath) { + k = length(x$action)+1 + lambda = c(x$lambda,0) + beta = cbind(x$beta,x$bls) + } else { + k = length(x$action) + lambda = x$lambda + beta = x$beta + } + p = nrow(beta) + + xvar = match.arg(xvar) + if (xvar=="norm") { + xx = colSums(abs(beta)) + xlab = "L1 norm" + } else if (xvar=="step") { + xx = 1:k + xlab = "Step" + } else { + xx = lambda + xlab = "Lambda" + } + + if (omit.zeros) { + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA + } + + plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), + xlab=xlab,ylab="Coefficients",main="Least angle regression path",...) 
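# Draw a horizontal reference line at zero, overlay one path per coefficient with
# matplot, and optionally mark the knots with vertical dashed lines and label each
# path by its variable index on the right-hand axis.
#
# A small usage sketch (illustrative only, not taken from the package documentation):
#   set.seed(1)
#   x = matrix(rnorm(50*10), 50, 10)
#   y = x[,1] - 0.5*x[,2] + rnorm(50)
#   fit = lar(x, y)
#   out = larInf(fit, type="active")   # selective p-values along the path
#   plot(fit, xvar="lambda")           # coefficient paths against lambda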
+ abline(h=0,lwd=2) + matplot(xx,t(beta),type="l",lty=1,add=TRUE) + if (breaks) abline(v=xx,lty=2) + if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) + invisible() +} diff --git a/selectiveInference-currentCRAN/R/funs.manymeans.R b/selectiveInference-currentCRAN/R/funs.manymeans.R new file mode 100644 index 0000000..998e58d --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.manymeans.R @@ -0,0 +1,196 @@ +### functions for computing the many means estimates- Stephen Reid +### returns +### i) selected indices +### ii) selection adjusted point estimates +### iii) selection adjusted interval estimates +### iv) selection adjusted p-value of hypothesis testing whether underlying signal is 0 + +######################### +##### MAIN FUNCTION ##### +######################### + +#### user-facing function for computing +#### selected set +#### point and interval estimates +#### p-values +#### input: +#### - y = Vector of observations +#### - alpha = Significance level used in CI construction +#### - bh.q = q parameter for BH(q) procedure (default: NULL) +#### - k = Number of largest elements to consider (default: NULL) +#### - sigma = Estimate of standard deviation of one of the components +#### output: +#### * A list (of class "mm") with the following components : +#### - mu.hat = Vector of length length(y) containing the estimated signal size. If a sample element is not selected, then its signal size estimate is 0 +#### - selected.set = Indices into the vector y of the sample elements that were selected by our procedure (either BH(q) or top-K) +#### - CIs = Matrix with two columns and number of rows equal to number of elements in selected.set. Provides the post-selection CI bounds for the estimated signal sizes of selected elements. CIs given is rows in the same order as encountered in selected.set +#### - p.vals = Vector of p-values for the test of nullity of the signals of the selected sample elemetns. 
P-values given in the same order as selected.set + +manyMeans <- function(y, alpha=0.1, bh.q=NULL, k=NULL, sigma=1, verbose=FALSE) { + this.call = match.call() + if (missing(y) || is.null(y)) stop("y must be specified") + if (!is.numeric(y)) stop("y must be numeric") + if (alpha <= 0 || alpha >= 1) stop("alpha must be between 0 and 1") + if (is.null(bh.q) && is.null(k)) stop("You must either bh.q or k; they cannot both be NULL") + if (!is.null(bh.q) && (bh.q <= 0 || bh.q >= 1)) stop("bh.q must be between 0 and 1") + if (!is.null(k) && (k < 1 || k > length(y) || k != round(k))) stop("k must be an integer between 1 and length(y)") + if (sigma <= 0) stop("sigma must be > 0") + + n = length(y) + if (!is.null(bh.q)) { # use BH selection procedure + + if (verbose && !is.null(k)) cat("(Both bh.q and k have been specified; k is being ignored)\n") + k = NULL + ci=NULL + ### find the selected set and threshold + p.vals = 2*pnorm (abs(y)/sigma, 0, 1, lower.tail=FALSE) + order.p.vals = order(p.vals) + sorted.p.vals = p.vals[order.p.vals] + + options (warn=-1) # ignore warning if max is over empty set + last.reject = max(which (sorted.p.vals <= bh.q*(1:n)/n)) + options (warn=0) # reinstitute warnings + + if (last.reject == -Inf){ # none rejected + if (verbose) cat("No sample elements selected.\n") + out = list(mu.hat=rep(0,n), selected.set=NULL, pv=NULL, ci=NULL, method="BH(q)", + bh.q=bh.q, k=NULL, threshold=NULL, sigma=sigma, call=this.call) + class(out) = "manyMeans" + return(out) + } + + selected.set = order.p.vals[1:n <= last.reject] + threshold = sigma*qnorm (last.reject/2/n, lower.tail=FALSE) + } + + else{ # use top-k selection procedure + + ### find the selected set and threshold + if (k == n) { # make no changes - return MLE + z.alpha = qnorm (alpha/2, lower.tail=FALSE) + cis = cbind(y - z.alpha*sigma, y + z.alpha*sigma) + p.vals = 2*pnorm (abs(y), 0, sigma, lower.tail=FALSE) + + out = list(mu.hat=y, selected.set=1:n, pv=p.vals, ci=ci, method="top-K", + bh.q=NULL, k=k, threshold=NULL, sigma=sigma, call=this.call) + class(out) = "manyMeans" + return(out) + } + + order.abs.y = order (-abs(y)) + sorted.abs.y = y[order.abs.y] + + selected.set = order.abs.y[1:k] + threshold = abs(sorted.abs.y[k+1]) + } + + ### estimate their underlying signal sizes + mu.hat = sapply (selected.set, function(s){ + uniroot(f=function(mu){tn.mean(mu, -threshold, threshold, sigma=sigma) - y[s]}, lower=-10000*sigma, upper=10000*sigma)$root + }) + + ### and CIs + right.ci = sapply (selected.set, function(s){ + uniroot (f=function(mu){tn.cdf (y[s], mu, -threshold, threshold, sigma=sigma) - (alpha/2)}, lower=-10000*sigma, upper=10000*sigma)$root + }) + left.ci = sapply (selected.set, function(s){ + uniroot (f=function(mu){tn.cdf (y[s], mu, -threshold, threshold, sigma=sigma) - (1-alpha/2)}, lower=-10000*sigma, upper=10000*sigma)$root + }) + + ### and p-values + p.vals = sapply (selected.set, function(s){tn.cdf (y[s], 0, -threshold, threshold, sigma=sigma)}) + p.vals = 2*pmin(p.vals, 1-p.vals) + + ### arrange + order.selected.set = order (selected.set) + selected.set = selected.set[order.selected.set] + mu.hat = mu.hat[order.selected.set] + left.ci = left.ci[order.selected.set] + right.ci = right.ci[order.selected.set] + p.vals = p.vals[order.selected.set] + + mu.hat.final = rep(0, n) + mu.hat.final[selected.set] = mu.hat + + out = list(mu.hat=mu.hat.final, selected.set=selected.set, pv=p.vals, ci=cbind(left.ci,right.ci), + method=ifelse(is.null(bh.q), "top-K", "BH(q)"), sigma=sigma, bh.q=bh.q, k=k, threshold=threshold, + 
call=this.call) + class(out) = "manyMeans" + return(out) +} + +#### prints a pretty data frame summarising the information of an object of the mm class +#### columns for index, signal size estimate, left and right CI bounds and p values +#### only for those sample elements selected by the selection procedure associated with the mmObj +print.manyMeans <- function(x, ...){ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise sigma = %0.3f\n\n", + x$sigma)) + + tab = cbind(x$selected.set,x$mu.hat[x$selected.set],x$pv,x$ci) + tab = round(tab,3) + colnames(tab) = c("SelInd","MuHat","P-value","LowConfPt","UpConfPt") + rownames(tab) = rep("",nrow(tab)) + print(tab) +} + +############################### +##### AUXILIARY FUNCTIONS ##### +############################### + +#### function returning the cumulative distribution function value +#### of a truncated Gaussian RV, truncated to interval (-Inf, a) \union (b, Inf) +#### with underlying Gaussian having mean parameter mu and standard deviation sigma +#### at value y +tn.cdf = function(y, mu, a, b, sigma=1){ + ## denominator + d_right = pnorm (b, mu, sigma, lower.tail=FALSE, log.p=TRUE) + d_left = pnorm (a, mu, sigma, lower.tail=TRUE, log.p=TRUE) + d_max = max(d_right, d_left) + d_log = d_max + log(exp(d_left - d_max) + exp(d_right - d_max)) + + + # numerator + if (y > a & y < b){ + n_log = d_left + return (exp(n_log-d_log)) + }else{ + if (y > b){ + # b and y + n_y_tilde = pnorm (y, mu, sigma, lower.tail=FALSE, log.p=TRUE) + n_b_tilde = pnorm (b, mu, sigma, lower.tail=FALSE, log.p=TRUE) + n_yb = n_b_tilde + log(1 - exp(n_y_tilde-n_b_tilde)) + + # a + n_a = d_left + + # combine + return(exp(n_yb-d_log) + exp(n_a-d_log)) + }else{ + n_log = pnorm (y, mu, sigma, lower.tail=TRUE, log.p=TRUE) + return (exp(n_log-d_log)) + } + } +} + +##### function for computing the mean of an N(mu, 1) RV +##### truncated to be on the interval (-Inf, a) \union (b, Inf) +tn.mean = function(mu, a, b, sigma=1){ + # denominator + d_left = pnorm (a, mu, sigma, lower.tail=TRUE, log.p=TRUE) + d_right = pnorm (b, mu, sigma, lower.tail=FALSE, log.p=TRUE) + d_max = max(d_left, d_right) + d_log = d_max + log(exp(d_left - d_max) + exp(d_right - d_max)) + + # numerator + n_left = dnorm (b, mu, sigma, log=TRUE) + n_right = dnorm (a, mu, sigma, log=TRUE) + + if (n_left > n_right){ + mu + exp(n_left + log(1 - exp(n_right-n_left)) - d_log) + }else{ + mu - exp(n_right + log(1 - exp(n_left-n_right)) - d_log) + } +} diff --git a/selectiveInference-currentCRAN/R/funs.max.R b/selectiveInference-currentCRAN/R/funs.max.R new file mode 100644 index 0000000..97f34af --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.max.R @@ -0,0 +1,84 @@ +#Mills lower bound on the integral of a standard normal over an interval +#Need to use something else near zero, where mills is bad. +mills.lb = function(a,b){ + t=5#threshold for switching to approximation + if(b=t){ + return(a*exp(-a^2/2)/(1+a^2)-exp(-b^2/2)/b) + } + if(b <= -t){ + return(mills.lb(-b,-a)) + } + #Note, like in the rest of this program, I'm not dividing by sqrt(2*pi), so we need to rescale here + sqrt(2*pi)*pnorm(min(b,t))-sqrt(2*pi)*pnorm(max(a,-t))+mills.lb(a,-t)+mills.lb(t,b) +} + + +#Truncate the interval. 
We chop off the ends near infinity, being careful +#so that the chopped tail is guaranteed to be close enough to its mills approximation +#a,b are the left and right endpoints, z is the mid point +#delta is the multiplicative error limit of the truncation on the final fraction (roughly) +truncate.interval = function(a,b,z,delta=1e-16){ + #Initialize some stuff + L.extra = 0#Extra probability to be added for the truncation on the left + R.extra = 0#Extra probability to be added for the truncation on the right + a.new = a#truncated interval bounds + b.new = b#truncated interval bounds + + #We need bounds on the integrals + RL.lb = mills.lb(a,b) + R.lb = mills.lb(z,b) + + #Now we bound the error we can tolerate in the tail approximations + eps.R = min(delta*R.lb,delta*RL.lb/2) + eps.L = delta*RL.lb/2 + + #For now, only truncate infinite end points + #Might want to change this one day, if we have trouble with super wide but finite intervals + if (b==Inf){ + f = function(x){x^2+log(1+x^2)+log(eps.R)}#encodes error of mills approximation + b.new = uniroot(f,c(1.1,1000))$root + b.new = max(b.new,z+1)#Don't truncate past z + R.extra = exp(-b.new^2/2)/b.new + } + if (a==-Inf){ + f = function(x){x^2+log(1+x^2)+log(eps.L)}#encodes error of mills approximation + a.new = -uniroot(f,c(1.1,1000))$root + a.new = min(a.new,z-1)#Don't truncate past z + L.extra = exp(-a.new^2/2)/a.new + } + + list(a=a.new,b=b.new,L.extra=L.extra,R.extra=R.extra,z=z) +} + + +#Approximates integral_a^b e^{-x^2/2+offset^2/2} dx +# offset is used to make ratios slightly more stable +# defaults to offset=0 +# Note that I've left out 1/sqrt(2*pi), you can add it in if you like +approx.int = function(a,b,n=1000,offset=0){ + delta = (b-a)/n #Step size, may want to vary in the future + x = seq(from=a,to=b,by=delta) + y = -x^2/2 + offset^2/2 # On the log scale + m = diff(y)/diff(x) # Line segment slopes + de = diff(exp(y)) # Difference on original scale + sum(de/m) #Sum of integrals of line segments (closed form) +} + +#Uses approx.int to evaluate int_x^b phi(z)dz / int_a^b phi(z)dz +#Right now offsets everything for a little more stability +#Uses truncation to handle infinite endpoints +max.approx.frac = function(a,b,x,mu=0,n=1000){ + returns = numeric(length(mu)) + for(i in 1:length(returns)){ + truncation = truncate.interval(a-mu[i],b-mu[i],x-mu[i]) + #Our offset will use the smaller of a and b in absolute value + offset = min(abs(truncation$a),abs(truncation$b)) + #The truncation also shifts by the mean, so we don't need to do it again for the end points + #but we do need to use the center z returned by truncation, rather than x, to match + left = approx.int(truncation$a,truncation$z,n,offset)+truncation$L.extra + right = approx.int(truncation$z,truncation$b,n,offset)+truncation$R.extra + returns[i] = right/(left+right) + } + returns +} diff --git a/selectiveInference-currentCRAN/R/funs.quadratic.R b/selectiveInference-currentCRAN/R/funs.quadratic.R new file mode 100644 index 0000000..799352b --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.quadratic.R @@ -0,0 +1,257 @@ +truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { + + n <- nrow(obj$x) + Z <- ydecomp$Z + if (type == "TC") { + eta <- ydecomp$eta + } else { + Vd <- ydecomp$Vd + V2 <- ydecomp$V2 + C <- ydecomp$C + R <- ydecomp$R + } + L <- lapply(1:length(obj$action), function(s) { + + Ug <- obj$maxprojs[[s]] + peng <- obj$maxpens[[s]] + if (s > 1) { + Zs <- obj$cumprojs[[s-1]] %*% Z + if (type == "TC") { + etas <- obj$cumprojs[[s-1]] %*% eta + } else { + 
Vds <- obj$cumprojs[[s-1]] %*% Vd + V2s <- obj$cumprojs[[s-1]] %*% V2 + } + } else { + Zs <- Z + if (type == "TC") { + etas <- eta + } else { + Vds <- Vd + V2s <- V2 + } + } + + num.projs <- length(obj$projections[[s]]) + if (num.projs == 0) { + return(list(Intervals(c(-Inf,0)))) + } else { + lapply(1:num.projs, function(l) { + + Uh <- obj$projections[[s]][[l]] + penh <- obj$aicpens[[s]][[l]] + # The quadratic form corresponding to + # (t*U + Z)^T %*% Q %*% (t*U + Z) \geq 0 + # we find the roots in t, if there are any + # and return the interval of potential t + if (type == "TC") { + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) + quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) + } else { + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) + roots <- TF_roots(R, C, coeffs) + return(roots) + } + }) + } + # LL is a list of intervals + }) + # L is now a list of lists of intervals + return(unlist(L, recursive = FALSE, use.names = FALSE)) +} + +quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) { + # g indexes minimizer, h the comparison + Uheta <- t(Uh) %*% etah + Ugeta <- t(Ug) %*% etag + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + etaZh <- t(etah) %*% Zh + etaZg <- t(etag) %*% Zg + if (is.null(sigma)) { + A <- penh * (sum(etah^2) - sum(Uheta^2)) - peng * (sum(etag^2) - sum(Ugeta^2)) + B <- 2 * penh * (etaZh - t(Uheta) %*% UhZ) - 2 * peng * (etaZg - t(Ugeta) %*% UgZ) + C <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) + } else { + A <- (sum(etah^2) - sum(Uheta^2)) - (sum(etag^2) - sum(Ugeta^2)) + B <- 2 * (etaZh - t(Uheta) %*% UhZ) - 2 * (etaZg - t(Ugeta) %*% UgZ) + C <- (sum(Zh^2) - sum(UhZ^2) + penh) - (sum(Zg^2) - sum(UgZ^2) + peng) + } + return(list(A = A, B = B, C = C)) +} + +quadratic_roots <- function(A, B, C, tol) { + disc <- B^2 - 4*A*C + b2a <- -B/(2*A) + + if (disc > tol) { + # Real roots + pm <- sqrt(disc)/(2*A) + endpoints <- sort(c(b2a - pm, b2a + pm)) + + } else { + # No real roots + if (A > -tol) { + # Quadratic form always positive + return(Intervals(c(-Inf,0))) + } else { + # Quadratic form always negative + stop("Empty TC support is infeasible") + } + } + + if (A > tol) { + # Parabola opens upward + if (min(endpoints) > 0) { + # Both roots positive, union of intervals + return(Intervals(rbind(c(-Inf,0), endpoints))) + } else { + # At least one negative root + return(Intervals(c(-Inf, max(0, endpoints[2])))) + } + } else { + if (A < -tol) { + # Parabola opens downward + if (endpoints[2] < 0) { + # Positive quadratic form only when t negative + stop("Negative TC support is infeasible") + } else { + # Part which is positive + if (endpoints[1] > 0) { + return(Intervals(rbind(c(-Inf, endpoints[1]), c(endpoints[2], Inf)))) + } else { + return(Intervals(c(endpoints[2], Inf))) + } + } + } else { + # a is too close to 0, quadratic is actually linear + if (abs(B) > tol) { + if (B > 0) { + return(Intervals(c(-Inf, max(0, -C/B)))) + } else { + if (-C/B < 0) stop("Infeasible linear equation") + return(Intervals(rbind(c(-Inf, 0), c(-C/B, Inf)))) + } + } else { + warning("Ill-conditioned quadratic") + return(Intervals(c(-Inf,0))) + } + } + } +} + +# Helper functions for TF roots +roots_to_checkpoints <- function(roots) { + checkpoints <- unique(sort(c(0, roots))) + return(c(0, (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) +} +roots_to_partition <- function(roots) { + checkpoints <- unique(sort(c(0, roots))) + return(list(endpoints = 
c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) +} + +# Efficiently compute coefficients of one-dimensional TF slice function +TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { + + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + UhVd <- t(Uh) %*% Vdh + UgVd <- t(Ug) %*% Vdg + UhV2 <- t(Uh) %*% V2h + UgV2 <- t(Ug) %*% V2g + VdZh <- sum(Vdh*Zh) + VdZg <- sum(Vdg*Zg) + V2Zh <- sum(V2h*Zh) + V2Zg <- sum(V2g*Zg) + + x0 <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) + x1 <- 2*R*(penh * (VdZh - sum(UhZ*UhVd)) - peng * (VdZg - sum(UgZ*UgVd))) + x2 <- 2*R*(penh * (V2Zh - sum(UhZ*UhV2)) - peng * (V2Zg - sum(UgZ*UgV2))) + x12 <- 2*R^2*(penh * (sum(Vdh*V2h) - sum(UhVd*UhV2)) - peng * (sum(Vdg*V2g) - sum(UgVd*UgV2))) + x11 <- R^2*(penh * (sum(Vdh^2) - sum(UhVd^2)) - peng * (sum(Vdg^2) - sum(UgVd^2))) + x22 <- R^2*(penh * (sum(V2h^2) - sum(UhV2^2)) - peng * (sum(V2g^2) - sum(UgV2^2))) + + return(list(x11=x11, x22=x22, x12=x12, x1=x1, x2=x2, x0=x0)) +} + +# Numerically solve for roots of TF slice using +# hybrid polyroot/uniroot approach + +TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { + + x11 <- coeffs$x11 + x22 <- coeffs$x22 + x12 <- coeffs$x12 + x1 <- coeffs$x1 + x2 <- coeffs$x2 + x0 <- coeffs$x0 + + g1 <- function(t) sqrt(C*t/(1+C*t)) + g2 <- function(t) 1/sqrt(1+C*t) + I <- function(t) x11*g1(t)^2 + x12*g1(t)*g2(t) + x22*g2(t)^2 + x1*g1(t) + x2*g2(t) + x0 + + z4 <- complex(real = -x11 + x22, imaginary = -x12)/4 + z3 <- complex(real = x2, imaginary = -x1)/2 + z2 <- complex(real = x11/2+x22/2+x0) + z1 <- Conj(z3) + z0 <- Conj(z4) + + zcoefs <- c(z0, z1, z2, z3, z4) + croots <- polyroot(zcoefs) + thetas <- Arg(croots) + # Can't specify polyroot precision :( + modinds <- Mod(croots) <= 1 + tol2 & Mod(croots) >= 1 - tol2 + angleinds <- thetas >=0 & thetas <= pi/2 + roots <- unique(thetas[which(modinds & angleinds)]) + troots <- tan(roots)^2/C + + checkpoints <- c() + if (length(troots) > 0) checkpoints <- roots_to_checkpoints(troots) + checkpoints <- sort( + c(checkpoints, 0, tol, tol2, + seq(from = sqrt(tol2), to = 1, length.out = 50), + seq(from = 1.2, to=50, length.out = 20), + 100, 1000, 10000)) + ## if (length(troots) == 0) { + ## # Polyroot didn't catch any roots + ## # ad-hoc check: + ## checkpoints <- c(0, tol, tol2, + ## seq(from = sqrt(tol2), to = 1, length.out = 50), + ## seq(from = 1.2, to=50, length.out = 20), + ## 100, 1000, 10000) + ## } else { + ## checkpoints <- roots_to_checkpoints(troots) + ## } + + signs <- sign(I(checkpoints)) + diffs <- c(0, diff(signs)) + changeinds <- which(diffs != 0) + + if (length(changeinds) > 0) { + + roots <- unlist(lapply(changeinds, function(ind) { + uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind], tol = tol)$root + })) + + partition <- roots_to_partition(roots) + negative <- which(I(partition$midpoints) < 0) + + intervals <- matrix(NA, ncol=2) + for (i in 1:length(negative)) { + ind <- negative[i] + if ((i > 1) && (ind == negative[i-1] + 1)) { + # There was not a sign change at end of previous interval + intervals[nrow(intervals), 2] <- partition$endpoints[ind+1] + } else { + intervals <- rbind(intervals, c(partition$endpoints[ind], partition$endpoints[ind+1])) + } + } + + return(Intervals(intervals[-1,])) + } + + # Apparently no roots, always positive + if (I(0) < 0) stop("Infeasible constraint!") + return(Intervals(c(-Inf,0))) +} + diff --git a/selectiveInference-currentCRAN/man/estimateSigma.Rd 
b/selectiveInference-currentCRAN/man/estimateSigma.Rd new file mode 100644 index 0000000..c956121 --- /dev/null +++ b/selectiveInference-currentCRAN/man/estimateSigma.Rd @@ -0,0 +1,64 @@ +\name{estimateSigma} +\alias{estimateSigma} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +Estimate the noise standard deviation in regression +} +\description{ +Estimates the standard deviation of the noise, for use in the selectiveInference +package +} +\usage{ +estimateSigma(x, y, intercept=TRUE, standardize=TRUE) +} +\arguments{ + \item{x}{ +Matrix of predictors (n by p) +} + \item{y}{ +Vector of outcomes (length n) +} +\item{intercept}{Should glmnet be run with an intercept? Default is TRUE} +\item{standardize}{Should glmnet be run with standardized predictors? Default is TRUE} +} +\details{ +This function estimates the standard deviation of the noise, in a linear regresion setting. +A lasso regression is fit, using cross-validation to estimate the tuning parameter lambda. +With sample size n, yhat equal to the predicted values and df being the number of nonzero +coefficients from the lasso fit, the estimate of sigma is \code{sqrt(sum((y-yhat)^2) / (n-df-1))}. +Important: if you are using glmnet to compute the lasso estimate, be sure to use the settings +for the "intercept" and "standardize" arguments in glmnet and estimateSigma. Same applies to fs +or lar, where the argument for standardization is called "normalize". +} +\value{ +\item{sigmahat}{The estimate of sigma} +\item{df}{The degrees of freedom of lasso fit used} +} +\references{ +Stephen Reid, Jerome Friedman, and Rob Tibshirani (2014). +A study of error variance estimation in lasso regression. arXiv:1311.5274. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# estimate sigma +sigmahat = estimateSigma(x,y)$sigmahat + +# run sequential inference with estimated sigma +out = fsInf(fsfit,sigma=sigmahat) +out +} + + diff --git a/selectiveInference-currentCRAN/man/factorDesign.Rd b/selectiveInference-currentCRAN/man/factorDesign.Rd new file mode 100644 index 0000000..8e061db --- /dev/null +++ b/selectiveInference-currentCRAN/man/factorDesign.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R +\name{factorDesign} +\alias{factorDesign} +\title{Expand a data frame with factors to form a design matrix with the full binary encoding of each factor.} +\usage{ +factorDesign(df) +} +\arguments{ +\item{df}{Data frame containing some columns which are \code{factors}.} +} +\value{ +List containing +\describe{ + \item{x}{Design matrix, the first columns contain any numeric variables from the original date frame.} + \item{index}{Group membership indicator for expanded matrix.} +} +} +\description{ +When using \code{\link{groupfs}} with factor variables call this function first to create a design matrix. 
+} +\examples{ +\dontrun{ +fd = factorDesign(warpbreaks) +y = rnorm(nrow(fd$x)) +fit = groupfs(fd$x, y, fd$index, maxsteps=2, intercept=FALSE) +pvals = groupfsInf(fit) +} +} + diff --git a/selectiveInference-currentCRAN/man/fixedLassoInf.Rd b/selectiveInference-currentCRAN/man/fixedLassoInf.Rd new file mode 100644 index 0000000..6795ec1 --- /dev/null +++ b/selectiveInference-currentCRAN/man/fixedLassoInf.Rd @@ -0,0 +1,252 @@ +\name{fixedLassoInf} +\alias{fixedLassoInf} + +\title{ +Inference for the lasso, with a fixed lambda +} +\description{ +Compute p-values and confidence intervals for the lasso estimate, at a +fixed value of the tuning parameter lambda +} +\usage{ +fixedLassoInf(x, y, beta, lambda, family = c("gaussian", "binomial", + "cox"),intercept=TRUE, status=NULL, sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE) +} +\arguments{ + \item{x}{ +Matrix of predictors (n by p); +} + \item{y}{ +Vector of outcomes (length n) +} + \item{beta}{ +Estimated lasso coefficients (e.g., from glmnet). This is of length p +(so the intercept is not included as the first component). + + Be careful! This function uses the "standard" lasso objective + \deqn{ + 1/2 \|y - x \beta\|_2^2 + \lambda \|\beta\|_1. + } + In contrast, glmnet multiplies the first term by a factor of 1/n. + So after running glmnet, to extract the beta corresponding to a value lambda, + you need to use \code{beta = coef(obj, s=lambda/n)[-1]}, + where obj is the object returned by glmnet (and [-1] removes the intercept, + which glmnet always puts in the first component) +} + \item{lambda}{ +Value of lambda used to compute beta. See the above warning +} + +\item{family}{Response type: "gaussian" (default), "binomial", or + "cox" (for censored survival data) } + +\item{sigma}{ +Estimate of error standard deviation. If NULL (default), this is estimated +using the mean squared residual of the full least squares fit when n >= 2p, and +using the standard deviation of y when n < 2p. In the latter case, the user +should use \code{\link{estimateSigma}} function for a more accurate estimate. +Not used for family= "binomial", or "cox" +} +\item{alpha}{ +Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) +} +\item{intercept}{ +Was the lasso problem solved (e.g., by glmnet) with an intercept in the model? +Default is TRUE. Must be TRUE for "binomial" family. Not used for 'cox" family, where no intercept is assumed. +} +\item{status}{Censoring status for Cox model; 1=failurem 0=censored} +\item{type}{Contrast type for p-values and confidence intervals: default is +"partial"---meaning that the contrasts tested are the partial population +regression coefficients, within the active set of predictors; the alternative is +"full"---meaning that the full population regression coefficients are tested. +The latter does not make sense when p > n.} +\item{tol.beta}{ +Tolerance for determining if a coefficient is zero +} +\item{tol.kkt}{ +Tolerance for determining if an entry of the subgradient is zero +} +\item{gridrange}{ +Grid range for constructing confidence intervals, on the standardized scale +} +\item{bits}{ +Number of bits to be used for p-value and confidence interval calculations. Default is +NULL, in which case standard floating point calculations are performed. 
When not NULL, +multiple precision floating point calculations are performed with the specified number +of bits, using the R package \code{Rmpfr} (if this package is not installed, then a +warning is thrown, and standard floating point calculations are pursued). +Note: standard double precision uses 53 bits +so, e.g., a choice of 200 bits uses about 4 times double precision. The confidence +interval computation is sometimes numerically challenging, and the extra precision can be +helpful (though computationally more costly). In particular, extra precision might be tried +if the values in the output columns of \code{tailarea} differ noticeably from alpha/2. +} +\item{verbose}{ +Print out progress along the way? Default is FALSE} +} + +\details{ +This function computes selective p-values and confidence intervals for the lasso, +given a fixed value of the tuning parameter lambda. +Three different response types are supported: gaussian, binomial and Cox. +The confidence interval construction involves numerical search and can be fragile: +if the observed statistic is too close to either end of the truncation interval +(vlo and vup, see references), then one or possibly both endpoints of the interval of +desired coverage cannot be computed, and default to +/- Inf. The output \code{tailarea} +gives the achieved Gaussian tail areas for the reported intervals---these should be close +to alpha/2, and can be used for error-checking purposes. + +Important!: Before running glmnet (or some other lasso-solver) x should be centered, that is x <- scale(X,TRUE,FALSE). +In addition, if standardization of the predictors is desired, x should be scaled as well: x <- scale(x,TRUE,TRUE). +Then when running glmnet, set standardize=F. See example below. + +The penalty.factor facility in glmmet-- allowing different penalties lambda for each predictor, +is not yet implemented in fixedLassoInf. However you can finesse this--- see the example below. One caveat- using this approach, a penalty factor of zero (forcing a predictor in) +is not allowed. + +Note that the coefficients and standard errors reported are unregularized. +Eg for the Gaussian, they are the usual least squares estimates and standard errors +for the model fit to the actice set from the lasso. +} +\value{ +\item{type}{Type of coefficients tested (partial or full)} +\item{lambda}{Value of tuning parameter lambda used} +\item{pv}{One-sided P-values for active variables, uses the fact we have conditioned on the sign.} +\item{ci}{Confidence intervals} +\item{tailarea}{Realized tail areas (lower and upper) for each confidence interval} +\item{vlo}{Lower truncation limits for statistics} +\item{vup}{Upper truncation limits for statistics} +\item{vmat}{Linear contrasts that define the observed statistics} +\item{y}{Vector of outcomes} +\item{vars}{Variables in active set} +\item{sign}{Signs of active coefficients} +\item{alpha}{Desired coverage (alpha/2 in each tail)} +\item{sigma}{Value of error standard deviation (sigma) used} +\item{call}{The call to lassoInf} +} + +\references{ +Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013). +Exact post-selection inference, with application to the lasso. arXiv:1311.6238. + + Jonathan Taylor and Robert Tibshirani (2016) Post-selection inference for L1-penalized likelihood models. 
+arXiv:1602.07358 + +} +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# first run glmnet +gfit = glmnet(x,y,standardize=FALSE) + +# extract coef for a given lambda; note the 1/n factor! +# (and we don't save the intercept term) +lambda = .8 +beta = coef(gfit, s=lambda/n, exact=TRUE,x=x,y=y)[-1] + +# compute fixed lambda p-values and selection intervals +out = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +out + + +## as above, but use lar function instead to get initial +## lasso fit (should get same results) + lfit = lar(x,y,normalize=FALSE) + beta = coef(lfit,s=lambda,mode="lambda",x=x,y=y) + out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) + out2 + +## mimic different penalty factors by first scaling x + set.seed(43) +n = 50 +p = 10 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) +pf=c(rep(1,7),rep(.1,3)) #define penalty factors +pf=p*pf/sum(pf) # penalty factors should be rescaled so they sum to p +xs=scale(x,FALSE,pf) #scale cols of x by penalty factors +# first run glmnet +gfit = glmnet(xs,y,standardize=FALSE) + +# extract coef for a given lambda; note the 1/n factor! +# (and we don't save the intercept term) +lambda = .8 +beta_hat = coef(gfit, s=lambda/n, exact=TRUE,x=x,y=y)[-1] + +# compute fixed lambda p-values and selection intervals +out = fixedLassoInf(xs,y,beta_hat,lambda,sigma=sigma) + +#rescale conf points to undo the penalty factor +out$ci=t(scale(t(out$ci),FALSE,pf[out$vars])) +out + +#logistic model +set.seed(43) + n = 50 + p = 10 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + y = x\%*\%beta + sigma*rnorm(n) + y=1*(y>mean(y)) + # first run glmnet + gfit = glmnet(x,y,standardize=FALSE,family="binomial") + + # extract coef for a given lambda; note the 1/n factor! + # (and here we DO include the intercept term) + lambda = .8 + beta_hat = coef(gfit, s=lambda/n, exact=TRUE,x=x,y=y) + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial") + out + +#Cox model +set.seed(43) + n = 50 + p = 10 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + tim = as.vector(x\%*\%beta + sigma*rnorm(n)) + tim= tim-min(tim)+1 +status=sample(c(0,1),size=n,replace=TRUE) + # first run glmnet + + + gfit = glmnet(x,Surv(tim,status),standardize=FALSE,family="cox") + + # extract coef for a given lambda; note the 1/n factor! + + lambda = 1.5 + beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE,x=x,y=Surv(tim,status))) + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x,tim,beta_hat,lambda,status=status,family="cox") + out +} + \ No newline at end of file diff --git a/selectiveInference-currentCRAN/man/forwardStop.Rd b/selectiveInference-currentCRAN/man/forwardStop.Rd new file mode 100644 index 0000000..87eb7ab --- /dev/null +++ b/selectiveInference-currentCRAN/man/forwardStop.Rd @@ -0,0 +1,55 @@ +\name{forwardStop} +\alias{forwardStop} +%- Also NEED an '\alias' for EACH other topic documented here. 
+\title{ +ForwardStop rule for sequential p-values +} +\description{ +Computes the ForwardStop sequential stopping rule of G'Sell et al (2014) +} +\usage{ +forwardStop(pv, alpha=0.1) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{pv}{ +Vector of **sequential** p-values, for example from fsInf or larInf +} + \item{alpha}{ +Desired type FDR level (between 0 and 1) +} +} +\details{ +Computes the ForwardStop sequential stopping rule of G'Sell et al (2014). +Guarantees FDR control at the level alpha, for independent p-values. +} +\value{ +Step number for sequential stop. +} +\references{ +Max Grazier G'Sell, Stefan Wager, Alexandra Chouldechova, and Rob Tibshirani (2014). +Sequential selection procedures and Fflse Discovery Rate Control. arXiv:1309.5352. +To appear in Journal of the Royal Statistical Society: Series B. +} +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = fsInf(fsfit) +out + +# estimate optimal stopping point +forwardStop(out$pv, alpha=.10) +} diff --git a/selectiveInference-currentCRAN/man/fs.Rd b/selectiveInference-currentCRAN/man/fs.Rd new file mode 100644 index 0000000..2a61c83 --- /dev/null +++ b/selectiveInference-currentCRAN/man/fs.Rd @@ -0,0 +1,95 @@ +\name{fs} +\alias{fs} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +Forward stepwise regression +} +\description{ +This function implements forward stepwise regression, for use in the +selectiveInference package +} +\usage{ +fs(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, verbose=FALSE) +} +\arguments{ + \item{x}{ +Matrix of predictors (n by p) +} + \item{y}{ +Vector of outcomes (length n) +} + \item{maxsteps}{ +Maximum number of steps to take +} +\item{intercept}{Should an intercept be included on the model? Default is TRUE} +\item{normalize}{Should the predictors be normalized? Default is TRUE. (Note: +this argument has no real effect on model selection since forward stepwise is +scale invariant already; however, it is included for completeness, and to match +the interface for the \code{lar} function) +} +\item{verbose}{Print out progress along the way? Default is FALSE} +} + +\details{ +This function implements forward stepwise regression, adding the predictor at each +step that maximizes the absolute correlation between the predictors---once +orthogonalized with respect to the current model---and the residual. This entry +criterion is standard, and is equivalent to choosing the variable that achieves +the biggest drop in RSS at each step; it is used, e.g., by the \code{step} function +in R. Note that, for example, the \code{lars} package implements a stepwise option +(with type="step"), but uses a (mildly) different entry criterion, based on maximal +absolute correlation between the original (non-orthogonalized) predictors and the +residual. 
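+
+As a rough illustration of this entry criterion (a sketch only, not the
+internal implementation; P below denotes the projection onto the column space
+of the current model and is introduced here just for illustration):
+\preformatted{
+r  = y - P \%*\% y                 # current residual
+xo = x - P \%*\% x                 # predictors orthogonalized w.r.t. current model
+score = abs(t(xo) \%*\% r) / sqrt(colSums(xo^2))
+j = which.max(score)             # variable entering at this step
+# (columns already in the model orthogonalize to zero and are skipped in practice)
+}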
+} +\value{ +\item{action}{Vector of predictors in order of entry} +\item{sign}{Signs of coefficients of predictors, upon entry} +\item{df}{Degrees of freedom of each active model} +\item{beta}{Matrix of regression coefficients for each model along the path, +one column per model} +\item{completepath}{Was the complete stepwise path computed?} +\item{bls}{If completepath is TRUE, the full least squares coefficients} +\item{Gamma}{Matrix that captures the polyhedral selection at each step} +\item{nk}{Number of polyhedral constraints at each step in path} +\item{vreg}{Matrix of linear contrasts that gives coefficients of variables +to enter along the path} +\item{x}{Matrix of predictors used} +\item{y}{Vector of outcomes used} +\item{bx}{Vector of column means of original x} +\item{by}{Mean of original y} +\item{sx}{Norm of each column of original x} +\item{intercept}{Was an intercept included?} +\item{normalize}{Were the predictors normalized?} +\item{call}{The call to fs} +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\seealso{ + \code{\link{fsInf}}, \code{\link{predict.fs}},\code{\link{coef.fs}}, \code{\link{plot.fs}} +} + +\examples{ +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise, plot results +fsfit = fs(x,y) +plot(fsfit) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = fsInf(fsfit) +out +} + + +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/selectiveInference-currentCRAN/man/fsInf.Rd b/selectiveInference-currentCRAN/man/fsInf.Rd new file mode 100644 index 0000000..613bc5a --- /dev/null +++ b/selectiveInference-currentCRAN/man/fsInf.Rd @@ -0,0 +1,135 @@ +\name{fsInf} +\alias{fsInf} +\title{ +Selective inference for forward stepwise regression +} +\description{ +Computes p-values and confidence intervals for forward +stepwise regression +} +\usage{ +fsInf(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) +} +\arguments{ + \item{obj}{ +Object returned by \code{\link{fs}} function +} +\item{sigma}{ +Estimate of error standard deviation. If NULL (default), this is estimated +using the mean squared residual of the full least squares fit when n >= 2p, and +using the standard deviation of y when n < 2p. In the latter case, the user +should use \code{\link{estimateSigma}} function for a more accurate estimate +} +\item{alpha}{ +Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) +} +\item{k}{ +See "type" argument below. Default is NULL, in which case k is taken to be the +the number of steps computed in the forward stepwise path +} +\item{type}{ +Type of analysis desired: with "active" (default), p-values and confidence intervals are +computed for each predictor as it is entered into the active step, all the way through +k steps; with "all", p-values and confidence intervals are computed for all variables in +the active model after k steps; with "aic", the number of steps k is first estimated using +a modified AIC criterion, and then the same type of analysis as in "all" is carried out for +this particular value of k. 
+ +Note that the AIC scheme is defined to choose a number of steps k after which the AIC criterion +increases \code{ntimes} in a row, where \code{ntimes} can be specified by the user (see below). +Under this definition, the AIC selection event is characterizable as a polyhedral set, and hence +the extra conditioning can be taken into account exactly. Also note that an analogous BIC scheme +can be specified through the \code{mult} argument (see below) +} +\item{gridrange}{ +Grid range for constructing confidence intervals, on the standardized scale +} +\item{bits}{ +Number of bits to be used for p-value and confidence interval calculations. Default is +NULL, in which case standard floating point calculations are performed. When not NULL, +multiple precision floating point calculations are performed with the specified number +of bits, using the R package \code{Rmpfr} (if this package is not installed, then a +warning is thrown, and standard floating point calculations are pursued). +Note: standard double precision uses 53 bits +so, e.g., a choice of 200 bits uses about 4 times double precision. The confidence +interval computation is sometimes numerically challenging, and the extra precision can be +helpful (though computationally more costly). In particular, extra precision might be tried +if the values in the output columns of \code{tailarea} differ noticeably from alpha/2. +} +\item{mult}{Multiplier for the AIC-style penalty. Hence a value of 2 (default) +gives AIC, whereas a value of log(n) would give BIC} +\item{ntimes}{Number of steps for which AIC-style criterion has to increase before +minimizing point is declared} +\item{verbose}{Print out progress along the way? Default is FALSE} +} + +\details{ +This function computes selective p-values and confidence intervals (selection intervals) +for forward stepwise regression. The default is to report the results for +each predictor after its entry into the model. See the "type" argument for other options. +The confidence interval construction involves numerical search and can be fragile: +if the observed statistic is too close to either end of the truncation interval +(vlo and vup, see references), then one or possibly both endpoints of the interval of +desired coverage cannot be computed, and default to +/- Inf. The output \code{tailarea} +gives the achieved Gaussian tail areas for the reported intervals---these should be close +to alpha/2, and can be used for error-checking purposes. +} + +\value{ +\item{type}{Type of analysis (active, all, or aic)} +\item{k}{Value of k specified in call} +\item{khat}{When type is "active", this is an estimated stopping point +declared by \code{\link{forwardStop}}; when type is "aic", this is the +value chosen by the modified AIC scheme} +\item{pv}{One sided P-values for active variables, uses the sign that a variable entered the model with.} +\item{ci}{Confidence intervals} +\item{tailarea}{Realized tail areas (lower and upper) for each confidence interval} +\item{vlo}{Lower truncation limits for statistics} +\item{vup}{Upper truncation limits for statistics} +\item{vmat}{Linear contrasts that define the observed statistics} +\item{y}{Vector of outcomes} +\item{vars}{Variables in active set} +\item{sign}{Signs of active coefficients} +\item{alpha}{Desired coverage (alpha/2 in each tail)} +\item{sigma}{Value of error standard deviation (sigma) used} +\item{call}{The call to fsInf} +} + +\references{ +Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014). 
+Exact post-selection inference for sequential regression procedures. arXiv:1401.3889. + +Joshua Loftus and Jonathan Taylor (2014). A significance test for forward stepwise +model selection. arXiv:1405.3920. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\seealso{\code{\link{fs}}} + +\examples{ +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out.seq = fsInf(fsfit) +out.seq + +# compute p-values and confidence intervals after AIC stopping +out.aic = fsInf(fsfit,type="aic") +out.aic + +# compute p-values and confidence intervals after 5 fixed steps +out.fix = fsInf(fsfit,type="all",k=5) +out.fix +} diff --git a/selectiveInference-currentCRAN/man/groupfs.Rd b/selectiveInference-currentCRAN/man/groupfs.Rd new file mode 100644 index 0000000..a57c6dc --- /dev/null +++ b/selectiveInference-currentCRAN/man/groupfs.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R +\name{groupfs} +\alias{groupfs} +\title{Select a model with forward stepwise.} +\usage{ +groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, + center = TRUE, normalize = TRUE, aicstop = 0, verbose = FALSE) +} +\arguments{ +\item{x}{Matrix of predictors (n by p).} + +\item{y}{Vector of outcomes (length n).} + +\item{index}{Group membership indicator of length p. Check that \code{sort(unique(index)) = 1:G} where \code{G} is the number of distinct groups.} + +\item{maxsteps}{Maximum number of steps for forward stepwise.} + +\item{sigma}{Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. When NULL, \code{link{groupfsInf}} performs truncated F inference instead of truncated \eqn{\chi}. See \code{\link[stats]{extractAIC}} for details on the AIC criterion.} + +\item{k}{Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = 2log(p)} for RIC (best for high dimensions, when \eqn{p > n}). If \eqn{G < p} then RIC may be too restrictive and it would be better to use \code{log(G) < k < 2log(p)}.} + +\item{intercept}{Should an intercept be included in the model? Default is TRUE. Does not count as a step.} + +\item{center}{Should the columns of the design matrix be centered? Default is TRUE.} + +\item{normalize}{Should the design matrix be normalized? Default is TRUE.} + +\item{aicstop}{Early stopping if AIC increases. Default is 0 corresponding to no early stopping. Positive integer values specify the number of times the AIC is allowed to increase in a row, e.g. with \code{aicstop = 2} the algorithm will stop if the AIC criterion increases for 2 steps in a row. The default of \code{\link[stats]{step}} corresponds to \code{aicstop = 1}.} + +\item{verbose}{Print out progress along the way? Default is FALSE.} +} +\value{ +An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values. +} +\description{ +This function implements forward selection of linear models almost identically to \code{\link[stats]{step}} with \code{direction = "forward"}. 
The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. +} +\examples{ +x = matrix(rnorm(20*40), nrow=20) +index = sort(rep(1:20, 2)) +y = rnorm(20) + 2 * x[,1] - x[,4] +fit = groupfs(x, y, index, maxsteps = 5) +pvals = groupfsInf(fit) +} +\seealso{ +\code{\link{groupfsInf}}, \code{\link{factorDesign}}. +} + diff --git a/selectiveInference-currentCRAN/man/groupfsInf.Rd b/selectiveInference-currentCRAN/man/groupfsInf.Rd new file mode 100644 index 0000000..74b9a5e --- /dev/null +++ b/selectiveInference-currentCRAN/man/groupfsInf.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R +\name{groupfsInf} +\alias{groupfsInf} +\title{Compute selective p-values for a model fitted by \code{groupfs}.} +\usage{ +groupfsInf(obj, sigma = NULL, verbose = TRUE) +} +\arguments{ +\item{obj}{Object returned by \code{\link{groupfs}} function} + +\item{sigma}{Estimate of error standard deviation. Default is NULL and in this case groupfsInf uses the value of sigma specified to \code{\link{groupfs}}.} + +\item{verbose}{Print out progress along the way? Default is TRUE.} +} +\value{ +An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. + +\describe{ + \item{vars}{Labels of the active groups in the order they were included.} + \item{pv}{Selective p-values computed from appropriate truncated distributions.} + \item{sigma}{Estimate of error variance used in computing p-values.} + \item{TC or TF}{Observed value of truncated \eqn{\chi} or \eqn{F}.} + \item{df}{Rank of group of variables when it was added to the model.} + \item{support}{List of intervals defining the truncation region of the corresponding statistic.} +} +} +\description{ +Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. If the \code{sigma} to \code{\link{groupfs}} was NULL then groupfsInf uses truncated \eqn{F} statistics instead of truncated \eqn{\chi}. The \code{sigma} argument to groupfsInf allows users to override and use \eqn{\chi}, but this is not recommended unless \eqn{\sigma} can be estimated well (i.e. \eqn{n > p}). +} + diff --git a/selectiveInference-currentCRAN/man/lar.Rd b/selectiveInference-currentCRAN/man/lar.Rd new file mode 100644 index 0000000..590a663 --- /dev/null +++ b/selectiveInference-currentCRAN/man/lar.Rd @@ -0,0 +1,95 @@ +\name{lar} +\alias{lar} +\title{ +Least angle regression +} +\description{ +This function implements least angle regression, for use in the +selectiveInference package +} +\usage{ +lar(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, + verbose=FALSE) +} + +\arguments{ + \item{x}{ +Matrix of predictors (n by p) +} + \item{y}{ +Vector of outcomes (length n) +} + \item{maxsteps}{ +Maximum number of steps to take +} +\item{minlam}{ +Minimum value of lambda to consider +} +\item{intercept}{Should an intercept be included on the model? Default is TRUE} +\item{normalize}{Should the predictors be normalized? Default is TRUE} +\item{verbose}{Print out progress along the way? 
Default is FALSE} +} + +\details{ +The least angle regression algorithm is described in detail by Efron et al. (2002). +This function should match (in terms of its output) that from the \code{lars} package, +but returns additional information (namely, the polyhedral constraints) needed for the +selective inference calculations. +} + +\value{ +\item{lambda}{Values of lambda (knots) visited along the path} +\item{action}{Vector of predictors in order of entry} +\item{sign}{Signs of coefficients of predictors, upon entry} +\item{df}{Degrees of freedom of each active model} +\item{beta}{Matrix of regression coefficients for each model along the path, +one model per column} +\item{completepath}{Was the complete stepwise path computed?} +\item{bls}{If completepath is TRUE, the full least squares coefficients} +\item{Gamma}{Matrix that captures the polyhedral selection at each step} +\item{nk}{Number of polyhedral constraints at each step in path} +\item{vreg}{Matrix of linear contrasts that gives coefficients of variables +to enter along the path} +\item{mp}{Value of M+ (for internal use with the spacing test)} +\item{x}{Matrix of predictors used} +\item{y}{Vector of outcomes used} +\item{bx}{Vector of column means of original x} +\item{by}{Mean of original y} +\item{sx}{Norm of each column of original x} +\item{intercept}{Was an intercept included?} +\item{normalize}{Were the predictors normalized?} +\item{call}{The call to lar} +} + +\references{ +Brad Efron, Trevor Hastie, Iain Johnstone, and Rob Tibshirani (2002). +Least angle regression. Annals of Statistics (with discussion). + +See also the descriptions in Trevor Hastie, Rob Tibshirani, and +Jerome Friedman (2002, 2009). Elements of Statistical Learning. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Max G'Sell, Joshua Loftus, Stephen Reid} + +\seealso{ + \code{\link{larInf}}, \code{\link{predict.lar}}, \code{\link{coef.lar}}, \code{\link{plot.lar}} +} + +\examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run LAR, plot results +larfit = lar(x,y) +plot(larfit) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = larInf(larfit) +out +} diff --git a/selectiveInference-currentCRAN/man/larInf.Rd b/selectiveInference-currentCRAN/man/larInf.Rd new file mode 100644 index 0000000..8e3b2d0 --- /dev/null +++ b/selectiveInference-currentCRAN/man/larInf.Rd @@ -0,0 +1,140 @@ +\name{larInf} +\alias{larInf} +\title{ +Selective inference for least angle regression +} +\description{ +Computes p-values and confidence intervals for least +angle regression +} +\usage{ +larInf(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) +} +\arguments{ + \item{obj}{ +Object returned by \code{lar} function (not the \code{lars} function!) +} +\item{sigma}{ +Estimate of error standard deviation. If NULL (default), this is estimated +using the mean squared residual of the full least squares fit when n >= 2p, and +using the standard deviation of y when n < 2p. In the latter case, the user +should use \code{\link{estimateSigma}} function for a more accurate estimate +} +\item{alpha}{ +Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) +} +\item{k}{ +See "type" argument below. 
Default is NULL, in which case k is taken to be the +the number of steps computed in the least angle regression path +} +\item{type}{ +Type of analysis desired: with "active" (default), p-values and confidence intervals are +computed for each predictor as it is entered into the active step, all the way through +k steps; with "all", p-values and confidence intervals are computed for all variables in +the active model after k steps; with "aic", the number of steps k is first estimated using +a modified AIC criterion, and then the same type of analysis as in "all" is carried out for +this particular value of k. + +Note that the AIC scheme is defined to choose a number of steps k after which the AIC criterion +increases \code{ntimes} in a row, where \code{ntimes} can be specified by the user (see below). +Under this definition, the AIC selection event is characterizable as a polyhedral set, and hence +the extra conditioning can be taken into account exactly. Also note that an analogous BIC scheme +can be specified through the \code{mult} argument (see below) +} +\item{gridrange}{ +Grid range for constructing confidence intervals, on the standardized scale +} +\item{bits}{ +Number of bits to be used for p-value and confidence interval calculations. Default is +NULL, in which case standard floating point calculations are performed. When not NULL, +multiple precision floating point calculations are performed with the specified number +of bits, using the R package \code{Rmpfr} (if this package is not installed, then a +warning is thrown, and standard floating point calculations are pursued). +Note: standard double precision uses 53 bits +so, e.g., a choice of 200 bits uses about 4 times double precision. The confidence +interval computation is sometimes numerically challenging, and the extra precision can be +helpful (though computationally more costly). In particular, extra precision might be tried +if the values in the output columns of \code{tailarea} differ noticeably from alpha/2. +} +\item{mult}{Multiplier for the AIC-style penalty. Hence a value of 2 (default) +gives AIC, whereas a value of log(n) would give BIC} +\item{ntimes}{Number of steps for which AIC-style criterion has to increase before +minimizing point is declared} +\item{verbose}{Print out progress along the way? Default is FALSE} +} + +\details{ +This function computes selective p-values and confidence intervals (selection intervals) +for least angle regression. The default is to report the results for +each predictor after its entry into the model. See the "type" argument for other options. +The confidence interval construction involves numerical search and can be fragile: +if the observed statistic is too close to either end of the truncation interval +(vlo and vup, see references), then one or possibly both endpoints of the interval of +desired coverage cannot be computed, and default to +/- Inf. The output \code{tailarea} +gives the achieved Gaussian tail areas for the reported intervals---these should be close +to alpha/2, and can be used for error-checking purposes. 
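+
+As a minimal illustrative check (assuming the default alpha=0.1, so the target
+tail area is 0.05):
+\preformatted{
+## out = larInf(larfit)
+## max(abs(out$tailarea - 0.05))  # values far from 0 suggest re-running with, e.g., bits=200
+}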
+} + +\value{ + +\item{type}{Type of analysis (active, all, or aic)} +\item{k}{Value of k specified in call} +\item{khat}{When type is "active", this is an estimated stopping point +declared by \code{\link{forwardStop}}; when type is "aic", this is the +value chosen by the modified AIC scheme} +\item{pv}{P-values for active variables} +\item{ci}{Confidence intervals} +\item{tailarea}{Realized tail areas (lower and upper) for each confidence interval} +\item{vlo}{Lower truncation limits for statistics} +\item{vup}{Upper truncation limits for statistics} +\item{vmat}{Linear contrasts that define the observed statistics} +\item{y}{Vector of outcomes} +\item{pv.spacing}{P-values from the spacing test (here M+ is used)} +\item{pv.modspac}{P-values from the modified form of the spacing test +(here M+ is replaced by the next knot)} +\item{pv.covtest}{P-values from covariance test} +\item{vars}{Variables in active set} +\item{sign}{Signs of active coefficients} +\item{alpha}{Desired coverage (alpha/2 in each tail)} +\item{sigma}{Value of error standard deviation (sigma) used} +\item{call}{The call to larInf} +} + +\references{ +Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014). +Exact post-selection inference for sequential regression procedures. arXiv:1401.3889. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\seealso{ + \code{\link{lar}} +} + +\examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run LAR +larfit = lar(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out.seq = larInf(larfit) +out.seq + +# compute p-values and confidence intervals after AIC stopping +out.aic = larInf(larfit,type="aic") +out.aic + +# compute p-values and confidence intervals after 5 fixed steps +out.fix = larInf(larfit,type="all",k=5) +out.fix +} + diff --git a/selectiveInference-currentCRAN/man/manyMeans.Rd b/selectiveInference-currentCRAN/man/manyMeans.Rd new file mode 100644 index 0000000..57fc429 --- /dev/null +++ b/selectiveInference-currentCRAN/man/manyMeans.Rd @@ -0,0 +1,60 @@ +\name{manyMeans} +\alias{manyMeans} +\title{ +Selective inference for many normal means +} +\description{ +Computes p-values and confidence intervals for the largest k +among many normal means +} +\usage{ +manyMeans(y, alpha=0.1, bh.q=NULL, k=NULL, sigma=1, verbose=FALSE) +} + +\arguments{ + \item{y}{Vector of outcomes (length n)} +\item{alpha}{ +Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) +} +\item{bh.q}{q parameter for BH(q) procedure} +\item{k}{Number of means to consider} +\item{sigma}{Estimate of error standard deviation} +\item{verbose}{Print out progress along the way? Default is FALSE} +} +\details{ +This function compute p-values and confidence intervals for the largest k +among many normal means. One can specify a fixed number of means k to consider, +or choose the number to consider via the BH rule. +} + +\value{ +\item{mu.hat}{ Vector of length n containing the estimated signal sizes. +If a sample element is not selected, then its signal size estimate is 0} +\item{selected.set}{Indices of the vector y of the sample elements that +were selected by the procedure (either BH(q) or top-K). 
Labelled "Selind" in output table.} +\item{pv}{P-values for selected signals} +\item{ci}{Confidence intervals} +\item{method}{Method used to choose number of means} +\item{sigma}{Value of error standard deviation (sigma) used} +\item{bh.q}{BH q-value used} +\item{k}{Desired number of means} +\item{threshold}{Computed cutoff} +\item{call}{The call to manyMeans} +} + +\references{ +Stephen Reid, Jonathan Taylor, and Rob Tibshirani (2014). +Post-selection point and interval estimation of signal sizes in Gaussian samples. +arXiv:1405.3340. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(12345) +n = 100 +mu = c(rep(3,floor(n/5)), rep(0,n-floor(n/5))) +y = mu + rnorm(n) +out = manyMeans(y, bh.q=0.1) +out +} diff --git a/selectiveInference-currentCRAN/man/plot.fs.Rd b/selectiveInference-currentCRAN/man/plot.fs.Rd new file mode 100644 index 0000000..4f77013 --- /dev/null +++ b/selectiveInference-currentCRAN/man/plot.fs.Rd @@ -0,0 +1,42 @@ +\name{plot.fs} +\alias{plot.fs} + +\title{ +Plot function for forward stepwise regression +} +\description{ +Plot coefficient profiles along the forward stepwise path +} + +\usage{ +\method{plot}{fs} (x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) +} + +\arguments{ +\item{x}{ +Object returned by a call to \code{fs} function +} +\item{breaks}{Should vertical lines be drawn at each break point in the piecewise +linear coefficient paths? Default is TRUE} +\item{omit.zeros}{Should segments of the coefficients paths that are equal to +zero be omitted (to avoid clutter in the figure)? Default is TRUE} +\item{var.labels}{Should paths be labelled with corresponding variable numbers? +Default is TRUE} +\item{...}{Additional arguments for plotting} +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise, plot results +fsfit = fs(x,y) +plot(fsfit) +} diff --git a/selectiveInference-currentCRAN/man/plot.lar.Rd b/selectiveInference-currentCRAN/man/plot.lar.Rd new file mode 100644 index 0000000..baa1195 --- /dev/null +++ b/selectiveInference-currentCRAN/man/plot.lar.Rd @@ -0,0 +1,46 @@ +\name{plot.lar} +\alias{plot.lar} + +\title{ +Plot function for least angle regression +} +\description{ +Plot coefficient profiles along the LAR path +} + +\usage{ +\method{plot}{lar}(x, xvar=c("norm","step","lambda"), breaks=TRUE, + omit.zeros=TRUE, var.labels=TRUE, ...) +} + +\arguments{ +\item{x}{ +Object returned by a call to \code{lar} function +(not the \code{lars} function!) +} +\item{xvar}{Either "norm" or "step" or "lambda", determining what is plotted +on the x-axis} +\item{breaks}{Should vertical lines be drawn at each break point in the piecewise +linear coefficient paths? Default is TRUE} +\item{omit.zeros}{Should segments of the coefficients paths that are equal to +zero be omitted (to avoid clutter in the figure)? Default is TRUE} +\item{var.labels}{Should paths be labelled with corresponding variable numbers? 
+Default is TRUE} +\item{...}{Additional arguments for plotting} +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run LAR, plot results +larfit = lar(x,y) +plot(larfit) +} diff --git a/selectiveInference-currentCRAN/man/predict.fs.Rd b/selectiveInference-currentCRAN/man/predict.fs.Rd new file mode 100644 index 0000000..5e50482 --- /dev/null +++ b/selectiveInference-currentCRAN/man/predict.fs.Rd @@ -0,0 +1,49 @@ +\name{predict.fs} +\alias{predict.fs} +\alias{coef.fs} + +\title{ +Prediction and coefficient functions for forward stepwise +regression +} +\description{ +Make predictions or extract coefficients from a forward stepwise object +} +\usage{ +\method{predict}{fs}(object, newx, s, ...) +\method{coef}{fs}(object, s, ...) +} + +\arguments{ +\item{object}{ +Object returned by a call to \code{fs} function +} +\item{newx}{ +Matrix of x values at which the predictions are desired. If NULL, +the x values from forward stepwise fitting are used +} +\item{s}{ +Step number(s) at which predictions or coefficients are desired +} +\item{\dots}{Additional arguments} +} + +\value{ +Either a vector/matrix of predictions, or a vector/matrix of coefficients. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(33) +n = 200 +p = 20 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(rep(3,10),rep(0,p-10)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise and predict functions +obj = fs(x,y) +fit = predict(obj,x,s=3) +} diff --git a/selectiveInference-currentCRAN/man/predict.groupfs.Rd b/selectiveInference-currentCRAN/man/predict.groupfs.Rd new file mode 100644 index 0000000..4a382c7 --- /dev/null +++ b/selectiveInference-currentCRAN/man/predict.groupfs.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R +\name{predict.groupfs} +\alias{predict.groupfs} +\title{Prediction and coefficient functions for \code{\link{groupfs}}. + +Make predictions or extract coefficients from a groupfs forward stepwise object.} +\usage{ +\method{predict}{groupfs}(object, newx) +} +\arguments{ +\item{object}{Object returned by a call to \code{\link{groupfs}}.} + +\item{newx}{Matrix of x values at which the predictions are desired. If NULL, the x values from groupfs fitting are used.} +} +\value{ +A vector of predictions or a vector of coefficients. +} +\description{ +Prediction and coefficient functions for \code{\link{groupfs}}. + +Make predictions or extract coefficients from a groupfs forward stepwise object. +} + diff --git a/selectiveInference-currentCRAN/man/predict.lar.Rd b/selectiveInference-currentCRAN/man/predict.lar.Rd new file mode 100644 index 0000000..c91bed3 --- /dev/null +++ b/selectiveInference-currentCRAN/man/predict.lar.Rd @@ -0,0 +1,52 @@ +\name{predict.lar} +\alias{predict.lar} +\alias{coef.lar} + +\title{ +Prediction and coefficient functions for least angle regression +} +\description{ +Make predictions or extract coefficients from a least angle regression object +} +\usage{ +\method{predict}{lar}(object, newx, s, mode=c("step","lambda"), ...) +\method{coef}{lar}(object, s, mode=c("step","lambda"), ...) +} + +\arguments{ +\item{object}{ +Object returned by a call to \code{lar} function +(not the \code{lars} function!) 
+} +\item{newx}{ +Matrix of x values at which the predictions are desired. If NULL, +the x values from least angle regression fitting are used +} +\item{s}{ +Step number(s) or lambda value(s) at which predictions or coefficients +are desired +} +\item{mode}{Either "step" or "lambda", determining the role of s (above)} + +\item{\dots}{Additional arguments} +} + +\value{ +Either a vector/matrix of predictions, or a vector/matrix of coefficients. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(33) +n = 200 +p = 20 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(rep(3,10),rep(0,p-10)) +y = x\%*\%beta + sigma*rnorm(n) + +# run lar and predict functions +obj = lar(x,y) +fit = predict(obj,x,s=3) +} diff --git a/selectiveInference-currentCRAN/man/scaleGroups.Rd b/selectiveInference-currentCRAN/man/scaleGroups.Rd new file mode 100644 index 0000000..e5a93fa --- /dev/null +++ b/selectiveInference-currentCRAN/man/scaleGroups.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R +\name{scaleGroups} +\alias{scaleGroups} +\title{Center and scale design matrix by groups} +\usage{ +scaleGroups(x, index, center = TRUE, normalize = TRUE) +} +\arguments{ +\item{x}{Design matrix.} + +\item{index}{Group membership indicator of length p.} + +\item{center}{Center groups, default is TRUE.} + +\item{normalize}{Scale groups by Frobenius norm, default is TRUE.} +} +\value{ +\describe{ + \item{x}{Optionally centered/scaled design matrix.} + \item{xm}{Means of groups in original design matrix.} + \item{xs}{Frobenius norms of groups in original design matrix.} +} +} +\description{ +For internal use by \code{\link{groupfs}}. +} + diff --git a/selectiveInference-currentCRAN/man/selectiveInference-internal.Rd b/selectiveInference-currentCRAN/man/selectiveInference-internal.Rd new file mode 100644 index 0000000..4808173 --- /dev/null +++ b/selectiveInference-currentCRAN/man/selectiveInference-internal.Rd @@ -0,0 +1,24 @@ +\name{selectiveInference-internal} +\title{Internal PMA functions} +\alias{print.fixedLassoInf} + \alias{print.fs} +\alias{print.fsInf} +\alias{print.larInf} +\alias{print.lar} + \alias{print.manyMeans} + + + +\description{Internal selectiveInference functions} +\usage{ +\method{print}{fs}(x, ...) +\method{print}{fsInf}(x, tailarea = TRUE, ...) +\method{print}{lar}(x,...) +\method{print}{larInf}(x, tailarea = TRUE, ...) +\method{print}{fixedLassoInf}(x, tailarea = TRUE, ...) +\method{print}{manyMeans}(x,...) + +} +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} +\keyword{internal} + diff --git a/selectiveInference-currentCRAN/man/selectiveInference.Rd b/selectiveInference-currentCRAN/man/selectiveInference.Rd new file mode 100644 index 0000000..6e038d4 --- /dev/null +++ b/selectiveInference-currentCRAN/man/selectiveInference.Rd @@ -0,0 +1,205 @@ +\name{selectiveInference} +\alias{selectiveInference} +\docType{package} +\title{ +Tools for selective inference +} +\description{ +Functions to perform post-selection inference for forward +stepwise regression, least angle regression, the lasso and the +many normal means problem. The lasso function also supports logistic regression and the Cox model. 
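
predict.lar and coef.lar, documented just above, index the path either by step number or by lambda via the mode argument. A small sketch (simulated data; that the knot values are stored as larfit$lambda is an assumption about the lar output, not something stated in this help file):

```r
# Sketch: lar coefficients indexed by step versus by lambda.
library(selectiveInference)

set.seed(43)
n = 50; p = 10; sigma = 1
x = matrix(rnorm(n*p), n, p)
beta = c(3, 2, rep(0, p-2))
y = as.numeric(x %*% beta + sigma * rnorm(n))

larfit = lar(x, y)
coef(larfit, s = 3, mode = "step")                        # after 3 LAR steps
coef(larfit, s = 0.5 * larfit$lambda[1], mode = "lambda") # at a lambda inside the path
predict(larfit, newx = x, s = 3, mode = "step")
```
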
+} +\details{ +\tabular{ll}{ +Package: \tab selectiveInference\cr +Type: \tab Package\cr +License: \tab GPL-2\cr +} + +This package provides tools for inference after selection, in forward stepwise +regression, least angle regression, the lasso, and the many normal means problem. +The functions compute p-values and selection intervals that properly account for +the inherent selection carried out by the procedure. These have exact finite sample +type I error and coverage under Gaussian errors. For the logistic and Cox familes (fixedLassoInf), + the coverage is asymptotically valid + +This R package was developed as part of the selective inference software project +in Python and R: + +\url{https://github.com/selective-inference} + +Some of the R code in this work is a modification of Python code from this +repository. Here is the current selective inference software team: + +Yuval Benjamini, +Leonard Blier, +Will Fithian, +Jason Lee, +Joshua Loftus, +Joshua Loftus, Stephen Reid, +Dennis Sun, +Yuekai Sun, +Jonathan Taylor, +Xiaoying Tian, +Ryan Tibshirani, +Rob Tibshirani + +The main functions included in the package are: +\code{\link{fs}}, +\code{\link{fsInf}}, +\code{\link{lar}}, +\code{\link{larInf}}, +\code{\link{fixedLassoInf}}, +\code{\link{manyMeans}} +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid + +Maintainer: Rob Tibshirani +} + +\references{ +Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014). +Exact post-selection inference for sequential regression procedures. arXiv:1401.3889. + +Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013). +Exact post-selection inference, with application to the lasso. arXiv:1311.6238. + +Stephen Reid, Jonathan Taylor, and Rob Tibshirani (2014). +Post-selection point and interval estimation of signal sizes in Gaussian samples. +arXiv:1405.3340. + + +Jonathan Taylor and Robert Tibshirani (2016) Post-selection inference for L1-penalized likelihood models. +arXiv:1602.07358 + +} + +\examples{ +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out.seq = fsInf(fsfit) +out.seq + +# compute p-values and confidence intervals after AIC stopping +out.aic = fsInf(fsfit,type="aic") +out.aic + +# compute p-values and confidence intervals after 5 fixed steps +out.fix = fsInf(fsfit,type="all",k=5) +out.fix + +## NOT RUN---lasso at fixed lambda- Gaussian family +## first run glmnet +# gfit = glmnet(x,y) + +## extract coef for a given lambda; note the 1/n factor! +## (and we don't save the intercept term) +# lambda = .1 +# beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + +## compute fixed lambda p-values and selection intervals +# out = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +# out + + +#lasso at fixed lambda- logistic family +#set.seed(43) + # n = 50 + # p = 10 + # sigma = 1 + + # x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + # +# beta = c(3,2,rep(0,p-2)) + # y = x%*%beta + sigma*rnorm(n) + # y=1*(y>mean(y)) + # first run glmnet + # gfit = glmnet(x,y,standardize=FALSE,family="binomial") + + # extract coef for a given lambda; note the 1/n factor! 
+ # (and here we DO include the intercept term) + # lambda = .8 + # beta = coef(gfit, s=lambda/n, exact=TRUE) + + # # compute fixed lambda p-values and selection intervals + # out = fixedLassoInf(x,y,beta,lambda,family="binomial") + # out + +##lasso at fixed lambda- Cox family +#set.seed(43) +# n = 50 + # p = 10 + # sigma = 1 + + # x = matrix(rnorm(n*p),n,p) + # x=scale(x,TRUE,TRUE) + + # beta = c(3,2,rep(0,p-2)) + # tim = as.vector(x\%*\%beta + sigma*rnorm(n)) + # tim= tim-min(tim)+1 +#status=sample(c(0,1),size=n,replace=T) + # first run glmnet + # gfit = glmnet(x,Surv(tim,status),standardize=FALSE,family="cox") + # extract coef for a given lambda; note the 1/n factor! + + # lambda = 1.5 + # beta = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) + + # compute fixed lambda p-values and selection intervals + # out = fixedLassoInf(x,tim,beta,lambda,status=status,family="cox") + # out +## NOT RUN---many normal means +# set.seed(12345) +# n = 100 +# mu = c(rep(3,floor(n/5)), rep(0,n-floor(n/5))) +# y = mu + rnorm(n) +# out = manyMeans(y, bh.q=0.1) +# out + +## NOT RUN---forward stepwise with groups +# set.seed(1) +# n = 20 +# p = 40 +# x = matrix(rnorm(n*p), nrow=n) +# index = sort(rep(1:(p/2), 2)) +# y = rnorm(n) + 2 * x[,1] - x[,4] +# fit = groupfs(x, y, index, maxsteps = 5) +# out = groupfsInf(fit) +# out + +## NOT RUN---estimation of sigma for use in fsInf +## (or larInf or fixedLassoInf) +# set.seed(33) +# n = 50 +# p = 10 +# sigma = 1 +# x = matrix(rnorm(n*p),n,p) +# beta = c(3,2,rep(0,p-2)) +# y = x\%*\%beta + sigma*rnorm(n) + +## run forward stepwise +# fsfit = fs(x,y) + +## estimate sigma +# sigmahat = estimateSigma(x,y)$sigmahat + +## run sequential inference with estimated sigma +# out = fsInf(fit,sigma=sigmahat) +# out +} +\keyword{ package } + + diff --git a/selectiveInference/src/matrixcomps.c b/selectiveInference-currentCRAN/src/matrixcomps.c similarity index 100% rename from selectiveInference/src/matrixcomps.c rename to selectiveInference-currentCRAN/src/matrixcomps.c diff --git a/selectiveInference-currentCRAN/src/selinf_init.c b/selectiveInference-currentCRAN/src/selinf_init.c new file mode 100644 index 0000000..3882861 --- /dev/null +++ b/selectiveInference-currentCRAN/src/selinf_init.c @@ -0,0 +1,24 @@ +#include +#include + +/* . 
entry points */ +extern void update1(double *Q2, double *w, int *mp, int *kp); +static R_NativePrimitiveArgType update1_t[] = { + REALSXP, REALSXP, INTSXP, INTSXP +}; + +extern void downdate1(double *Q1, double *R, int *j0p, int *mp, int *np); +static R_NativePrimitiveArgType downdate1_t[] = { + REALSXP, REALSXP, INTSXP, INTSXP, INTSXP +}; + +static const R_CMethodDef CEntries[] = { + {"update1", (DL_FUNC) &update1, 4}, + {"downdate1", (DL_FUNC) &downdate1, 5}, + {NULL, NULL, 0} +}; + +void R_init_cubature(DllInfo *dll) { + R_registerRoutines(dll, CEntries, NULL, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/selectiveInference-currentCRAN/src/truncnorm.c b/selectiveInference-currentCRAN/src/truncnorm.c new file mode 100644 index 0000000..cca61d9 --- /dev/null +++ b/selectiveInference-currentCRAN/src/truncnorm.c @@ -0,0 +1,188 @@ +#include +#include + +// Take a Gibbs hit and run step along a given direction + +// Assumes the covariance is identity + +void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ + double *direction, /* direction we will take Gibbs step */ + double *U, /* A %*% state - b */ + double *alpha, /* A %*% direction */ + int nconstraint, /* number of rows of A */ + int nstate) /* dimension of state */ +{ + + int istate; + double value = 0; + + /* Compute V=\eta^Ty */ + + for (istate = 0; istate < nstate; istate++) { + value += direction[istate] * state[istate]; + } + + /* Compute upper and lower bounds */ + + double lower_bound = -1e12; + double upper_bound = 1e12; + double bound_val = 0; + double tol=1.e-7; + int iconstraint; + + for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + + bound_val = -U[iconstraint] / alpha[iconstraint] + value; + + if ((alpha[iconstraint] > tol) && + (bound_val < upper_bound)) { + upper_bound = bound_val; + } + else if ((alpha[iconstraint] < -tol) && + (bound_val > lower_bound)) { + lower_bound = bound_val; + } + + } + + /* Ensure constraints are satisfied */ + + if (lower_bound > value) { + lower_bound = value - tol; + } + else if (upper_bound < value) { + upper_bound = value + tol; + } + + /* Check to see if constraints are satisfied */ + + /* if (lower_bound > upper_bound) { + + }*/ + + /* Now, take a step */ + + double tnorm; /* the 1D gaussian variable */ + double cdfU, cdfL, unif; /* temp variables */ + + if (upper_bound < -10) { + + /* use Exp approximation */ + /* the approximation is that */ + /* Z | lower_bound < Z < upper_bound */ + /* is fabs(upper_bound) * (upper_bound - Z) = E approx Exp(1) */ + /* so Z = upper_bound - E / fabs(upper_bound) */ + /* and the truncation of the exponential is */ + /* E < fabs(upper_bound - lower_bound) * fabs(upper_bound) = D */ + + /* this has distribution function (1 - exp(-x)) / (1 - exp(-D)) */ + /* so to draw from this distribution */ + /* we set E = - log(1 - U * (1 - exp(-D))) where U is Unif(0,1) */ + /* and Z (= tnorm below) is as stated */ + + unif = runif(0., 1.) * (1 - exp(-fabs((lower_bound - upper_bound) * upper_bound))); + tnorm = (upper_bound + log(1 - unif) / fabs(upper_bound)); + } + else if (lower_bound > 10) { + + /* here Z = lower_bound + E / fabs(lower_bound) (though lower_bound is positive) */ + /* and D = fabs((upper_bound - lower_bound) * lower_bound) */ + + unif = runif(0., 1.) 
* (1 - exp(-fabs((upper_bound - lower_bound) * lower_bound))); + tnorm = (lower_bound - log(1 - unif) / lower_bound); + } + else if (lower_bound < 0) { + cdfL = pnorm(lower_bound, 0., 1., 1, 0); + cdfU = pnorm(upper_bound, 0., 1., 1, 0); + unif = runif(0., 1.) * (cdfU - cdfL) + cdfL; + if (unif < 0.5) { + tnorm = qnorm(unif, 0., 1., 1, 0); + } + else { + tnorm = -qnorm(1-unif, 0., 1., 1, 0); + } + } + else { + cdfL = pnorm(-lower_bound, 0., 1., 1, 0); + cdfU = pnorm(-upper_bound, 0., 1., 1, 0); + unif = runif(0., 1.) * (cdfL - cdfU) + cdfU; + if (unif < 0.5) { + tnorm = -qnorm(unif, 0., 1., 1, 0); + } + else { + tnorm = qnorm(1-unif, 0., 1., 1, 0); + } + } + + /* Now update the state and U */ + + double delta = tnorm - value; + + for (istate = 0; istate < nstate; istate++) { + state[istate] += delta * direction[istate]; + } + for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + U[iconstraint] += delta * alpha[iconstraint] ; + } + + /* End of gibbs_step */ + +} + +void sample_truncnorm_white(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ + double *U, /* A %*% state - b */ + double *directions, /* possible steps for sampler to take */ + /* assumed to be stored as list of columns of dimension nstate */ + /* has shape (nstate, ndirection) */ + double *alphas, /* The matrix A %*% directions */ + /* has shape (nconstraint, ndirection) */ + double *output, /* array in which to store samples */ + /* assumed will stored as list of vectors of dimension nstate */ + /* has shape (nstate, ndraw) */ + int *pnconstraint, /* number of rows of A */ + int *pndirection, /* the possible number of directions to choose from */ + /* `directions` should have size nstate*ndirection */ + int *pnstate, /* dimension of state */ + int *pburnin, /* number of burnin steps */ + int *pndraw) /* total number of samples to return */ +{ + + int iter_count; + int which_direction; + + int nconstraint = *pnconstraint; + int ndirection = *pndirection; + int nstate = *pnstate; + int burnin = *pburnin; + int ndraw = *pndraw; + + double *direction, *alpha; + + for (iter_count = 0; iter_count < burnin + ndraw; iter_count++) { + + which_direction = (int) floor(runif(0., 1.) * ndirection); + direction = ((double *) directions) + nstate * which_direction; + alpha = ((double *) alphas) + nconstraint * which_direction; + + /* take a step, which implicitly updates `state` and `U` */ + + gibbs_step(state, + direction, + U, + alpha, + nconstraint, + nstate); + + /* Store result if after burnin */ + + int istate; + if (iter_count >= burnin) { + for (istate = 0; istate < nstate; istate++) { + *output = state[istate]; + output++; + } + } + } + +} + diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index df66eb5..c765663 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -1,14 +1,23 @@ Package: selectiveInference Type: Package -Title: Tools for Selective Inference -Version: 1.1.1 -Date: 2015-09-01 -Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, - Joshua Loftus, Stephen Reid +Title: Tools for Post-Selection Inference +Version: 1.2.2 +Date: 2016-07-3 +Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, + Joshua Loftus, Stephen Reid Maintainer: Rob Tibshirani -Depends: glmnet, intervals -Suggests: Rmpfr -Description: New tools for inference after selection, for use - with forward stepwise regression, least angle regression, the - lasso, and the many means problem. 
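
gibbs_step above draws the one-dimensional variable eta'y from a truncated standard normal and then moves the whole state along the chosen direction. Below is a pure-R sketch of the same update, using only the plain CDF-inversion branch (none of the exponential tail approximations), which can be handy for sanity-checking the C sampler on small, well-conditioned problems. The function name is made up here, and a unit-norm direction is assumed, as in the C code's N(0, I) setting.

```r
# Pure-R sketch of one hit-and-run Gibbs step on {y : A y <= b} with y ~ N(0, I).
# `direction` is assumed to have unit length; only CDF inversion is used, so this
# is for checking, not for the extreme-tail cases the C code approximates.
gibbs_step_R <- function(state, direction, A, b, tol = 1e-7) {
  value = sum(direction * state)        # current eta' y
  alpha = as.vector(A %*% direction)    # A %*% direction
  U     = as.vector(A %*% state - b)    # slacks, <= 0 inside the region
  bound_val = -U / alpha + value        # crossing points along the direction
  upper = min(bound_val[alpha >  tol],  1e12)
  lower = max(bound_val[alpha < -tol], -1e12)
  u = runif(1, pnorm(lower), pnorm(upper))  # invert the truncated N(0,1) CDF
  state + (qnorm(u) - value) * direction
}
```
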
+Depends: + glmnet, + intervals, + survival, + adaptMCMC, +Suggests: + Rmpfr +Description: New tools for post-selection inference, for use with forward + stepwise regression, least angle regression, the lasso, and the many means + problem. The lasso function implements Gaussian, logistic and Cox survival + models. License: GPL-2 +RoxygenNote: 5.0.1 +LinkingTo: Rcpp +Imports: Rcpp diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index c6e2d09..f6854c5 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -8,10 +8,15 @@ export(lar,fs, fixedLassoInf,print.fixedLassoInf, forwardStop, estimateSigma, - estimateLambda, manyMeans,print.manyMeans, groupfs,groupfsInf, - scaleGroups,factorDesign + scaleGroups,factorDesign, + TG.pvalue, + TG.limits, + TG.interval, + debiasingMatrix, + randomizedLasso, + randomizedLassoInf ) S3method("coef", "lar") @@ -25,6 +30,8 @@ S3method("print", "fs") S3method("plot", "fs") S3method("print", "fsInf") S3method("print", "fixedLassoInf") +S3method("print", "fixedLogitLassoInf") +S3method("print", "fixedCoxLassoInf") S3method("print", "manyMeans") S3method("print", "groupfs") S3method("print", "groupfsInf") @@ -32,7 +39,12 @@ S3method("print", "groupfsInf") useDynLib("selectiveInference") import(glmnet) import(intervals) -importFrom("graphics", "abline", "axis", "matplot") -importFrom("stats", "dnorm", "lsfit", "pexp", "pnorm", "predict", - "qnorm", "rnorm", "sd", "uniroot", "dchisq", "model.matrix", "pchisq") - +import(survival) +importFrom("graphics", abline, axis, matplot) +importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, + qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq, resid) +importFrom("stats", "coef", "df", "lm", "pf") +importFrom("stats", "glm", "residuals", "vcov") +importFrom("stats", "rbinom", "rexp") +importFrom("Rcpp", "sourceCpp") +importFrom("adaptMCMC", "MCMC") diff --git a/selectiveInference/R/funs.common.R b/selectiveInference/R/funs.common.R index 7a4b9aa..678d736 100644 --- a/selectiveInference/R/funs.common.R +++ b/selectiveInference/R/funs.common.R @@ -32,7 +32,7 @@ standardize <- function(x, y, intercept, normalize) { y = as.numeric(y) n = nrow(x) p = ncol(x) - + if (intercept) { bx = colMeans(x) by = mean(y) @@ -56,14 +56,14 @@ standardize <- function(x, y, intercept, normalize) { # Interpolation function to get coefficients -coef.interpolate <- function(betas, s, knots, dec=TRUE) { - # Sort the s values - o = order(s,dec=dec) +coef.interpolate <- function(beta, s, knots, decreasing=TRUE) { + # Sort the s values + o = order(s,decreasing=decreasing) s = s[o] - + k = length(s) mat = matrix(rep(knots,each=k),nrow=k) - if (dec) b = s >= mat + if (decreasing) b = s >= mat else b = s <= mat blo = max.col(b,ties.method="first") bhi = pmax(blo-1,1) @@ -72,8 +72,8 @@ coef.interpolate <- function(betas, s, knots, dec=TRUE) { p = numeric(k) p[i] = 0 p[!i] = ((s-knots[blo])/(knots[bhi]-knots[blo]))[!i] - - beta = t((1-p)*t(betas[,blo,drop=FALSE]) + p*t(betas[,bhi,drop=FALSE])) + + beta = t((1-p)*t(beta[,blo,drop=FALSE]) + p*t(beta[,bhi,drop=FALSE])) colnames(beta) = as.character(round(s,3)) rownames(beta) = NULL @@ -100,7 +100,7 @@ checkargs.misc <- function(sigma=NULL, alpha=NULL, k=NULL, mult=NULL, ntimes=NULL, beta=NULL, lambda=NULL, tol.beta=NULL, tol.kkt=NULL, bh.q=NULL) { - + if (!is.null(sigma) && sigma <= 0) stop("sigma must be > 0") if (!is.null(lambda) && lambda < 0) stop("lambda must be >= 0") if (!is.null(alpha) && (alpha <= 0 || alpha >= 1)) stop("alpha must be between 0 and 
1") @@ -144,3 +144,37 @@ estimateSigma <- function(x, y, intercept=TRUE, standardize=TRUE) { return(list(sigmahat=sigma, df=nz)) } +# Update the QR factorization, after a column has been +# added. Here Q1 is m x n, Q2 is m x k, and R is n x n. + +updateQR <- function(Q1,Q2,R,col) { + m = nrow(Q1) + n = ncol(Q1) + k = ncol(Q2) + + a = update1_(as.matrix(Q2), t(Q2)%*%col, m, k) # Rcpp call + + Q2 = matrix(a$Q2,nrow=m) + w = c(t(Q1)%*%col,a$w) + + # Re-structure: delete a column from Q2, add one to + # Q1, and expand R + Q1 = cbind(Q1,Q2[,1]) + Q2 = Q2[,-1,drop=FALSE] + R = rbind(R,rep(0,n)) + R = cbind(R,w[Seq(1,n+1)]) + + return(list(Q1=Q1,Q2=Q2,R=R)) +} + +# Moore-Penrose pseudo inverse for symmetric matrices + +pinv <- function(A, tol=.Machine$double.eps) { + e = eigen(A) + v = Re(e$vec) + d = Re(e$val) + d[d > tol] = 1/d[d > tol] + d[d < tol] = 0 + if (length(d)==1) return(v*d*v) + else return(v %*% diag(d) %*% t(v)) +} diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index f3d99ea..5df84d3 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -2,156 +2,539 @@ # for the solution of # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 -fixedLassoInf <- function(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha=0.1, - type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE) { - +fixedLassoInf <- function(x, y, beta, + lambda, family=c("gaussian","binomial","cox"), + intercept=TRUE, add.targets=NULL, status=NULL, + sigma=NULL, alpha=0.1, + type=c("partial", "full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE, + linesearch.try=10) { + + family = match.arg(family) this.call = match.call() type = match.arg(type) - checkargs.xy(x,y) - if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") - if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") - checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, - gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) - if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { - warning("Package Rmpfr is not installed, reverting to standard precision") - bits = NULL - } - n = nrow(x) - p = ncol(x) - beta = as.numeric(beta) - if (length(beta) != p) stop("beta must have length equal to ncol(x)") - - # If glmnet was run with an intercept term, center x and y - if (intercept==TRUE) { - obj = standardize(x,y,TRUE,FALSE) - x = obj$x - y = obj$y + if(family=="binomial") { + if(type!="partial") stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + else if(family=="cox") { + if(type!="partial") stop("Only type= partial allowed with Cox family") + out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta, + tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) } - - # Check the KKT conditions - g = t(x)%*%(y-x%*%beta) / lambda - if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) - warning(paste("Solution beta does not satisfy the KKT conditions", - "(to within specified tolerances)")) - - vars = which(abs(beta) > tol.beta / sqrt(colSums(x^2))) - if (any(sign(g[vars]) != sign(beta[vars]))) - warning(paste("Solution beta does not satisfy the KKT conditions", - 
"(to within specified tolerances). You might try rerunning", - "glmnet with a lower setting of the", - "'thresh' parameter, for a more accurate convergence.")) - # Get lasso polyhedral region, of form Gy >= u - out = fixedLasso.poly(x,y,beta,lambda,vars) - G = out$G - u = out$u - - # Check polyhedral region - tol.poly = 0.01 - if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) - stop(paste("Polyhedral constraints not satisfied; you must recompute beta", - "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", - "and check whether the specified value of lambda is too small", - "(beyond the grid of values visited by glmnet).", - "You might also try rerunning glmnet with a lower setting of the", - "'thresh' parameter, for a more accurate convergence.")) - - # Estimate sigma - if (is.null(sigma)) { - if (n >= 2*p) { - oo = intercept - sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + else{ + + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (type == "full") { + if (p > n) { + # need intercept (if there is one) for debiased lasso + hbeta = beta + if (intercept == T) { + if (length(beta) != p + 1) { + stop("Since type='full', p > n, and intercept=TRUE, beta must have length equal to ncol(x)+1") + } + # remove intercept if included + beta = beta[-1] + } else if (length(beta) != p) { + stop("Since family='gaussian', type='full' and intercept=FALSE, beta must have length equal to ncol(x)") + } + } + } else if (length(beta) != p) { + stop("Since family='gaussian' and type='partial', beta must have length equal to ncol(x)") } + + checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + if (!is.null(add.targets) && (!is.vector(add.targets) + || !all(is.numeric(add.targets)) || !all(add.targets==floor(add.targets)) + || !all(add.targets >= 1 && add.targets <= p))) { + stop("'add.targets' must be a vector of integers between 1 and p") + } + + # If glmnet was run with an intercept term, center x and y + if (intercept==TRUE) { + obj = standardize(x,y,TRUE,FALSE) + x = obj$x + y = obj$y + } + + # Check the KKT conditions + g = t(x)%*%(y-x%*%beta) / lambda + if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + tol.coef = tol.beta * sqrt(n / colSums(x^2)) + vars = which(abs(beta) > tol.coef) + sign_vars = sign(beta[vars]) + + if(sum(vars)==0){ + cat("Empty model",fill=T) + return() + } + + if (any(sign(g[vars]) != sign_vars)) { + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). 
You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + } + + # Get lasso polyhedral region, of form Gy >= u + + logical.vars=rep(FALSE,p) + logical.vars[vars]=TRUE + + if (type == 'full') { + out = fixedLassoPoly(x, y, lambda, beta, logical.vars, inactive=TRUE) + } else { - sigma = sd(y) - warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), - "you may want to use the estimateSigma function")) + out = fixedLassoPoly(x, y, lambda, beta, logical.vars) } - } - - k = length(vars) - pv = vlo = vup = numeric(k) - vmat = matrix(0,k,n) - ci = tailarea = matrix(0,k,2) - sign = numeric(k) - - if (type=="full" & p > n) - warning(paste("type='full' does not make sense when p > n;", - "switching to type='partial'")) - - if (type=="partial" || p > n) { - xa = x[,vars,drop=F] - M = pinv(crossprod(xa)) %*% t(xa) - } - else { - M = pinv(crossprod(x)) %*% t(x) - M = M[vars,,drop=F] - } - + + A = out$A + b = out$b + + # Check polyhedral region + tol.poly = 0.01 + if (max(A %*% y - b) > tol.poly * sqrt(sum(y^2))) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + # add additional targets for inference if provided + if (!is.null(add.targets)) { + # vars is boolean... 
+ old_vars = vars & TRUE + vars[add.targets] = TRUE + sign_vars = sign(beta[vars]) + sign_vars[!old_vars] = NA + stop("`add.targets` not fully implemented yet") + } + + k = length(vars) + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + + if (type=="full" & p > n) { + if (intercept == TRUE) { + pp=p+1 + Xint <- cbind(rep(1,n),x) + # indices of selected predictors + S = c(1,vars + 1) + } else { + pp=p + Xint <- x + # indices of selected predictors + S = vars + # notS = which(abs(beta) <= tol.coef) + } + + notS = setdiff(1:pp,S) + + XS = Xint[,S] + hbetaS = hbeta[S] + + # Reorder so that active set S is first + Xordered = Xint[,c(S,notS,recursive=T)] + hsigmaS = 1/n*(t(XS)%*%XS) # hsigma[S,S] + hsigmaSinv = solve(hsigmaS) # pinv(hsigmaS) + + FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) + GS = cbind(diag(length(S)),matrix(0,length(S),pp-length(S))) + + is_wide = n < (2 * p) # somewhat arbitrary decision -- it is really for when we don't want to form with pxp matrices + + # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R + if (!is_wide) { + hsigma = 1/n*(t(Xordered)%*%Xordered) + htheta = debiasingMatrix(hsigma, is_wide, n, 1:length(S), verbose=FALSE, max_try=linesearch.try, warn_kkt=TRUE) + ithetasigma = (GS-(htheta%*%hsigma)) + } else { + htheta = debiasingMatrix(Xordered, is_wide, n, 1:length(S), verbose=FALSE, max_try=linesearch.try, warn_kkt=TRUE) + ithetasigma = (GS-((htheta%*%t(Xordered)) %*% Xordered)/n) + } + + M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) + + # vector which is offset for testing debiased beta's + null_value <- (((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) + + if (intercept == T) { + M = M[-1,] # remove intercept row + null_value = null_value[-1] # remove intercept element + } + } else if (type=="partial" || p > n) { + xa = x[,vars,drop=F] + M = pinv(crossprod(xa)) %*% t(xa) + null_value = rep(0,k) + } else { + M = pinv(crossprod(x)) %*% t(x) + M = M[vars,,drop=F] + null_value = rep(0,k) + } + for (j in 1:k) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - + vj = M[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - sign[j] = sign(sum(vj*y)) - vj = sign[j] * vj - a = poly.pval(y,G,u,vj,sigma,bits) - pv[j] = a$pv * mj # Unstandardize (mult by norm of vj) + + if (!is.na(sign_vars[j])) { + vj = sign_vars[j] * vj + } + + limits.info = TG.limits(y, A, b, vj, Sigma=diag(rep(sigma^2, n))) + a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) + pv[j] = a$pv + if (is.na(sign_vars[j])) { # for variables not in the active set, report 2-sided pvalue + pv[j] = 2 * min(pv[j], 1 - pv[j]) + } vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) - vmat[j,] = vj * mj # Unstandardize (mult by norm of vj) - - a = poly.int(y,G,u,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) - ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) + if (!is.na(sign_vars[j])) { + vmat[j,] = vj * mj * sign_vars[j] # Unstandardize (mult by norm of vj) and fix sign + } else { + vmat[j,] = vj * mj # Unstandardize (mult by norm of vj) + } + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign_vars[j]==-1), + bits=bits) + ci[j,] = (a$int-null_value[j]) * mj # Unstandardize (mult by norm of vj) tailarea[j,] = a$tailarea } - - out = list(type=type,lambda=lambda,pv=pv,ci=ci, - 
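
With type = "partial" the contrast matrix above is M = (X_A' X_A)^+ X_A', so the coefficients that fixedLassoInf reports are simply the least squares coefficients of the (centered) response on the selected columns. A sketch of that correspondence (simulated data; the lambda value is arbitrary):

```r
# Sketch: type="partial" targets are least squares coefficients on the selected columns.
library(glmnet)
library(selectiveInference)

set.seed(44)
n = 100; p = 10; sigma = 1
x = matrix(rnorm(n*p), n, p)
y = as.numeric(x %*% c(3, 2, rep(0, p-2)) + sigma * rnorm(n))

lambda = 20
gfit = glmnet(x, y, standardize = FALSE)
bhat = as.numeric(coef(gfit, s = lambda/n, exact = TRUE, x = x, y = y)[-1])

out = fixedLassoInf(x, y, bhat, lambda, sigma = sigma)
cbind(selective  = out$coef0,
      partial_ls = coef(lm(y ~ x[, out$vars]))[-1])   # the two columns should agree
```
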
tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, - vars=vars,sign=sign,sigma=sigma,alpha=alpha, - call=this.call) + + out = list(type=type, + lambda=lambda, + pv=pv, + ci=ci, + tailarea=tailarea, + vlo=vlo, + vup=vup, + vmat=vmat, + y=y, + vars=vars, + sign=sign_vars, + sigma=sigma, + alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, + call=this.call) + class(out) = "fixedLassoInf" return(out) } +} + +############################# + + +fixedLassoPoly = + function(X, y, lambda, beta, active, inactive = FALSE) { + + XA = X[, active, drop=FALSE] + XI = X[, !active, drop=FALSE] + XAi = pinv(crossprod(XA)) + XAp = XAi %*% t(XA) + Ir = t(XI) %*% t(XAp) # matrix in the "irrepresentable" condition + + if(length(lambda)>1) { + lambdaA= lambda[active] + lambdaI = lambda[!active] + } else { + lambdaA = rep(lambda, sum(active)) + lambdaI = rep(lambda, sum(!active)) + } + + penalized = lambdaA != 0 + signA = sign(beta[active]) + active_subgrad = signA * lambdaA + if (length(signA)>1) sign_diag = diag(signA) + if (length(signA)==1) sign_diag = matrix(signA, 1, 1) + + if (inactive) { # should we include the inactive constraints? + RA = diag(rep(1, nrow(XA))) - XA %*% XAp # RA is residual forming matrix of selected model + + A = rbind( + t(XI) %*% RA, + -t(XI) %*% RA, + -(sign_diag %*% XAp)[penalized,] # no constraints for unpenalized + ) + + b = c( + lambdaI - Ir %*% active_subgrad, + lambdaI + Ir %*% active_subgrad, + -(sign_diag %*% XAi %*% active_subgrad)[penalized]) + } else { + A = -(sign_diag %*% XAp)[penalized,] # no constraints for unpenalized + b = -(sign_diag %*% XAi %*% active_subgrad)[penalized] + } + + return(list(A=A, b=b)) + } ############################## -fixedLasso.poly <- function(x, y, beta, lambda, a) { - xa = x[,a,drop=F] - xac = x[,!a,drop=F] - xai = pinv(crossprod(xa)) - xap = xai %*% t(xa) - za = sign(beta[a]) - if (length(za)>1) dz = diag(za) - if (length(za)==1) dz = matrix(za,1,1) - - P = diag(1,nrow(xa)) - xa %*% xap - G = -rbind(1/lambda * t(xac) %*% P, - -1/lambda * t(xac) %*% P, - -dz %*% xap) - u = -c(1 - t(xac) %*% t(xap) %*% za, - 1 + t(xac) %*% t(xap) %*% za, - -lambda * dz %*% xai %*% za) - - return(list(G=G,u=u)) +## Approximates inverse covariance matrix theta +## using coordinate descent + +debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n depending on is_wide + is_wide, + nsample, + rows, + verbose=FALSE, + bound=NULL, # starting value of bound + linesearch=TRUE, # do a linesearch? + scaling_factor=1.5, # multiplicative factor for linesearch + max_active=NULL, # how big can active set get? + max_try=10, # how many steps in linesearch? + warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? + max_iter=50, # how many iterations for each optimization problem + kkt_stop=TRUE, # stop based on KKT conditions? + parameter_stop=TRUE, # stop based on relative convergence of parameter? + objective_stop=TRUE, # stop based on relative decrease in objective? 
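
fixedLassoPoly, defined above, returns the affine representation {y : A y <= b} of the Lee et al. selection event. A sketch that builds the region for an exact glmnet solution and checks that the observed response satisfies it (fixedLassoPoly is internal, so ::: access is assumed, and centered data with no intercept are used to match the representation):

```r
# Sketch: build the polyhedral selection region for a lasso fit and check A y <= b.
library(glmnet)
library(selectiveInference)

set.seed(24)
n = 100; p = 8
x = scale(matrix(rnorm(n*p), n, p), TRUE, FALSE)
y = as.numeric(x %*% c(2, -2, rep(0, p-2)) + rnorm(n)); y = y - mean(y)

lambda = 15
gfit = glmnet(x, y, standardize = FALSE, intercept = FALSE)
bhat = as.numeric(coef(gfit, s = lambda/n, exact = TRUE, x = x, y = y)[-1])
active = abs(bhat) > 1e-8

poly = selectiveInference:::fixedLassoPoly(x, y, lambda, bhat, active, inactive = TRUE)
max(poly$A %*% y - poly$b)   # should be <= 0, up to the accuracy of the lasso solution
```
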
+ kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-4, # tolerance for relative convergence of parameter + objective_tol=1.e-4 # tolerance for relative decrease in objective + ) { + + + if (is.null(max_active)) { + max_active = max(50, 0.3 * nsample) + } + + p = ncol(Xinfo); + M = matrix(0, length(rows), p); + + if (is.null(bound)) { + bound = (1/sqrt(nsample)) * qnorm(1-(0.1/(p^2))) + } + + xperc = 0; + xp = round(p/10); + idx = 1; + for (row in rows) { + if ((idx %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + + output = debiasingRow(Xinfo, # could be X or t(X) %*% X / n depending on is_wide + is_wide, + row, + bound, + linesearch=linesearch, + scaling_factor=scaling_factor, + max_active=max_active, + max_try=max_try, + warn_kkt=FALSE, + max_iter=max_iter, + kkt_stop=kkt_stop, + parameter_stop=parameter_stop, + objective_stop=objective_stop, + kkt_tol=kkt_tol, + parameter_tol=parameter_tol, + objective_tol=objective_tol) + + if (warn_kkt && (!output$kkt_check)) { + warning("Solution for row of M does not seem to be feasible") + } + + if (!is.null(output$soln)) { + M[idx,] = output$soln; + } else { + stop(paste("Unable to approximate inverse row ", row)); + } + + idx = idx + 1; + } + return(M) } -# Moore-Penrose pseudo inverse for symmetric matrices +# Find one row of the debiasing matrix -- assuming X^TX/n is not too large -- i.e. X is tall + +debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n depending on is_wide + is_wide, + row, + bound, + linesearch=TRUE, # do a linesearch? + scaling_factor=1.5, # multiplicative factor for linesearch + max_active=NULL, # how big can active set get? + max_try=10, # how many steps in linesearch? + warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? + max_iter=50, # how many iterations for each optimization problem + kkt_stop=TRUE, # stop based on KKT conditions? + parameter_stop=TRUE, # stop based on relative convergence of parameter? + objective_stop=TRUE, # stop based on relative decrease in objective? + kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-4, # tolerance for relative convergence of parameter + objective_tol=1.e-4 # tolerance for relative decrease in objective + ) { + + p = ncol(Xinfo) + + if (is.null(max_active)) { + max_active = min(nrow(Xinfo), ncol(Xinfo)) + } + + + # Initialize variables + + soln = rep(0, p) + soln = as.numeric(soln) + ever_active = rep(0, p) + ever_active[1] = row # 1-based + ever_active = as.integer(ever_active) + nactive = as.integer(1) + + linear_func = rep(0, p) + linear_func[row] = -1 + linear_func = as.numeric(linear_func) + gradient = 1. 
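
debiasingMatrix approximates selected rows of an inverse covariance matrix via the coordinate-descent quadratic program above. In a well-conditioned, low-dimensional problem its rows should be close to the corresponding rows of the exact inverse, which gives a rough sanity check (a sketch using the default bound; how close the two matrices are depends on that bound):

```r
# Sketch: compare debiasingMatrix rows with the exact inverse of Sigma = X'X/n (n >> p).
library(selectiveInference)

set.seed(2)
n = 400; p = 10
X = matrix(rnorm(n*p), n, p)
Sigma = crossprod(X) / n

M = debiasingMatrix(Sigma, is_wide = FALSE, nsample = n, rows = 1:3)
round(M, 2)
round(solve(Sigma)[1:3, ], 2)   # M approximates these rows; agreement tightens as `bound` shrinks
```
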
* linear_func + + counter_idx = 1; + incr = 0; + + last_output = NULL + + if (is_wide) { + Xsoln = as.numeric(rep(0, nrow(Xinfo))) + } + + while (counter_idx < max_try) { + + if (!is_wide) { + result = solve_QP(Xinfo, # this is non-neg-def matrix + bound, + max_iter, + soln, + linear_func, + gradient, + ever_active, + nactive, + kkt_tol, + objective_tol, + parameter_tol, + max_active, + kkt_stop, + objective_stop, + parameter_stop) + } else { + result = solve_QP_wide(Xinfo, # this is a design matrix + as.numeric(rep(bound, p)), # vector of Lagrange multipliers + 0, # ridge_term + max_iter, + soln, + linear_func, + gradient, + Xsoln, + ever_active, + nactive, + kkt_tol, + objective_tol, + parameter_tol, + max_active, + kkt_stop, + objective_stop, + parameter_stop) + + } + + iter = result$iter + + # Logic for whether we should continue the line search + + if (!linesearch) { + break + } + + if (counter_idx == 1){ + if (iter == (max_iter+1)){ + incr = 1; # was the original problem feasible? 1 if not + } else { + incr = 0; # original problem was feasible + } + } + + if (incr == 1) { # trying to find a feasible point + if ((iter < (max_iter+1)) && (counter_idx > 1)) { + break; # we've found a feasible point and solved the problem + } + bound = bound * scaling_factor; + } else { # trying to drop the bound parameter further + if ((iter == (max_iter + 1)) && (counter_idx > 1)) { + result = last_output; # problem seems infeasible because we didn't solve it + break; # so we revert to previously found solution + } + bound = bound / scaling_factor; + } + + # If the active set has grown to a certain size + # then we stop, presuming problem has become + # infeasible. + + # We revert to the previous solution + + if (result$max_active_check) { + result = last_output; + break; + } + + counter_idx = counter_idx + 1 + last_output = list(soln=result$soln, + kkt_check=result$kkt_check) + } + + # Check feasibility + + if (warn_kkt && (!result$kkt_check)) { + warning("Solution for row of M does not seem to be feasible") + } + + return(list(soln=result$soln, + kkt_check=result$kkt_check, + gradient=result$gradient)) -pinv <- function(A, tol=.Machine$double.eps) { - e = eigen(A) - v = Re(e$vec) - d = Re(e$val) - d[d > tol] = 1/d[d > tol] - d[d < tol] = 0 - if (length(d)==1) return(v*d*v) - else return(v %*% diag(d) %*% t(v)) } + ############################## print.fixedLassoInf <- function(x, tailarea=TRUE, ...) { @@ -160,12 +543,12 @@ print.fixedLassoInf <- function(x, tailarea=TRUE, ...) { cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) - + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) cat("",fill=T) tab = cbind(x$vars, - round(x$sign*x$vmat%*%x$y,3), - round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$coef0,3), + round(x$coef0 / x$sd,3), round(x$pv,3),round(x$ci,3)) colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") if (tailarea) { @@ -174,21 +557,20 @@ print.fixedLassoInf <- function(x, tailarea=TRUE, ...) 
{ } rownames(tab) = rep("",nrow(tab)) print(tab) - + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", ifelse(x$type=="partial","partial","full"))) invisible() } -estimateLambda <- function(x, sigma, nsamp=1000){ - checkargs.xy(x,rep(0,nrow(x))) - if(nsamp < 10) stop("More Monte Carlo samples required for estimation") - if (length(sigma)!=1) stop("sigma should be a number > 0") - if (sigma<=0) stop("sigma should be a number > 0") - - n = nrow(x) - eps = sigma*matrix(rnorm(nsamp*n),n,nsamp) - lambda = 2*mean(apply(t(x)%*%eps,2,max)) - return(lambda) -} - +#estimateLambda <- function(x, sigma, nsamp=1000){ +# checkargs.xy(x,rep(0,nrow(x))) +# if(nsamp < 10) stop("More Monte Carlo samples required for estimation") +# if (length(sigma)!=1) stop("sigma should be a number > 0") + # if (sigma<=0) stop("sigma should be a number > 0") + + # n = nrow(x) + # eps = sigma*matrix(rnorm(nsamp*n),n,nsamp) + # lambda = 2*mean(apply(t(x)%*%eps,2,max)) + # return(lambda) +#} diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R new file mode 100644 index 0000000..5cbe330 --- /dev/null +++ b/selectiveInference/R/funs.fixedCox.R @@ -0,0 +1,124 @@ +fixedCoxLassoInf=function(x, y, status, + beta, lambda, + alpha=.1, type=c("partial"), + tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), + bits=NULL, verbose=FALSE, + this.call=NULL){ + + checkargs.xy(x,y) + if(is.null(status)) stop("Must supply `status' argument") +if( sum(status==0)+sum(status==1)!=length(y)) stop("status vector must have values 0 or 1") + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + n=nrow(x) + p=ncol(x) + nvar=sum(beta!=0) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + + + m=beta!=0 +vars=which(m) +if(sum(m)>0){ + bhat=beta[beta!=0] #penalized coefs just for active variables + sign_bhat=sign(bhat) + + #check KKT + + aaa=coxph(Surv(y,status)~x[,m],init=bhat,iter.max=0) # this gives the Cox model at exactly bhat + # so when we compute gradient and score + # we are evaluating at the LASSO solution + # naming of variables could be improved... + res=residuals(aaa,type="score") +if(!is.matrix(res)) res=matrix(res,ncol=1) +scor=colSums(res) + g=(scor+lambda*sign_bhat)/(2*lambda) +# cat(c(g,lambda,tol.kkt),fill=T) + if (any(abs(g) > 1+tol.kkt) ) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + +# Hessian of partial likelihood at the LASSO solution +MM=vcov(aaa) + +bbar=(bhat+lambda*MM%*%sign_bhat) +A1=-(mydiag(sign_bhat)) +b1= -(mydiag(sign_bhat)%*%MM)%*%sign_bhat*lambda + + temp=max(A1%*%bbar-b1) + + +# compute p-values + +# JT: are we sure the signs of these are correctly handled? 
+# two sided p-values numerically agree with python but +# the one sided p-values are a bit off + + for(jj in 1:length(bbar)){ + vj=rep(0,length(bbar));vj[jj]=sign_bhat[jj] + + + junk=TG.pvalue(bbar, A1, b1, vj,MM) + + pv[jj] = junk$pv + vlo[jj]=junk$vlo + vup[jj]=junk$vup + sd[jj]=junk$sd + + junk2=TG.interval(bbar, A1, b1, vj, MM, alpha, flip=(sign_bhat[jj]==-1)) + ci[jj,]=junk2$int + tailarea[jj,] = junk2$tailarea + + } + # JT: these don't seem to be the real one-step estimators + fit0=coxph(Surv(y,status)~x[,m]) + coef0=fit0$coef + se0=sqrt(diag(fit0$var)) + zscore0=coef0/se0 + + out = list(lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call) + class(out) = "fixedCoxLassoInf" +} +return(out) +} + + + +print.fixedCoxLassoInf <- function(x, tailarea=TRUE, ...) { + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + + diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R new file mode 100644 index 0000000..60b6451 --- /dev/null +++ b/selectiveInference/R/funs.fixedLogit.R @@ -0,0 +1,151 @@ + +fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ + + + type = match.arg(type) + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + + n=length(y) + p=ncol(x) + # I assume that intcpt was used + if(length(beta)!=p+1) stop("Since family='binomial', beta must be of length ncol(x)+1, that is, it should include an intercept") + nvar=sum(beta[-1]!=0) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + +#do we need to worry about standardization? 
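
Both the Cox and the logistic code above lean on TG.pvalue, the truncated-Gaussian p-value that the updated NAMESPACE now exports. Its behaviour is easiest to see on a toy one-dimensional problem where the answer is available in closed form (all values are illustrative):

```r
# Sketch: TG.pvalue on a toy problem. Z ~ N(0,1), selection event {Z >= 1},
# written as A Z <= b with A = -1, b = -1; test H0: E[Z] = 0 using eta' Z = Z.
library(selectiveInference)

A = matrix(-1, 1, 1); b = -1
eta = 1; Sigma = matrix(1, 1, 1)
Z = 1.3                              # observed value, inside the selection region
out = TG.pvalue(Z, A, b, eta, Sigma)
out$pv                               # equals (1 - pnorm(1.3)) / (1 - pnorm(1))
c(out$vlo, out$vup)                  # truncation interval, here [1, Inf)
```
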
+ +# obj = standardize(x,y,TRUE,FALSE) + # x = obj$x + # y = obj$y + + m=beta[-1]!=0 #active set + + bhat=c(beta[1],beta[-1][beta[-1]!=0]) # intcpt plus active vars + sign_bhat=sign(bhat) + lam2m=diag(c(0,rep(lambda,sum(m)))) + + + xxm=cbind(1,x[,m]) + + etahat = xxm %*% bhat + prhat = as.vector(exp(etahat) / (1 + exp(etahat))) + ww=prhat*(1-prhat) + # w=diag(ww) + +#check KKT + z=etahat+(y-prhat)/ww + # g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda + g=scale(t(x),FALSE,1/ww)%*%(z-etahat)/lambda # negative gradient scaled by lambda + if (any(abs(g) > 1+tol.kkt) ) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + vars = which(abs(beta[-1]) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[-1][vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + #constraints for active variables + # MM=solve(t(xxm)%*%w%*%xxm) + MM=solve(scale(t(xxm),F,1/ww)%*%xxm) + gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized + # at exact LASSO solution it should be sign_bhat[-1] + dbeta = MM %*% gm + + # bbar=(bhat+lam2m%*%MM%*%sign_bhat) # JT: this is wrong, shouldn't use sign of intercept anywhere... + bbar = bhat - dbeta + + A1=-(mydiag(sign_bhat))[-1,] + b1= (sign_bhat * dbeta)[-1] + + tol.poly = 0.01 + if (max((A1 %*% bbar) - b1) > tol.poly) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + + + for(jj in 1:sum(m)){ + vj=c(rep(0,sum(m)+1));vj[jj+1]=sign_bhat[jj+1] + # compute p-values + junk=TG.pvalue(bbar, A1, b1, vj, MM) + pv[jj] = junk$pv + + vlo[jj]=junk$vlo + vup[jj]=junk$vup + sd[jj]=junk$sd + + junk2=TG.interval(bbar, A1, b1, vj, MM,alpha=alpha, flip=(sign_bhat[jj+1]==-1)) + + ci[jj,]=junk2$int + tailarea[jj,] = junk2$tailarea + } + + # JT: these are not the one step estimators but they are close + fit0=glm(y~x[,m],family="binomial") + sfit0=summary(fit0) + coef0=bbar[-1] #fit0$coef[-1] + se0=sqrt(diag(MM)[-1]) # sfit0$cov.scaled)[-1]) + zscore0=coef0/se0 + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call, + info.matrix=MM) # info.matrix is output just for debugging purposes at the moment + class(out) = "fixedLogitLassoInf" + return(out) + + } + + + +print.fixedLogitLassoInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + + + diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 3a46b29..669cf14 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -27,61 +27,70 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, ##### # Find the first variable to enter and its sign - xx = scale(x,center=F,scale=sqrt(colSums(x^2))) - uhat = t(xx)%*%y - ihit = which.max(abs(uhat)) # Hitting coordinate - s = Sign(uhat[ihit]) # Sign + working_x = scale(x,center=F,scale=sqrt(colSums(x^2))) + score = t(working_x)%*%y + i_hit = which.max(abs(score)) # Hitting coordinate + sign_hit = Sign(score[i_hit]) # Sign + signs = sign_hit # later signs will be appended to `signs` if (verbose) { - cat(sprintf("1. Adding variable %i, |A|=%i...",ihit,1)) + cat(sprintf("1. Adding variable %i, |A|=%i...",i_hit,1)) } # Now iteratively find the new FS estimates # Things to keep track of, and return at the end + # JT: I guess the "buf" just saves us from making huge + # matrices we don't need? + buf = min(maxsteps,500) action = numeric(buf) # Actions taken df = numeric(buf) # Degrees of freedom beta = matrix(0,p,buf) # FS estimates - action[1] = ihit + action[1] = i_hit df[1] = 0 beta[,1] = 0 # Gamma matrix! gbuf = max(2*p*3,2000) # Space for 3 steps, at least - gi = 0 + gi = 0 # index into rows of Gamma matrix + Gamma = matrix(0,gbuf,n) - Gamma[gi+Seq(1,p-1),] = t(s*xx[,ihit]+xx[,-ihit]); gi = gi+p-1 - Gamma[gi+Seq(1,p-1),] = t(s*xx[,ihit]-xx[,-ihit]); gi = gi+p-1 - Gamma[gi+1,] = t(s*xx[,ihit]); gi = gi+1 + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+1,] = t(sign_hit*working_x[,i_hit]); gi = gi+1 - # nk - nk = numeric(buf) + # nconstraint + nconstraint = numeric(buf) vreg = matrix(0,buf,n) - nk[1] = gi - vreg[1,] = s*x[,ihit] / sum(x[,ihit]^2) + nconstraint[1] = gi + vreg[1,] = sign_hit*x[,i_hit] / sum(x[,i_hit]^2) # Other things to keep track of, but not return r = 1 # Size of active set - A = ihit # Active set - I = Seq(1,p)[-ihit] # Inactive set - X1 = x[,ihit,drop=FALSE] # Matrix X[,A] - X2 = x[,-ihit,drop=FALSE] # Matrix X[,I] + A = i_hit # Active set -- JT: isn't this basically the same as action? + I = Seq(1,p)[-i_hit] # Inactive set + X_active = x[,i_hit,drop=FALSE] # Matrix X[,A] + X_inactive = x[,-i_hit,drop=FALSE] # Matrix X[,I] k = 2 # What step are we at? + # JT Why keep track of r and k instead of just saying k=r+1? + + # Compute a skinny QR decomposition of X_active + # JT: obs was used as variable name above -- this is something different, no? 
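
The first forward stepwise step above picks the column maximizing |x_j' y| after rescaling each column to unit norm, which (with the default centering) is the variable most correlated with the response. A tiny sketch of that equivalence on simulated data:

```r
# Sketch: the first fs variable is the one most correlated with y.
library(selectiveInference)

set.seed(7)
n = 60; p = 8
x = matrix(rnorm(n*p), n, p)
y = as.numeric(x %*% c(2, rep(0, p-1)) + rnorm(n))

fsfit = fs(x, y)
fsfit$action[1]               # first variable entered
which.max(abs(cor(x, y)))     # should agree
```
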
+ # changed it to qr_X - # Compute a skinny QR decomposition of X1 - obj = qr(X1) - Q = qr.Q(obj,complete=TRUE) - Q1 = Q[,1,drop=FALSE]; - Q2 = Q[,-1,drop=FALSE] - R = qr.R(obj) + qr_X = qr(X_active) + Q = qr.Q(qr_X,complete=TRUE) + Q_active = Q[,1,drop=FALSE]; + Q_inactive = Q[,-1,drop=FALSE] + R = qr.R(qr_X) # Throughout the algorithm, we will maintain - # the decomposition X1 = Q1*R. Dimenisons: - # X1: n x r - # Q1: n x r - # Q2: n x (n-r) + # the decomposition X_active = Q_active*R. Dimensions: + # X_active: n x r + # Q_active: n x r + # Q_inactive: n x (n-r) # R: r x r while (k<=maxsteps) { @@ -92,56 +101,61 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, action = c(action,numeric(buf)) df = c(df,numeric(buf)) beta = cbind(beta,matrix(0,p,buf)) - nk = c(nk,numeric(buf)) + nconstraint = c(nconstraint,numeric(buf)) vreg = rbind(vreg,matrix(0,buf,n)) } # Key quantities for the next entry - a = backsolve(R,t(Q1)%*%y) - X2perp = X2 - X1 %*% backsolve(R,t(Q1)%*%X2) - xx = scale(X2perp,center=F,scale=sqrt(colSums(X2perp^2))) - aa = as.numeric(t(xx)%*%y) + keepLs = backsolve(R,t(Q_active)%*%y) + X_inactive_resid = X_inactive - X_active %*% backsolve(R,t(Q_active)%*%X_inactive) + working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) + score = as.numeric(t(working_x)%*%y) # If the inactive set is empty, nothing will hit if (r==min(n-intercept,p)) break # Otherwise find the next hitting time else { - shits = Sign(aa) - hits = shits * aa - ihit = which.max(hits) - shit = shits[ihit] + sign_score = Sign(score) + abs_score = sign_score * score + i_hit = which.max(abs_score) + sign_hit = sign_score[i_hit] } # Record the solution - action[k] = I[ihit] + # what is the difference between "action" and "A"? + + action[k] = I[i_hit] df[k] = r - beta[A,k] = a + beta[A,k] = keepLs # Gamma matrix! if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) - xx = t(shits*t(xx)) - Gamma[gi+Seq(1,p-r),] = t(xx); gi = gi+p-r - Gamma[gi+Seq(1,p-r-1),] = t(xx[,ihit]-xx[,-ihit]); gi = gi+p-r-1 - Gamma[gi+1,] = t(xx[,ihit]); gi = gi+1 + working_x = t(sign_score*t(working_x)) + + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+1,] = t(working_x[,i_hit]); gi = gi+1 - # nk, regression contrast - nk[k] = gi - vreg[k,] = shit*X2perp[,ihit] / sum(X2perp[,ihit]^2) + # nconstraint, regression contrast + nconstraint[k] = gi + vreg[k,] = sign_hit*X_inactive_resid[,i_hit] / sum(X_inactive_resid[,i_hit]^2) # Update all of the variables r = r+1 - A = c(A,I[ihit]) - I = I[-ihit] - s = c(s,shit) - X1 = cbind(X1,X2[,ihit]) - X2 = X2[,-ihit,drop=FALSE] + A = c(A,I[i_hit]) + I = I[-i_hit] + signs = c(signs,sign_hit) + X_active = cbind(X_active,X_inactive[,i_hit]) + X_inactive = X_inactive[,-i_hit,drop=FALSE] # Update the QR decomposition - obj = updateQR(Q1,Q2,R,X1[,r]) - Q1 = obj$Q1 - Q2 = obj$Q2 - R = obj$R + updated_qr = updateQR(Q_active,Q_inactive,R,X_active[,r]) + Q_active = updated_qr$Q1 + + # JT: why do we store Q_inactive? Doesn't seem to be used. + Q_inactive = updated_qr$Q2 + R = updated_qr$R if (verbose) { cat(sprintf("\n%i. 
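
The incremental QR update used above is meant to maintain X_active = Q_active %*% R as columns enter the model. A sketch that checks this invariant directly on a small example (updateQR is internal, hence the ::: access; the data are hypothetical):

```r
# Sketch: check that updateQR keeps [X1, newcol] = Q1 %*% R after adding a column.
set.seed(5)
n = 30
X1 = matrix(rnorm(n * 2), n, 2)
newcol = rnorm(n)

qrX = qr(X1)
Q  = qr.Q(qrX, complete = TRUE)
Q1 = Q[, 1:2, drop = FALSE]
Q2 = Q[, -(1:2), drop = FALSE]
R  = qr.R(qrX)

upd = selectiveInference:::updateQR(Q1, Q2, R, newcol)
max(abs(upd$Q1 %*% upd$R - cbind(X1, newcol)))   # ~ 0 if the invariant holds
```
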
Adding variable %i, |A|=%i...",k,A[r],r)) @@ -156,7 +170,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, df = df[Seq(1,k-1),drop=FALSE] beta = beta[,Seq(1,k-1),drop=FALSE] Gamma = Gamma[Seq(1,gi),,drop=FALSE] - nk = nk[Seq(1,k-1)] + nconstraint = nconstraint[Seq(1,k-1)] vreg = vreg[Seq(1,k-1),,drop=FALSE] # If we reached the maximum number of steps @@ -176,7 +190,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Record the least squares solution. Note that # we have already computed this bls = rep(0,p) - bls[A] = a + bls[A] = keepLs } if (verbose) cat("\n") @@ -189,9 +203,9 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Assign column names colnames(beta) = as.character(Seq(1,k-1)) - out = list(action=action,sign=s,df=df,beta=beta, + out = list(action=action,sign=signs,df=df,beta=beta, completepath=completepath,bls=bls, - Gamma=Gamma,nk=nk,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, + Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, intercept=intercept,normalize=normalize,call=this.call) class(out) = "fs" return(out) @@ -212,8 +226,8 @@ coef.fs <- function(object, s, ...) { if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k)) knots = 1:k - dec = FALSE - return(coef.interpolate(beta,s,knots,dec)) + decreasing = FALSE + return(coef.interpolate(beta,s,knots,decreasing)) } # Prediction function for fs @@ -221,7 +235,10 @@ coef.fs <- function(object, s, ...) { predict.fs <- function(object, newx, s, ...) { beta = coef.fs(object,s) if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) - else newx = scale(newx,object$bx,FALSE) + else { + newx = matrix(newx,ncol=ncol(object$x)) + newx = scale(newx,object$bx,FALSE) + } return(newx %*% beta + object$by) } @@ -250,7 +267,7 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic p = ncol(x) n = nrow(x) G = obj$Gamma - nk = obj$nk + nconstraint = obj$nconstraint sx = obj$sx if (is.null(sigma)) { @@ -278,20 +295,26 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic for (j in 1:k) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - Gj = G[1:nk[j],] - uj = rep(0,nk[j]) + Aj = -G[1:nconstraint[j],] + bj = -rep(0,nconstraint[j]) vj = vreg[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - a = poly.pval(y,Gj,uj,vj,sigma,bits) + + limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n))) + a = TG.pvalue.base(limits.info, bits=bits) + pv[j] = a$pv sxj = sx[vars[j]] vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea } @@ -304,13 +327,13 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic out = aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) khat = out$khat m = out$stopped * ntimes - G = rbind(out$G,G[1:nk[khat+m],]) # Take ntimes more steps past khat - u = c(out$u,rep(0,nk[khat+m])) # (if we need to) + G = rbind(out$G,G[1:nconstraint[khat+m],]) # Take ntimes more steps past khat + u = c(out$u,rep(0,nconstraint[khat+m])) # (if we need to) kk = khat 
} else { - G = G[1:nk[k],] - u = rep(0,nk[k]) + G = G[1:nconstraint[k],] + u = rep(0,nconstraint[k]) kk = k } @@ -330,23 +353,29 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic vj = vj / mj # Standardize (divide by norm of vj) sign[j] = sign(sum(vj*y)) vj = sign[j] * vj - Gj = rbind(G,vj) - uj = c(u,0) + Aj = -rbind(G,vj) + bj = -c(u,0) - a = poly.pval(y,Gj,uj,vj,sigma,bits) + limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n))) + a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv sxj = sx[vars[j]] vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea } } + # JT: why do we output vup, vlo? Are they used somewhere else? + out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, vars=vars,sign=sign,sigma=sigma,alpha=alpha, @@ -433,6 +462,7 @@ print.fsInf <- function(x, tailarea=TRUE, ...) { invisible() } + plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { if (x$completepath) { k = length(x$action)+1 @@ -442,24 +472,25 @@ plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { beta = x$beta } p = nrow(beta) - + xx = 1:k xlab = "Step" - - if (omit.zeros) { - inds = matrix(FALSE,p,k) - for (i in 1:k) { - inds[i,] = beta[i,]!=0 | c(diff(beta[i,]!=0),F) | c(F,diff(beta[i,]!=0)) - } - beta[!inds] = NA + + if (omit.zeros) { + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA } plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), xlab=xlab,ylab="Coefficients",main="Forward stepwise path",...) abline(h=0,lwd=2) matplot(xx,t(beta),type="l",lty=1,add=TRUE) - if (breaks) abline(v=x,lty=2) - if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) + if (breaks) abline(v=xx,lty=2) + if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) invisible() } diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 3434bd6..9b3ac6f 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -1,25 +1,28 @@ #' Select a model with forward stepwise. #' -#' This function implements forward selection of linear models almost identically to \code{\link{stepAIC}} with \code{direction = "forward"}. The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. +#' This function implements forward selection of linear models almost identically to \code{\link[stats]{step}} with \code{direction = "forward"}. The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. #' #' @param x Matrix of predictors (n by p). #' @param y Vector of outcomes (length n). -#' @param index Group membership indicator of length p. 
+#' @param index Group membership indicator of length p. Check that \code{sort(unique(index)) == 1:G} where \code{G} is the number of distinct groups. #' @param maxsteps Maximum number of steps for forward stepwise. -#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. See \code{\link{extractAIC}} for details. -#' @param k Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC. -#' @param intercept Should an intercept be included in the model? Default is TRUE. +#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. When NULL, \code{\link{groupfsInf}} performs truncated \eqn{F} inference instead of truncated \eqn{\chi}. See \code{\link[stats]{extractAIC}} for details on the AIC criterion. +#' @param k Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = 2*log(p)} for RIC (best for high dimensions, when \eqn{p > n}). If \eqn{G < p} then RIC may be too restrictive and it would be better to use \code{log(G) < k < 2*log(p)}. +#' @param intercept Should an intercept be included in the model? Default is TRUE. Does not count as a step. +#' @param center Should the columns of the design matrix be centered? Default is TRUE. #' @param normalize Should the design matrix be normalized? Default is TRUE. +#' @param aicstop Early stopping if AIC increases. Default is 0 corresponding to no early stopping. Positive integer values specify the number of times the AIC is allowed to increase in a row, e.g. with \code{aicstop = 2} the algorithm will stop if the AIC criterion increases for 2 steps in a row. The default of \code{\link[stats]{step}} corresponds to \code{aicstop = 1}. #' @param verbose Print out progress along the way? Default is FALSE. #' @return An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values. #' @examples #' x = matrix(rnorm(20*40), nrow=20) #' index = sort(rep(1:20, 2)) -#' y = rnorm(20) + 2 * (x[,1] - x[,2]) - (x[,3] - x[,4]) +#' y = rnorm(20) + 2 * x[,1] - x[,4] #' fit = groupfs(x, y, index, maxsteps = 5) -#' pvals = groupfsInf(fit) -#' @seealso \code{\link{groupfsInf}} -groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, normalize = TRUE, verbose = FALSE) { +#' out = groupfsInf(fit) +#' out +#' @seealso \code{\link{groupfsInf}}, \code{\link{factorDesign}}. +groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, center = TRUE, normalize = TRUE, aicstop = 0, verbose = FALSE) { if (missing(index)) stop("Missing argument: index.") p <- ncol(x) @@ -41,29 +44,35 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE warning(paste("If the largest groups are included the model will be saturated/overdetermined. 
To prevent this maxsteps has been changed to", maxsteps)) } - # Initialize copies of data for loop by <- mean(y) y.update <- y if (intercept) y.update <- y - by y.last <- y.update - x.update <- x # Center and scale design matrix - xscaled <- scaleGroups(x.update, index, scale = normalize) + xscaled <- scaleGroups(x, index, center, normalize) xm <- xscaled$xm xs <- xscaled$xs x.update <- xscaled$x x.begin <- x.update y.begin <- y.update + stopped <- FALSE # Store all projections computed along the path terms = projections = maxprojs = aicpens = maxpens = cumprojs = vector("list", maxsteps) # Store other information from each step - path.info <- data.frame(imax=integer(maxsteps), L=numeric(maxsteps), df=integer(maxsteps), RSS=numeric(maxsteps), RSSdrop=numeric(maxsteps), chisq=numeric(maxsteps)) + path.info <- data.frame(imax=integer(maxsteps), df=integer(maxsteps), AIC=numeric(maxsteps), RSS=numeric(maxsteps), RSSdrop=numeric(maxsteps), chisq=numeric(maxsteps)) - modelrank <- 1 + modelrank <- as.numeric(intercept) + if (is.null(sigma)) { + modelrank <- modelrank + 1 + aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2))) + k * (n + modelrank) + } else { + aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + if (verbose) print(paste0("Start: AIC=", round(aic.begin, 3)), quote = FALSE) # Begin main loop for (step in 1:maxsteps) { @@ -86,15 +95,17 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE # Regress added group out of y and inactive x P.imax <- added$maxproj %*% t(added$maxproj) - if (is.null(sigma)) { - P.imax <- P.imax / exp(k*added$df/n) - } else { - P.imax <- P.imax * sigma^2 - } P.imax <- diag(rep(1, n)) - P.imax y.update <- P.imax %*% y.update x.update[, inactive.inds] <- P.imax %*% x.update[, inactive.inds] + # Compute AIC + if (is.null(sigma)) { + added$AIC <- n * log(added$maxterm/n) - k * added$df + n*log(2*pi) + k * (n + modelrank) + } else { + added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + projections[[step]] <- added$projections maxprojs[[step]] <- added$maxproj aicpens[[step]] <- added$aicpens @@ -112,50 +123,80 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE y.last <- y.update # Projections are stored separately - step.info <- data.frame(added[-c(4:(length(added)-3))]) + step.info <- data.frame(added[-c(3:(length(added)-4))]) path.info[step, ] <- step.info - if (verbose) print(step.info) + if (verbose) print(round(step.info, 3)) + + if (aicstop > 0 && step < maxsteps && step >= aicstop && aic.last < added$AIC) { + if (all(diff(c(aic.begin, path.info$AIC)[(step+1-aicstop):(step+1)]) > 0)) { + + if (is.null(sigma)) { + added$AIC <- n * log(added$maxterm/n) - k * added$df + n + n*log(2*pi) + k * modelrank + } else { + added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + + path.info <- path.info[1:step, ] + projections[(step+1):maxsteps] <- NULL + maxprojs[(step+1):maxsteps] <- NULL + aicpens[(step+1):maxsteps] <- NULL + maxpens[(step+1):maxsteps] <- NULL + cumprojs[(step+1):maxsteps] <- NULL + terms[(step+1):maxsteps] <- NULL + maxsteps <- step + stopped <- TRUE + break + } + } + aic.last <- added$AIC } + # Is there a better way of doing this? + # Use some projections already computed? 
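  # The lm() refit on the next line recovers the least-squares coefficients of
  # y.begin on the columns of the selected groups; since x.begin and y.begin
  # have already been centered/scaled as requested above, no intercept term is
  # included in the refit.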
+ beta <- coef(lm(y.begin ~ x.begin[,index %in% path.info$imax]-1)) + names(beta) <- index[index %in% path.info$imax] + # Create output object - value <- list(action=path.info$imax, L=path.info$L, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, bx = xm, sx = xs, sigma = sigma, intercept = intercept, terms = terms) + value <- list(action = path.info$imax, L = path.info$L, AIC = path.info$AIC, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, coefficients = beta, bx = xm, by = by, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms) + class(value) <- "groupfs" + attr(value, "center") <- center + attr(value, "normalize") <- normalize attr(value, "labels") <- labels - attr(value, "index") <- index attr(value, "maxsteps") <- maxsteps attr(value, "sigma") <- sigma attr(value, "k") <- k + attr(value, "aicstop") <- aicstop + attr(value, "stopped") <- stopped if (is.null(attr(x, "varnames"))) { attr(value, "varnames") <- colnames(x) } else { attr(value, "varnames") <- attr(x, "varnames") } - - invisible(value) + return(value) } #' Add one group to the model in \code{groupfs}. #' #' For internal use by \code{\link{groupfs}}. #' -#' @param x Design matrix. -#' @param y Response vector. +#' @param xr Design matrix at current step. +#' @param yr Response vector residual at current step. #' @param index Group membership indicator of length p. #' @param labels The unique elements of \code{index}. #' @param inactive Labels of inactive groups. #' @param k Multiplier of model size penalty, use \code{k = 2} for AIC, \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC. #' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. See \code{\link{extractAIC}} for details. #' @return Index \code{imax} of added group, value \code{L} of maximized negative AIC, lists of projection matrices defining quadratic model selection event. -add1.groupfs <- function(x, y, index, labels, inactive, k, sigma = NULL) { +add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { # Use characters to avoid issues where # list() populates NULL lists in the positions # of the active variables ### Question for later: does this slow down lapply? 
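  # Comparison criterion used in the loop below: with sigma unknown, each
  # candidate group g is scored by RSS_g * exp(k * df_g / n), which is monotone
  # in the AIC-style quantity n*log(RSS_g/n) + k*df_g; with sigma known, the
  # score is RSS_g + sigma^2 * k * df_g. The group with the smallest score
  # (see which.min(terms) below) is the one added at this step.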
keys = as.character(inactive) - n2y <- sum(y^2) - n <- ncol(x) + n <- nrow(xr) # Compute sums of squares to determine which group is added # penalized by rank of group if k > 0 @@ -163,127 +204,318 @@ add1.groupfs <- function(x, y, index, labels, inactive, k, sigma = NULL) { names(projections) = names(terms) = names(aicpens) = keys for (key in keys) { inds <- which(index == key) - xi <- x[,inds] + xi <- xr[,inds] ui <- svdu_thresh(xi) dfi <- ncol(ui) projections[[key]] <- ui - dfi <- ncol(ui) - uy <- t(ui) %*% y + uy <- t(ui) %*% yr if (is.null(sigma)) { aicpens[[key]] <- exp(k*dfi/n) - terms[[key]] <- (sum(uy^2) - sum(y^2)) * aicpens[[key]] + terms[[key]] <- (sum(yr^2) - sum(uy^2)) * aicpens[[key]] } else { - aicpens[[key]] <- sigma^2 * k * dfi/n - terms[[key]] <- (sum(uy^2) - sum(y^2)) - aicpens[[key]] + aicpens[[key]] <- sigma^2 * k * dfi + terms[[key]] <- (sum(yr^2) - sum(uy^2)) + aicpens[[key]] } } # Maximizer = group to be added - terms.maxind <- which.max(terms) - imax <- inactive[terms.maxind] - keyind <- which(keys == imax) - maxproj <- projections[[keyind]] - maxpen <- aicpens[[keyind]] - projections[[keyind]] <- NULL - aicpens[[keyind]] <- NULL - - L <- terms[[terms.maxind]] - - return(list(imax=imax, L=L, df = ncol(maxproj), projections = projections, maxproj = maxproj, aicpens = aicpens, maxpen = maxpen, terms = terms)) + terms.optind <- which.min(terms) + imax <- inactive[terms.optind] + optkey <- which(keys == imax) + maxproj <- projections[[optkey]] + maxpen <- aicpens[[optkey]] + maxterm <- terms[[optkey]] + projections[[optkey]] <- NULL + aicpens[[optkey]] <- NULL + + return(list(imax=imax, df = ncol(maxproj), projections = projections, maxproj = maxproj, aicpens = aicpens, maxpen = maxpen, maxterm = maxterm, terms = terms)) } # ----------------------------------------------------------- #' Compute selective p-values for a model fitted by \code{groupfs}. #' -#' Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \code{chi^2} statistics to the regions implied by the model selection event. Details are provided in a forthcoming work. +#' Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. If the \code{sigma} to \code{\link{groupfs}} was NULL then groupfsInf uses truncated \eqn{F} statistics instead of truncated \eqn{\chi}. The \code{sigma} argument to groupfsInf allows users to override and use \eqn{\chi}, but this is not recommended unless \eqn{\sigma} can be estimated well (i.e. \eqn{n > p}). #' #' @param obj Object returned by \code{\link{groupfs}} function -#' @param sigma Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and the mean squared residual of the selected model when n < 2p. In the latter case, the user should use \code{\link{estimateSigma}} function for a more accurate estimate. -#' @param projs Additional projections to define model selection event. For use with cross-validation. Default is NULL and it is not recommended to change this. -#' @param verbose Print out progress along the way? Default is FALSE. -#' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. The default printing behavior should supply adequate information. 
+#' @param sigma Estimate of error standard deviation. Default is NULL and in this case groupfsInf uses the value of sigma specified to \code{\link{groupfs}}. +#' @param verbose Print out progress along the way? Default is TRUE. +#' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. #' #' \describe{ #' \item{vars}{Labels of the active groups in the order they were included.} #' \item{pv}{Selective p-values computed from appropriate truncated distributions.} #' \item{sigma}{Estimate of error variance used in computing p-values.} -#' \item{TC}{Observed value of truncated chi.} +#' \item{TC or TF}{Observed value of truncated \eqn{\chi} or \eqn{F}.} #' \item{df}{Rank of group of variables when it was added to the model.} -#' \item{support}{List of intervals defining the truncation region of the truncated chi.} +#' \item{support}{List of intervals defining the truncation region of the corresponding statistic.} #' } -groupfsInf <- function(obj, sigma = NULL, projs = NULL, verbose = FALSE) { +groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { + + if (!is.null(obj$cvobj) && attr(obj, "stopped")) { + stop("Cross-validation and early stopping cannot be used simultaneously.") + # This shouldn't happen in the first place! + # (it wouldn't anyway unless someone tries to trick it) + } n <- nrow(obj$x) p <- ncol(obj$x) - active <- obj$action maxsteps <- attr(obj, "maxsteps") k <- attr(obj, "k") index <- obj$index x <- obj$x y <- obj$y - Eindex <- which(index %in% active) - Ep <- length(Eindex) + Ep <- sum(index %in% obj$action) - nanconv <- FALSE - pvals <- numeric(maxsteps) - dfs <- numeric(maxsteps) - TCs <- numeric(maxsteps) + pvals = dfs = dfs2 = Tstats = numeric(maxsteps) supports <- list() if (!is.null(sigma)) { + type <- "TC" if (!is.null(obj$sigma)) { cat(paste("Using specified value", sigma, "for sigma in place of the value", obj$sigma, "used by groupfs()\n")) } } else { if (is.null(obj$sigma)) { - if (n >= 2*p) { - sigma <- sqrt(sum(lsfit(obj$x, obj$y, intercept = obj$intercept)$res^2)/(n-p-obj$intercept)) - } else { - sigma = sqrt(obj$log$RSS[length(obj$log$RSS)]/(n-Ep-obj$intercept)) - warning(paste(sprintf("p > n/2, and sigmahat = %0.3f used as an estimate of sigma;",sigma), "you may want to use the estimateSigma function")) - } + type <- "TF" + Pf <- svdu_thresh(obj$x[,which(obj$index %in% obj$action), drop = FALSE]) + dffull <- ncol(Pf) + df2 <- n - dffull - obj$intercept - 1 + Pfull <- Pf %*% t(Pf) } else { + type <- "TC" sigma <- obj$sigma } } # Compute p-value for each active group for (j in 1:maxsteps) { - i <- active[j] - if (verbose) cat(paste0("Step ", j, "/", maxsteps, ": computing P-value for group ", i, "\n")) - # Form projection onto active set minus i - # and project x_i orthogonally - x_i <- x[,which(index == i), drop = FALSE] - if (length(active) > 1) { - minus_i <- setdiff(active, i) - x_minus_i <- svdu_thresh(x[,which(index %in% minus_i), drop = FALSE]) - x_i <- x_i - x_minus_i %*% t(x_minus_i) %*% x_i + i <- obj$action[j] + if (verbose) { + string <- paste0("Step ", j, "/", attr(obj, "maxsteps"), ": computing P-value for group ", i) + if (!is.null(obj$cvobj)) string <- paste0(string, ", including constraints from cross-validation") + if (attr(obj, "stopped")) string <- paste0(string, ", including constraints from AICstop") + cat(paste(string, "\n")) } - # Project y onto what remains of x_i - Ugtilde <- 
svdu_thresh(x_i) - R <- t(Ugtilde) %*% y - TC <- sqrt(sum(R^2)) - eta <- Ugtilde %*% R / TC - df <- ncol(Ugtilde) - dfs[j] <- df - TCs[j] <- TC + if (type == "TC") { + # Form projection onto active set minus i + # and project x_i orthogonally + x_i <- obj$x[,which(obj$index == i), drop = FALSE] + if (length(obj$action) > 1) { + minus_i <- setdiff(obj$action, i) + x_minus_i <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) + x_i <- x_i - x_minus_i %*% t(x_minus_i) %*% x_i + } + + # Project y onto what remains of x_i + Ugtilde <- svdu_thresh(x_i) + R <- t(Ugtilde) %*% obj$y + TC <- sqrt(sum(R^2)) + eta <- Ugtilde %*% R / TC + Z <- obj$y - eta * TC + dfi <- ncol(Ugtilde) + Tstats[j] <- TC + dfs[j] <- dfi + + ydecomp <- list(Z=Z, eta=eta) + + } else { + + if (length(obj$action) > 1) { + minus_i <- setdiff(obj$action, i) + Psub <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) + Z <- Psub %*% t(Psub) %*% obj$y + df1 <- dffull - ncol(Psub) + } else { + Z <- rep(0, n) + df1 <- dffull + obj$intercept + 1 + } - # For each step... - L <- interval_groupfs(obj, TC, R, eta, Ugtilde) + C <- df1/df2 + R1 <- obj$y - Z + R2 <- obj$y - Pfull %*% obj$y + R1sq <- sum(R1^2) + R2sq <- sum(R2^2) + R <- sqrt(R1sq) + delta <- R1-R2 + Vdelta <- delta/sqrt(sum(delta^2)) + V2 <- R2/sqrt(R2sq) + TF <- (R1sq-R2sq)/(C*R2sq) + Tstats[j] <- TF + dfs[j] <- df1 + + ydecomp <- list(R=R, Z=Z, Vd=Vdelta, V2=V2, C=C) - # Any additional projections, e.g. from cross-validation? - if (!is.null(projs)) L <- c(L, projs) + } + + intervallist <- truncationRegion(obj, ydecomp, type) + + # Additional constraints from cross-validation? + if (!is.null(obj$cvobj)) { + intervallist <- c(intervallist, do.call(c, + lapply(obj$cvobj, function(cvf) { + if (type == "TC") { + ydecomp <- list(R=R[-cvf$fold], eta=eta[-cvf$fold], Z=Z[-cvf$fold]) + } else { + ydecomp <- list(R=R, Z=Z[-cvf$fold], Vd=Vdelta[-cvf$fold], V2=V2[-cvf$fold], C=C) # C correct? 
+ } + truncationRegion(cvf, ydecomp, type) + }))) + intervallist <- c(intervallist, + lapply(obj$cvquad, function(cvquad) { + if (type == "TC") { + etacvquad <- t(eta) %*% cvquad + A <- etacvquad %*% eta + B <- 2 * etacvquad %*% Z + C <- t(Z) %*% cvquad %*% Z + quadratic_roots(A, B, C, tol = 1e-15) + } else { + + zcvquad <- t(Z) %*% cvquad + vdcvquad <- t(Vdelta) %*% cvquad + v2cvquad <- t(V2) %*% cvquad + x0 <- zcvquad %*% Z + x1 <- 2*R*zcvquad %*% Vdelta + x2 <- 2*R*zcvquad %*% V2 + x12 <- 2*R^2*vdcvquad %*% V2 + x11 <- R^2*vdcvquad %*% Vdelta + x22 <- R^2*v2cvquad %*% V2 + TF_roots(R, C, coeffs = list(x0=x0, x1=x1, x2=x2, x12=x12, x11=x11, x22=x22)) + } + })) + } + + # Additional constraints from AIC stopping + if (attr(obj, "stopped")) { + aicintervals <- vector("list", maxsteps) + aicstop <- attr(obj, "aicstop") + if (type == "TC") { + pen0 <- k * obj$intercept + aic.begin <- aic.last <- sum(obj$y^2)/sigma^2 - n + k * obj$intercept + } else { + pen0 <- exp(k * (1+obj$intercept)/n) + aic.begin <- n*(log(2*pi) + log(mean(obj$y^2))) + k * (1 + n + obj$intercept) + } + AICs <- c(aic.begin, obj$AIC) + + ulist <- c(list(matrix(0, n, 1)), obj$maxprojs) + penlist <- c(pen0, obj$maxpens) + zlist <- vector("list", maxsteps+1) + zlist[[1]] <- zlist[[2]] <- Z + if (type == "TC") { + etalist <- vector("list", maxsteps+1) + etalist[[1]] <- etalist[[2]] <- eta + } else { + vdlist <- v2list <- vector("list", maxsteps+1) + vdlist[[1]] <- vdlist[[2]] <- Vdelta + v2list[[1]] <- v2list[[2]] <- V2 + } + if (maxsteps > 1) { + for (step in 1:(maxsteps-1)) { + cproj <- obj$cumprojs[[step]] + zlist[[step+2]] <- cproj %*% Z + if (type == "TC") { + etalist[[step+2]] <- cproj %*% eta + } else { + vdlist[[step+2]] <- cproj %*% Vdelta + v2list[[step+2]] <- cproj %*% V2 + } + } + } + + for (step in 1:maxsteps) { + # Compare AIC at s+1 to AIC at s + # roots() functions assume g indexes smaller AIC + # this is step+1 until the last step + peng <- penlist[[step+1]] + Ug <- ulist[[step+1]] + Uh <- ulist[[step]] + Zg <- zlist[[step+1]] + Zh <- zlist[[step]] + + if (type == "TC") { + penh <- 0 + etag <- etalist[[step+1]] + etah <- etalist[[step]] + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) + + if (AICs[step] < AICs[step+1]) { + coeffs <- lapply(coeffs, function(coeff) -coeff) + } + + intstep <- quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) + + } else { + penh <- 1 + Vdg <- vdlist[[step+1]] + Vdh <- vdlist[[step]] + V2g <- v2list[[step+1]] + V2h <- v2list[[step]] + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) + + if (AICs[step] < AICs[step+1]) { + coeffs <- lapply(coeffs, function(coeff) -coeff) + } + + intstep <- TF_roots(R, C, coeffs) + } + + aicintervals[[step]] <- intstep + } + intervallist <- c(intervallist, aicintervals) + } # Compute intersection: - Lunion <- do.call(interval_union, L) - Lunion <- interval_union(Lunion, Intervals(c(-Inf,0))) - E <- interval_complement(Lunion, check_valid = FALSE) + region <- do.call(interval_union, intervallist) + region <- interval_union(region, Intervals(c(-Inf,0))) + E <- interval_complement(region, check_valid = FALSE) + + if (length(E) == 0) { + stop(paste("Empty support at step", j)) + } supports[[j]] <- E # E is now potentially a union of intervals - if (length(E) == 0) stop("Trivial intersection") + if (type == "TC") { + pvals[j] <- TC_surv(TC, sigma, dfi, E) + } else { + # write TF_surv function first + pvals[j] <- TF_surv(TF, df1, df2, E) + } + + } + + if (any(is.nan(pvals))) { + nanp 
<- which(is.nan(pvals)) + pvals[nanp] <- 0 + warning(paste0("P-value NaNs of the form 0/0 converted to 0 for group(s) ", paste(obj$action[nanp], collapse=","), ". This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.")) + } + + names(pvals) <- obj$action + out <- list(vars = obj$action, pv=pvals) + if (type == "TC") { + out$TC <- Tstats + out$sigma <- sigma + } else { + out$TF <- Tstats + out$df2 <- df2 + } + out$df <- dfs + out$support <- supports + class(out) <- "groupfsInf" + if (!is.null(attr(obj, "varnames"))) { + attr(out, "varnames") <- attr(obj, "varnames") + } + return(out) +} + +# ----------------------------------------------------------- + +TC_surv <- function(TC, sigma, df, E) { + if (length(E) == 0) { + stop("Empty TC support") + } # Sum truncated cdf over each part of E denom <- do.call(sum, lapply(1:nrow(E), function(v) { @@ -312,27 +544,12 @@ groupfsInf <- function(obj, sigma = NULL, projs = NULL, verbose = FALSE) { # Survival function value <- numer/denom - if (is.nan(value)) { - value <- 0 - nanconv <- TRUE - } # Force p-value to lie in the [0,1] interval # in case of numerical issues value <- max(0, min(1, value)) - pvals[j] <- value - } - if (nanconv) warning("P-value NaNs of the form 0/0 converted to 0. This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.") - names(pvals) <- obj$action - out <- list(vars = active, pv=pvals, sigma=sigma, TC=TCs, df = dfs, support=supports) - class(out) <- "groupfsInf" - if (!is.null(attr(obj, "varnames"))) { - attr(out, "varnames") <- attr(obj, "varnames") - } - invisible(out) + value } -# ----------------------------------------------------------- - tchi_interval <- function(lower, upper, sigma, df) { a <- (lower/sigma)^2 b <- (upper/sigma)^2 @@ -353,15 +570,79 @@ num_int_chi <- function(a, b, df, nsamp = 10000) { return((b-a)*mean(integrand)) } +TF_surv <- function(TF, df1, df2, E) { + if (length(E) == 0) { + stop("Empty TF support") + } + + # Sum truncated cdf over each part of E + denom <- do.call(sum, lapply(1:nrow(E), function(v) { + TF_interval(E[v,1], E[v,2], df1, df2) + })) + + # Sum truncated cdf from observed value to max of + # truncation region + numer <- do.call(sum, lapply(1:nrow(E), function(v) { + lower <- E[v,1] + upper <- E[v,2] + if (upper > TF) { + # Observed value is left of this interval's right endpoint + if (lower < TF) { + # Observed value is in this interval + return(TF_interval(TF, upper, df1, df2)) + } else { + # Observed value is not in this interval + return(TF_interval(lower, upper, df1, df2)) + } + } else { + # Observed value is right of this entire interval + return(0) + } + })) + + # Survival function + value <- numer/denom + # Force p-value to lie in the [0,1] interval + # in case of numerical issues + #value <- max(0, min(1, value)) + value +} + +TF_interval <- function(lower, upper, df1, df2) { + a <- lower + b <- upper + if (b == Inf) { + integral <- pf(a, df1, df2, lower.tail = FALSE) + } else { + integral <- pf(b, df1, df2) - pf(a, df1, df2) + } + if ((integral < .Machine$double.eps) && (b < Inf)) { + integral <- num_int_F(a, b, df1, df2) + } + return(integral) +} + +num_int_F <- function(a, b, df1, df2, nsamp = 10000) { + grid <- seq(from=a, to=b, length.out=nsamp) + integrand <- df(grid, df1, df2) + return((b-a)*mean(integrand)) +} #' Center and scale design matrix by groups #' +#' For internal use by \code{\link{groupfs}}. +#' #' @param x Design matrix. #' @param index Group membership indicator of length p. 
#' @param center Center groups, default is TRUE. -#' @param scale Scale groups by Frobenius norm, default is TRUE. -#' @return Scaled design matrix -scaleGroups <- function(x, index, center = TRUE, scale = TRUE) { +#' @param normalize Scale groups by Frobenius norm, default is TRUE. +#' @return +#' \describe{ +#' \item{x}{Optionally centered/scaled design matrix.} +#' \item{xm}{Means of groups in original design matrix.} +#' \item{xs}{Frobenius norms of groups in original design matrix.} +#' } +scaleGroups <- function(x, index, center = TRUE, normalize = TRUE) { keys <- unique(index) xm <- rep(0, ncol(x)) xs <- rep(1, ncol(x)) @@ -377,7 +658,7 @@ scaleGroups <- function(x, index, center = TRUE, scale = TRUE) { xsj <- sqrt(normsq) xs[inds] <- xsj if (xsj > 0) { - if (scale) x[, inds] <- x[, inds] / xsj + if (normalize) x[, inds] <- x[, inds] / xsj } else { stop(paste("Design matrix contains identically zero group of variables:", j)) } @@ -386,8 +667,8 @@ scaleGroups <- function(x, index, center = TRUE, scale = TRUE) { } #' Expand a data frame with factors to form a design matrix with the full binary encoding of each factor. -#' -#' When using \code{\link{groupfs}} with factor variables call this function first to create a design matrix. +#' +#' When using \code{\link{groupfs}} with factor variables call this function first to create a design matrix. #' #' @param df Data frame containing some columns which are \code{factors}. #' @return List containing @@ -403,33 +684,32 @@ scaleGroups <- function(x, index, center = TRUE, scale = TRUE) { #' pvals = groupfsInf(fit) #' } factorDesign <- function(df) { - factor.inds <- sapply(df[1,], is.factor) + factor.inds <- sapply(df[1, ], is.factor) factor.labels <- which(factor.inds) nfacs <- sum(factor.inds) - nlevs <- sapply(df[1,factor.inds], function(fac) nlevels(fac)) + nlevs <- sapply(df[1, factor.inds], function(fac) nlevels(fac)) totnlevs <- sum(nlevs) num.num = indcounter = ncol(df) - nfacs - x <- matrix(nrow=nrow(df), ncol = totnlevs + num.num) + x <- matrix(NA_real_, nrow = nrow(df), ncol = totnlevs + num.num) colnames(x) <- 1:ncol(x) index <- integer(ncol(x)) - varnames <- character(ncol(df)) + if (num.num > 0) { - x[,1:num.num] <- df[, !factor.inds] - varnames[1:num.num] = colnames(x)[1:num.num] <- colnames(df)[1:num.num] + x[, 1:num.num] <- as.matrix(df[, !factor.inds, drop = FALSE]) + colnames(x)[1:num.num] <- colnames(df)[!factor.inds] index[1:num.num] <- 1:num.num - indcounter <- indcounter + num.num - 1 } + for (j in 1:nfacs) { - submat <- model.matrix(~ df[, factor.labels[j]] - 1) - indcounter <- indcounter+1 - submatinds <- indcounter:(indcounter+nlevs[j]-1) + submat <- model.matrix(~df[, factor.labels[j]] - 1) + indcounter <- indcounter + 1 + submatinds <- indcounter:(indcounter + nlevs[j] - 1) indcounter <- indcounter + nlevs[j] - 1 - colnames(x)[submatinds] <- paste0(colnames(df)[num.num + j], ":", 1:nlevs[j]) - varnames[num.num + j] <- colnames(df)[num.num + j] - x[,submatinds] <- submat + colnames(x)[submatinds] <- paste0(colnames(df)[factor.inds][j], ":", 1:nlevs[j]) + x[, submatinds] <- submat index[submatinds] <- num.num + j } - attr(x, "varnames") <- varnames + attr(x, "varnames") <- c(colnames(df)[!factor.inds], colnames(df)[factor.inds]) return(list(x = x, index = index)) } @@ -450,23 +730,58 @@ print.groupfs <- function(x, ...) 
{ action <- x$action vnames <- attr(x, "varnames") if (length(vnames) > 0) action <- vnames[action] - tab = data.frame(Group = action, Rank = x$log$df, RSS = round(x$log$RSS, 3)) + tab = data.frame(Group = action, Rank = x$log$df, RSS = round(x$log$RSS, 3), AIC = round(x$log$AIC, 3)) rownames(tab) = 1:nsteps print(tab) cat("\nUse groupfsInf() to compute P-values\n") invisible() } + +coef.groupfs <- function(object, ...) { + return(object$coefficients) +} + +#' @name predict.groupfs +#' @aliases predict.groupfs +#' @aliases coef.groupfs +#' +#' @title Prediction and coefficient functions for \code{\link{groupfs}}. +#' +#' Make predictions or extract coefficients from a groupfs forward stepwise object. +#' +#' @param object Object returned by a call to \code{\link{groupfs}}. +#' @param newx Matrix of x values at which the predictions are desired. If NULL, the x values from groupfs fitting are used. +#' @return A vector of predictions or a vector of coefficients. +predict.groupfs <- function(object, newx) { + beta <- coef.groupfs(object) + if (missing(newx)) { + newx = object$x + } else { + newx <- scaleGroups(newx, object$index, attr(object, "center"), attr(object, "normalize"))$x + } + return(newx[, object$index %in% object$action] %*% beta + ifelse(object$intercept, object$by, 0)) +} + print.groupfsInf <- function(x, ...) { - cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) + if (!is.null(x$sigma)) { + isTF <- FALSE + Tstat <- x$TC + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) + } else { + isTF <- TRUE + Tstat <- x$TF + } action <- x$vars vnames <- attr(x, "varnames") if (length(vnames) > 0) action <- vnames[action] - tab = data.frame(Group = action, Pvalue = round(x$pv, 3), Tchi = round(x$TC, 3), + tab = data.frame(Group = action, Pvalue = round(x$pv, 3), + TC = round(Tstat, 3), df = x$df, Size = round(unlist(lapply(lapply(x$support, size), sum)), 3), Ints = unlist(lapply(x$support, nrow)), Min =round(unlist(lapply(x$support, min)), 3), Max = round(unlist(lapply(x$support, max)), 3)) rownames(tab) = 1:length(x$vars) + if (isTF) names(tab)[3] <- "TF" print(tab) cat("\nInts is the number of intervals in the truncated chi selection region and Size is the sum of their lengths. Min and Max are the lowest and highest endpoints of the truncation region. 
No confidence intervals are reported by groupfsInf.\n") invisible() diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R index 6a48708..0688c1f 100644 --- a/selectiveInference/R/funs.inf.R +++ b/selectiveInference/R/funs.inf.R @@ -1,48 +1,3 @@ -# Main p-value function - -poly.pval <- function(y, G, u, v, sigma, bits=NULL) { - z = sum(v*y) - vv = sum(v^2) - sd = sigma*sqrt(vv) - - rho = G %*% v / vv - vec = (u - G %*% y + rho*z) / rho - vlo = suppressWarnings(max(vec[rho>0])) - vup = suppressWarnings(min(vec[rho<0])) - - pv = tnorm.surv(z,0,sd,vlo,vup,bits) - return(list(pv=pv,vlo=vlo,vup=vup)) -} - -# Main confidence interval function - -poly.int <- function(y, G, u, v, sigma, alpha, gridrange=c(-100,100), - gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { - - z = sum(v*y) - vv = sum(v^2) - sd = sigma*sqrt(vv) - - rho = G %*% v / vv - vec = (u - G %*% y + rho*z) / rho - vlo = suppressWarnings(max(vec[rho>0])) - vup = suppressWarnings(min(vec[rho<0])) - - xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) - fun = function(x) { tnorm.surv(z,x,sd,vlo,vup,bits) } - - int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) - tailarea = c(fun(int[1]),1-fun(int[2])) - - if (flip) { - int = -int[2:1] - tailarea = tailarea[2:1] - } - - return(list(int=int,tailarea=tailarea)) -} - -############################## # Assuming that grid is in sorted order from smallest to largest, # and vals are monotonically increasing function values over the @@ -244,3 +199,103 @@ aicStop <- function(x, y, action, df, sigma, mult=2, ntimes=2) { return(list(khat=khat,G=G,u=u,aic=aic,stopped=(i 0) { + warning('Constraint not satisfied. A %*% Z should be elementwise less than or equal to b') + } + + if (is.null(Sigma)) { + Sigma = diag(rep(1, n)) + } + + # compute pvalues from poly lemma: full version from Lee et al for full matrix Sigma + + n = length(Z) + eta = matrix(eta, ncol=1, nrow=n) + b = as.vector(b) + var_estimate = sum(matrix(eta, nrow=1, ncol=n) %*% (Sigma %*% matrix(eta, ncol=1, nrow=n))) + cross_cov = Sigma %*% matrix(eta, ncol=1, nrow=n) + + resid = (diag(n) - matrix(cross_cov / var_estimate, ncol=1, nrow=n) %*% matrix(eta, nrow=1, ncol=n)) %*% Z + rho = A %*% cross_cov / var_estimate + vec = (b - as.numeric(A %*% resid)) / rho + + vlo = suppressWarnings(max(vec[rho < 0])) + vup = suppressWarnings(min(vec[rho > 0])) + + sd = sqrt(var_estimate) + return(list(vlo=vlo, vup=vup, sd=sd, estimate=target_estimate)) +} + +TG.pvalue = function(Z, A, b, eta, Sigma=NULL, null_value=0, bits=NULL) { + + limits.info = TG.limits(Z, A, b, eta, Sigma) + + return(TG.pvalue.base(limits.info, null_value=null_value, bits=bits)) +} + +TG.interval = function(Z, A, b, eta, Sigma=NULL, alpha=0.1, + gridrange=c(-100,100), + gridpts=100, + griddepth=2, + flip=FALSE, + bits=NULL) { + + limits.info = TG.limits(Z, A, b, eta, Sigma) + + return(TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + griddepth=griddepth, + flip=flip, + bits=bits)) +} + +TG.interval.base = function(limits.info, alpha=0.1, + gridrange=c(-100,100), + gridpts=100, + griddepth=2, + flip=FALSE, + bits=NULL) { + + # compute sel intervals from poly lemmma, full version from Lee et al for full matrix Sigma + + param_grid = seq(gridrange[1] * limits.info$sd, gridrange[2] * limits.info$sd, length=gridpts) + + pivot = function(param) { + tnorm.surv(limits.info$estimate, param, limits.info$sd, limits.info$vlo, limits.info$vup, bits) + } + + interval = grid.search(param_grid, pivot, alpha/2, 1-alpha/2, gridpts, 
griddepth) + tailarea = c(pivot(interval[1]), 1- pivot(interval[2])) + + if (flip) { + interval = -interval[2:1] + tailarea = tailarea[2:1] + } + + # int is not a good variable name, synonymous with integer... + return(list(int=interval, + tailarea=tailarea)) +} + +TG.pvalue.base = function(limits.info, null_value=0, bits=NULL) { + pv = tnorm.surv(limits.info$estimate, null_value, limits.info$sd, limits.info$vlo, limits.info$vup, bits) + return(list(pv=pv, vlo=limits.info$vlo, vup=limits.info$vup, sd=limits.info$sd)) +} + + +mydiag=function(x){ + if(length(x)==1) out=x + if(length(x)>1) out=diag(x) + return(out) + } + diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index db4082e..0fad04f 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -17,7 +17,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, this.call = match.call() checkargs.xy(x=x,y=y) - + # Center and scale, etc. obj = standardize(x,y,intercept,normalize) x = obj$x @@ -49,7 +49,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, action = numeric(buf) # Actions taken df = numeric(buf) # Degrees of freedom beta = matrix(0,p,buf) # LAR estimates - + lambda[1] = hit action[1] = ihit df[1] = 0 @@ -91,14 +91,14 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, Q1 = Q[,1,drop=FALSE]; Q2 = Q[,-1,drop=FALSE] R = qr.R(obj) - + # Throughout the algorithm, we will maintain # the decomposition X1 = Q1*R. Dimenisons: # X1: n x r # Q1: n x r # Q2: n x (n-r) # R: r x r - + while (k<=maxsteps && lambda[k-1]>=minlam) { ########## # Check if we've reached the end of the buffer @@ -118,7 +118,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, b = backsolve(R,backsolve(R,s,transpose=TRUE)) aa = as.numeric(t(X2) %*% (y - X1 %*% a)) bb = as.numeric(t(X2) %*% (X1 %*% b)) - + # If the inactive set is empty, nothing will hit if (r==min(n-intercept,p)) hit = 0 @@ -128,9 +128,9 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, hits = aa/(shits-bb) # Make sure none of the hitting times are larger - # than the current lambda + # than the current lambda hits[hits>lambda[k-1]] = 0 - + ihit = which.max(hits) hit = hits[ihit] shit = shits[ihit] @@ -138,13 +138,13 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, # Stop if the next critical point is negative if (hit<=0) break - + # Record the critical lambda and solution lambda[k] = hit action[k] = I[ihit] df[k] = r beta[A,k] = a-hit*b - + # Gamma matrix! if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) X2perp = X2 - X1 %*% backsolve(R,t(Q1)%*%X2) @@ -162,7 +162,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, crit = (t(c[,-ihit])%*%y - ratio*sum(c[,ihit]*y))/(1-ratio) mp[k] = max(max(crit[ip]),0) } - + # Update all of the variables r = r+1 A = c(A,I[ihit]) @@ -176,12 +176,12 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, Q1 = obj$Q1 Q2 = obj$Q2 R = obj$R - + if (verbose) { cat(sprintf("\n%i. 
lambda=%.3f, adding variable %i, |A|=%i...", k,hit,A[r],r)) } - + # Step counter k = k+1 } @@ -195,7 +195,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, nk = nk[Seq(1,k-1)] mp = mp[Seq(1,k-1)] vreg = vreg[Seq(1,k-1),,drop=FALSE] - + # If we reached the maximum number of steps if (k>maxsteps) { if (verbose) { @@ -215,11 +215,11 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, completepath = FALSE bls = NULL } - + # Otherwise, note that we completed the path else { completepath = TRUE - + # Record the least squares solution. Note that # we have already computed this bls = rep(0,p) @@ -227,19 +227,19 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, } if (verbose) cat("\n") - + # Adjust for the effect of centering and scaling if (intercept) df = df+1 if (normalize) beta = beta/sx if (normalize && completepath) bls = bls/sx - + # Assign column names colnames(beta) = as.character(round(lambda,3)) out = list(lambda=lambda,action=action,sign=s,df=df,beta=beta, completepath=completepath,bls=bls, Gamma=Gamma,nk=nk,vreg=vreg,mp=mp,x=x,y=y,bx=bx,by=by,sx=sx, - intercept=intercept,normalize=normalize,call=this.call) + intercept=intercept,normalize=normalize,call=this.call) class(out) = "lar" return(out) } @@ -253,15 +253,8 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, downdateQR <- function(Q1,Q2,R,col) { m = nrow(Q1) n = ncol(Q1) - - a = .C("downdate1", - Q1=as.double(Q1), - R=as.double(R), - col=as.integer(col-1), - m=as.integer(m), - n=as.integer(n), - dup=FALSE, - package="selectiveInference") + + a = downdate1_(as.matrix(Q1), R, col, m, n) # Rcpp call Q1 = matrix(a$Q1,nrow=m) R = matrix(a$R,nrow=n) @@ -275,35 +268,6 @@ downdateQR <- function(Q1,Q2,R,col) { return(list(Q1=Q1,Q2=Q2,R=R)) } -# Update the QR factorization, after a column has been -# added. Here Q1 is m x n, Q2 is m x k, and R is n x n. - -updateQR <- function(Q1,Q2,R,col) { - m = nrow(Q1) - n = ncol(Q1) - k = ncol(Q2) - - a = .C("update1", - Q2=as.double(Q2), - w=as.double(t(Q2)%*%col), - m=as.integer(m), - k=as.integer(k), - dup=FALSE, - package="selectiveInference") - - Q2 = matrix(a$Q2,nrow=m) - w = c(t(Q1)%*%col,a$w) - - # Re-structure: delete a column from Q2, add one to - # Q1, and expand R - Q1 = cbind(Q1,Q2[,1]) - Q2 = Q2[,-1,drop=FALSE] - R = rbind(R,rep(0,n)) - R = cbind(R,w[Seq(1,n+1)]) - - return(list(Q1=Q1,Q2=Q2,R=R)) -} - ############################## # Coefficient function for lar @@ -320,18 +284,18 @@ coef.lar <- function(object, s, mode=c("step","lambda"), ...) { lambda = object$lambda beta = object$beta } - + if (mode=="step") { if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k)) knots = 1:k - dec = FALSE + decreasing = FALSE } else { if (min(s)= %0.3f",min(lambda))) knots = lambda - dec = TRUE + decreasing = TRUE } - - return(coef.interpolate(beta,s,knots,dec)) + + return(coef.interpolate(beta,s,knots,decreasing)) } # Prediction function for lar @@ -339,7 +303,10 @@ coef.lar <- function(object, s, mode=c("step","lambda"), ...) { predict.lar <- function(object, newx, s, mode=c("step","lambda"), ...) 
{ beta = coef.lar(object,s,mode) if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) - else newx = scale(newx,object$bx,FALSE) + else { + newx = matrix(newx,ncol=ncol(object$x)) + newx = scale(newx,object$bx,FALSE) + } return(newx %*% beta + object$by) } @@ -350,9 +317,9 @@ predict.lasso <- predict.lar # Lar inference function -larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), +larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { - + this.call = match.call() type = match.arg(type) checkargs.misc(sigma=sigma,alpha=alpha,k=k, @@ -364,7 +331,7 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai warning("Package Rmpfr is not installed, reverting to standard precision") bits = NULL } - + k = min(k,length(obj$action)) # Round to last step x = obj$x y = obj$y @@ -385,11 +352,11 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai "you may want to use the estimateSigma function")) } } - + pv.spacing = pv.modspac = pv.covtest = khat = NULL - + if (type == "active") { - pv = vlo = vup = numeric(k) + pv = vlo = vup = numeric(k) vmat = matrix(0,k,n) ci = tailarea = matrix(0,k,2) pv.spacing = pv.modspac = pv.covtest = numeric(k) @@ -399,24 +366,29 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai for (j in 1:k) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - - Gj = G[1:nk[j],] - uj = rep(0,nk[j]) + + Aj = -G[1:nk[j],] + bj = -rep(0,nk[j]) vj = vreg[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - a = poly.pval(y,Gj,uj,vj,sigma,bits) + + limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n))) + a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv sxj = sx[vars[j]] vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - - a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) - ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea - + pv.spacing[j] = spacing.pval(obj,sigma,j) pv.modspac[j] = modspac.pval(obj,sigma,j) pv.covtest[j] = covtest.pval(obj,sigma,j) @@ -424,7 +396,7 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai khat = forwardStop(pv,alpha) } - + else { if (type == "aic") { out = aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) @@ -439,40 +411,45 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai u = rep(0,nk[k]) kk = k } - - pv = vlo = vup = numeric(kk) + + pv = vlo = vup = numeric(kk) vmat = matrix(0,kk,n) ci = tailarea = matrix(0,kk,2) sign = numeric(kk) vars = obj$action[1:kk] xa = x[,vars] M = pinv(crossprod(xa)) %*% t(xa) - + for (j in 1:kk) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - + vj = M[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - sign[j] = sign(sum(vj*y)) + sign[j] = sign(sum(vj*y)) vj = sign[j] * vj - Gj = rbind(G,vj) - uj = c(u,0) + Aj = -rbind(G,vj) + bj = -c(u,0) + + limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n))) + a = 
TG.pvalue.base(limits.info, bits=bits) - a = poly.pval(y,Gj,uj,vj,sigma,bits) pv[j] = a$pv sxj = sx[vars[j]] vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea } } - + out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, pv.spacing=pv.spacing,pv.modspac=pv.modspac,pv.covtest=pv.covtest, @@ -488,10 +465,10 @@ spacing.pval <- function(obj, sigma, k) { v = obj$Gamma[obj$nk[k],] sd = sigma*sqrt(sum(v^2)) a = obj$mp[k] - + if (k==1) b = Inf else b = obj$lambda[k-1] - + return(tnorm.surv(obj$lambda[k],0,sd,a,b)) } @@ -505,7 +482,7 @@ modspac.pval <- function(obj, sigma, k) { warning(sprintf("Modified spacing p-values at step %i require %i steps of the lar path",k,k+1)) return(NA) } - + if (k==1) b = Inf else b = obj$lambda[k-1] @@ -543,7 +520,7 @@ covtest.pval <- function(obj, sigma, k) { print.lar <- function(x, ...) { cat("\nCall:\n") dput(x$call) - + cat("\nSequence of LAR moves:\n") nsteps = length(x$action) tab = cbind(1:nsteps,x$action,x$sign) @@ -566,7 +543,7 @@ print.larInf <- function(x, tailarea=TRUE, ...) { tab = cbind(1:length(x$pv),x$vars, round(x$sign*x$vmat%*%x$y,3), round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), - round(x$pv,3),round(x$ci,3),round(x$pv.spacing,3),round(x$pv.cov,3)) + round(x$pv,3),round(x$ci,3),round(x$pv.spacing,3),round(x$pv.cov,3)) colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt", "Spacing", "CovTest") if (tailarea) { @@ -609,7 +586,7 @@ print.larInf <- function(x, tailarea=TRUE, ...) { } rownames(tab) = rep("",nrow(tab)) print(tab) - + cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) } @@ -618,7 +595,7 @@ print.larInf <- function(x, tailarea=TRUE, ...) { plot.lar <- function(x, xvar=c("norm","step","lambda"), breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { - + if (x$completepath) { k = length(x$action)+1 lambda = c(x$lambda,0) @@ -629,32 +606,33 @@ plot.lar <- function(x, xvar=c("norm","step","lambda"), breaks=TRUE, beta = x$beta } p = nrow(beta) - + xvar = match.arg(xvar) if (xvar=="norm") { - x = colSums(abs(beta)) + xx = colSums(abs(beta)) xlab = "L1 norm" } else if (xvar=="step") { - x = 1:k + xx = 1:k xlab = "Step" } else { - x = lambda + xx = lambda xlab = "Lambda" } if (omit.zeros) { - inds = matrix(FALSE,p,k) - for (i in 1:k) { - inds[i,] = beta[i,]!=0 | c(diff(beta[i,]!=0),F) | c(F,diff(beta[i,]!=0)) - } - beta[!inds] = NA + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA } - plot(c(),c(),xlim=range(x,na.rm=T),ylim=range(beta,na.rm=T), - xlab=xlab,ylab="Coefficients",main="LAR path",...) + plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), + xlab=xlab,ylab="Coefficients",main="Least angle regression path",...) 
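  # Remaining drawing steps: a horizontal reference line at zero, one line per
  # coefficient path via matplot, optional vertical dashed lines at each break
  # point, and optional variable labels on the right-hand axis.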
abline(h=0,lwd=2) - matplot(x,t(beta),type="l",lty=1,add=TRUE) - if (breaks) abline(v=x,lty=2) - if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) + matplot(xx,t(beta),type="l",lty=1,add=TRUE) + if (breaks) abline(v=xx,lty=2) + if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) invisible() } diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 1aff7ec..799352b 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -1,20 +1,35 @@ +truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { -interval_groupfs <- function(obj, TC, R, eta, Ugtilde, tol = 1e-15) { - - Z <- obj$y - eta * TC n <- nrow(obj$x) - + Z <- ydecomp$Z + if (type == "TC") { + eta <- ydecomp$eta + } else { + Vd <- ydecomp$Vd + V2 <- ydecomp$V2 + C <- ydecomp$C + R <- ydecomp$R + } L <- lapply(1:length(obj$action), function(s) { Ug <- obj$maxprojs[[s]] - dfg <- ncol(Ug) - + peng <- obj$maxpens[[s]] if (s > 1) { - etas <- obj$cumprojs[[s-1]] %*% eta Zs <- obj$cumprojs[[s-1]] %*% Z + if (type == "TC") { + etas <- obj$cumprojs[[s-1]] %*% eta + } else { + Vds <- obj$cumprojs[[s-1]] %*% Vd + V2s <- obj$cumprojs[[s-1]] %*% V2 + } } else { - etas <- eta Zs <- Z + if (type == "TC") { + etas <- eta + } else { + Vds <- Vd + V2s <- V2 + } } num.projs <- length(obj$projections[[s]]) @@ -23,145 +38,191 @@ interval_groupfs <- function(obj, TC, R, eta, Ugtilde, tol = 1e-15) { } else { lapply(1:num.projs, function(l) { - Uh <- obj$projections[[s]][[l]] - dfh <- ncol(Uh) - # The quadratic form corresponding to - # (t*U + Z)^T %*% Q %*% (t*U + Z) \geq 0 - # we find the roots in t, if there are any - # and return the interval of potential t - - Uheta <- t(Uh) %*% etas - Ugeta <- t(Ug) %*% etas - UhZ <- t(Uh) %*% Zs - UgZ <- t(Ug) %*% Zs - etasZs <- t(etas) %*% Zs - peng <- obj$maxpens[[s]] - penh <- obj$aicpens[[s]][[l]] - pendiff <- peng-penh - if (is.null(obj$sigma)) { - A <- sum(Ugeta^2) * peng - sum(Uheta^2) * penh - sum(etas^2) * pendiff - B <- 2 * as.numeric(t(Ugeta) %*% UgZ * peng - t(Uheta) %*% UhZ * penh - etasZs * pendiff) - C <- sum(UgZ^2) * peng - sum(UhZ^2) * penh - sum(Zs^2) * pendiff - } else { - # Check this - A <- sum(Ugeta^2) - sum(Uheta^2) - B <- 2 * as.numeric(t(Ugeta) %*% UgZ - t(Uheta) %*% UhZ) - C <- sum(UgZ^2) - sum(UhZ^2) - pendiff - } - - disc <- B^2 - 4*A*C - b2a <- -B/(2*A) - - if (disc > tol) { + Uh <- obj$projections[[s]][[l]] + penh <- obj$aicpens[[s]][[l]] + # The quadratic form corresponding to + # (t*U + Z)^T %*% Q %*% (t*U + Z) \geq 0 + # we find the roots in t, if there are any + # and return the interval of potential t + if (type == "TC") { + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) + quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) + } else { + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) + roots <- TF_roots(R, C, coeffs) + return(roots) + } + }) + } + # LL is a list of intervals + }) + # L is now a list of lists of intervals + return(unlist(L, recursive = FALSE, use.names = FALSE)) +} + +quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) { + # g indexes minimizer, h the comparison + Uheta <- t(Uh) %*% etah + Ugeta <- t(Ug) %*% etag + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + etaZh <- t(etah) %*% Zh + etaZg <- t(etag) %*% Zg + if (is.null(sigma)) { + A <- penh * (sum(etah^2) - sum(Uheta^2)) - peng * (sum(etag^2) - sum(Ugeta^2)) + B <- 2 * penh * (etaZh - t(Uheta) %*% UhZ) - 2 * peng * 
(etaZg - t(Ugeta) %*% UgZ) + C <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) + } else { + A <- (sum(etah^2) - sum(Uheta^2)) - (sum(etag^2) - sum(Ugeta^2)) + B <- 2 * (etaZh - t(Uheta) %*% UhZ) - 2 * (etaZg - t(Ugeta) %*% UgZ) + C <- (sum(Zh^2) - sum(UhZ^2) + penh) - (sum(Zg^2) - sum(UgZ^2) + peng) + } + return(list(A = A, B = B, C = C)) +} + +quadratic_roots <- function(A, B, C, tol) { + disc <- B^2 - 4*A*C + b2a <- -B/(2*A) + + if (disc > tol) { # Real roots pm <- sqrt(disc)/(2*A) endpoints <- sort(c(b2a - pm, b2a + pm)) - } else { - + } else { # No real roots if (A > -tol) { # Quadratic form always positive - return(Intervals(c(-Inf,0))) + return(Intervals(c(-Inf,0))) } else { # Quadratic form always negative - stop(paste("Empty TC support is infeasible", s, "-", l)) + stop("Empty TC support is infeasible") } - } + } - if (A > tol) { + if (A > tol) { # Parabola opens upward if (min(endpoints) > 0) { # Both roots positive, union of intervals - return(Intervals(rbind(c(-Inf,0), endpoints))) + return(Intervals(rbind(c(-Inf,0), endpoints))) } else { # At least one negative root - return(Intervals(c(-Inf, max(0, endpoints[2])))) + return(Intervals(c(-Inf, max(0, endpoints[2])))) } - } else { + } else { if (A < -tol) { # Parabola opens downward - if (endpoints[2] < 0) { + if (endpoints[2] < 0) { # Positive quadratic form only when t negative - stop(paste("Negative TC support is infeasible", s, "-", l)) - } else { - # Part which is positive - if (endpoints[1] > 0) { - return(Intervals(rbind(c(-Inf, endpoints[1]), c(endpoints[2], Inf)))) + stop("Negative TC support is infeasible") } else { - return(Intervals(c(endpoints[2], Inf))) + # Part which is positive + if (endpoints[1] > 0) { + return(Intervals(rbind(c(-Inf, endpoints[1]), c(endpoints[2], Inf)))) + } else { + return(Intervals(c(endpoints[2], Inf))) + } } - } } else { # a is too close to 0, quadratic is actually linear - if (abs(B) > tol) { - if (B > 0) { - return(Intervals(c(-Inf, max(0, -C/B)))) + if (abs(B) > tol) { + if (B > 0) { + return(Intervals(c(-Inf, max(0, -C/B)))) + } else { + if (-C/B < 0) stop("Infeasible linear equation") + return(Intervals(rbind(c(-Inf, 0), c(-C/B, Inf)))) + } } else { - if (-C/B < 0) stop("Error: infeasible linear equation") - return(Intervals(rbind(c(-Inf, 0), c(-C/B, Inf)))) + warning("Ill-conditioned quadratic") + return(Intervals(c(-Inf,0))) } - } else { - warning("Ill-conditioned quadratic") - return(Intervals(c(-Inf,0))) - } } - } - }) } - # LL is a list of intervals - }) - # L is now a list of lists of intervals - return(unlist(L, recursive = FALSE, use.names = FALSE)) } - +# Helper functions for TF roots roots_to_checkpoints <- function(roots) { checkpoints <- unique(sort(c(0, roots))) - return(c(0, (checkpoints + c(checkpoints[-1], 2 + checkpoints[length(checkpoints)]))/2)) + return(c(0, (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) } - roots_to_partition <- function(roots) { checkpoints <- unique(sort(c(0, roots))) - return(list(endpoints = c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 2 + checkpoints[length(checkpoints)]))/2)) + return(list(endpoints = c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) +} + +# Efficiently compute coefficients of one-dimensional TF slice function +TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { + + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + UhVd <- t(Uh) %*% Vdh + UgVd <- t(Ug) %*% Vdg + UhV2 
<- t(Uh) %*% V2h + UgV2 <- t(Ug) %*% V2g + VdZh <- sum(Vdh*Zh) + VdZg <- sum(Vdg*Zg) + V2Zh <- sum(V2h*Zh) + V2Zg <- sum(V2g*Zg) + + x0 <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) + x1 <- 2*R*(penh * (VdZh - sum(UhZ*UhVd)) - peng * (VdZg - sum(UgZ*UgVd))) + x2 <- 2*R*(penh * (V2Zh - sum(UhZ*UhV2)) - peng * (V2Zg - sum(UgZ*UgV2))) + x12 <- 2*R^2*(penh * (sum(Vdh*V2h) - sum(UhVd*UhV2)) - peng * (sum(Vdg*V2g) - sum(UgVd*UgV2))) + x11 <- R^2*(penh * (sum(Vdh^2) - sum(UhVd^2)) - peng * (sum(Vdg^2) - sum(UgVd^2))) + x22 <- R^2*(penh * (sum(V2h^2) - sum(UhV2^2)) - peng * (sum(V2g^2) - sum(UgV2^2))) + + return(list(x11=x11, x22=x22, x12=x12, x1=x1, x2=x2, x0=x0)) } -# Tchi_roots <- function(Q, a, b, ) +# Numerically solve for roots of TF slice using +# hybrid polyroot/uniroot approach -TF_roots <- function(Q, a, b, Vdelta, V2, z, C, r, tol = 1e-14) { +TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { - # z = y - R1 - VdeltaQ <- t(Vdelta) %*% Q - V2Q <- t(V2) %*% Q - x11 <- VdeltaQ %*% Vdelta - x12 <- 2 * VdeltaQ %*% V2 - x22 <- V2Q %*% V2 - x1 <- 2 * VdeltaQ %*% z + t(a) %*% Vdelta - x2 <- 2 * V2Q %*% z + t(a) %*% V2 - x0 <- t(z) %*% Q %*% z + t(a) %*% z + b + x11 <- coeffs$x11 + x22 <- coeffs$x22 + x12 <- coeffs$x12 + x1 <- coeffs$x1 + x2 <- coeffs$x2 + x0 <- coeffs$x0 - g1 <- function(t) r*sqrt(C*t/(1+C*t)) - g2 <- function(t) r/sqrt(1+C*t) + g1 <- function(t) sqrt(C*t/(1+C*t)) + g2 <- function(t) 1/sqrt(1+C*t) I <- function(t) x11*g1(t)^2 + x12*g1(t)*g2(t) + x22*g2(t)^2 + x1*g1(t) + x2*g2(t) + x0 - z4 <- r*complex(real = -x11 + x22, imaginary = -x12)/2 - z3 <- complex(real = x2, imaginary = -x1) - z2 <- complex(real = r*x11+r*x22+2*x0/r) + z4 <- complex(real = -x11 + x22, imaginary = -x12)/4 + z3 <- complex(real = x2, imaginary = -x1)/2 + z2 <- complex(real = x11/2+x22/2+x0) z1 <- Conj(z3) z0 <- Conj(z4) - zcoefs <- r*c(z0, z1, z2, z3, z4)/2 + + zcoefs <- c(z0, z1, z2, z3, z4) croots <- polyroot(zcoefs) thetas <- Arg(croots) - modinds <- Mod(croots) <= 1 + tol & Mod(croots) >= 1 - tol + # Can't specify polyroot precision :( + modinds <- Mod(croots) <= 1 + tol2 & Mod(croots) >= 1 - tol2 angleinds <- thetas >=0 & thetas <= pi/2 - roots <- unique(thetas[modinds * angleinds]) + roots <- unique(thetas[which(modinds & angleinds)]) troots <- tan(roots)^2/C - if (length(roots) == 0) { - return(list(intervals = Intervals(c(0,Inf)), I=I)) - } + checkpoints <- c() + if (length(troots) > 0) checkpoints <- roots_to_checkpoints(troots) + checkpoints <- sort( + c(checkpoints, 0, tol, tol2, + seq(from = sqrt(tol2), to = 1, length.out = 50), + seq(from = 1.2, to=50, length.out = 20), + 100, 1000, 10000)) + ## if (length(troots) == 0) { + ## # Polyroot didn't catch any roots + ## # ad-hoc check: + ## checkpoints <- c(0, tol, tol2, + ## seq(from = sqrt(tol2), to = 1, length.out = 50), + ## seq(from = 1.2, to=50, length.out = 20), + ## 100, 1000, 10000) + ## } else { + ## checkpoints <- roots_to_checkpoints(troots) + ## } - checkpoints <- roots_to_checkpoints(troots) signs <- sign(I(checkpoints)) diffs <- c(0, diff(signs)) changeinds <- which(diffs != 0) @@ -169,23 +230,28 @@ TF_roots <- function(Q, a, b, Vdelta, V2, z, C, r, tol = 1e-14) { if (length(changeinds) > 0) { roots <- unlist(lapply(changeinds, function(ind) { - uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind])$root + uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind], tol = tol)$root })) + partition <- roots_to_partition(roots) - positive <- which(I(partition$midpoints) > 0) + negative <- 
which(I(partition$midpoints) < 0) intervals <- matrix(NA, ncol=2) - for (i in 1:length(positive)) { - ind <- positive[i] - if ((i > 1) && (ind == positive[i-1] + 1)) { + for (i in 1:length(negative)) { + ind <- negative[i] + if ((i > 1) && (ind == negative[i-1] + 1)) { + # There was not a sign change at end of previous interval intervals[nrow(intervals), 2] <- partition$endpoints[ind+1] } else { intervals <- rbind(intervals, c(partition$endpoints[ind], partition$endpoints[ind+1])) } } - return(list(intervals = Intervals(intervals[-1,]), I=I)) + return(Intervals(intervals[-1,])) } - return(list(intervals = Intervals(c(0,Inf)), I=I)) + # Apparently no roots, always positive + if (I(0) < 0) stop("Infeasible constraint!") + return(Intervals(c(-Inf,0))) } + diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R new file mode 100644 index 0000000..5be7938 --- /dev/null +++ b/selectiveInference/R/funs.randomized.R @@ -0,0 +1,552 @@ +# Functions to fit and "infer" about parameters in the +# randomized LASSO +# +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2 + +randomizedLasso = function(X, + y, + lam, + family=c("gaussian","binomial"), + noise_scale=NULL, + ridge_term=NULL, + noise_type=c('gaussian', 'laplace'), + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-8, # tolerance for relative convergence of parameter + objective_tol=1.e-8, # tolerance for relative decrease in objective + objective_stop=FALSE, + kkt_stop=TRUE, + parameter_stop=TRUE) +{ + family = match.arg(family) + + n = nrow(X); p = ncol(X) + + mean_diag = mean(apply(X^2, 2, sum)) + + # default ridge term + + if (is.null(ridge_term)) { + ridge_term = sqrt(mean_diag) * sd(y) / sqrt(n) + } + + # default noise level + + if (is.null(noise_scale)) { + noise_scale = 0.5 * sd(y) * sqrt(mean_diag) + } + + noise_type = match.arg(noise_type) + + if (noise_scale > 0) { + if (noise_type == 'gaussian') { + perturb_ = rnorm(p) * noise_scale + } + else if (noise_type == 'laplace') { + perturb_ = rexp(p) * (2 * rbinom(p, 1, 0.5) - 1) * noise_scale + } + } else { + perturb_ = rep(0, p) + } + + lam = as.numeric(lam) + if (length(lam) == 1) { + lam = rep(lam, p) + } + + if (length(lam) != p) { + stop("Lagrange parameter should be single float or of length ncol(X)") + } + + soln = rep(0, p) + Xsoln = rep(0, n) + linear_func = (- t(X) %*% y - perturb_) / n + + gradient = 1. 
* linear_func + ever_active = rep(0, p) + nactive = as.integer(0) + + result = solve_QP_wide(X, # design matrix + lam / n, # vector of Lagrange multipliers + ridge_term / n, # ridge_term + max_iter, + soln, + linear_func, + gradient, + Xsoln, + ever_active, + nactive, + kkt_tol, + objective_tol, + parameter_tol, + p, + objective_stop, # objective_stop + kkt_stop, # kkt_stop + parameter_stop) # param_stop + + sign_soln = sign(result$soln) + + unpenalized = lam == 0 + active = (!unpenalized) & (sign_soln != 0) + inactive = (!unpenalized) & (sign_soln == 0) + + unpenalized_set = which(unpenalized) + active_set = which(active) + inactive_set = which(inactive) + + # observed opt state + + observed_scalings = abs(result$soln)[active] + observed_unpen = result$soln[unpenalized] + observed_subgrad = -n*result$gradient[inactive] + + if (sum(abs(observed_subgrad)>lam[inactive]*(1.001)) > 0){ + stop("subgradient eq not satisfied") + } + + observed_opt_state = c(observed_unpen, observed_scalings, observed_subgrad) + + # affine transform for optimization variables + + E = c(unpenalized_set, active_set) + I = inactive_set + X_E = X[,E] + X_I = X[,I] + + if (length(E)==0){ + return(list(active_set=c())) + } + + if (family=="binomial"){ + unpen_reg = glm(y~X_E-1, family="binomial") + unpen_est = unpen_reg$coefficients + pi_fn = function(beta){ + temp = X_E %*% as.matrix(beta) + return(as.vector(exp(temp)/(1+exp(temp)))) # n-dimensional + } + pi_vec = pi_fn(unpen_est) + W_E = diag(pi_vec*(1-pi_vec)) + } else if (family=="gaussian"){ + W_E = diag(rep(1,n)) + } + + L_E = t(X) %*% W_E %*% X[,E] + + coef_term = L_E + + signs_ = c(rep(1, sum(unpenalized)), sign_soln[active]) + + coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term + + if (length(signs_) == 1) { + coef_term = coef_term * signs_ + } else { + coef_term = coef_term %*% diag(signs_) # scaligns are non-negative + } + + if (sum(inactive) > 0) { + subgrad_term = matrix(0, p, sum(inactive)) # for subgrad + for (i in 1:sum(inactive)) { + subgrad_term[inactive_set[i], i] = 1 + } + + linear_term = cbind(coef_term, + subgrad_term) + } else { + linear_term = coef_term + } + offset_term = rep(0, p) + offset_term[active] = lam[active] * sign_soln[active] + + opt_transform = list(linear_term=linear_term, + offset_term=offset_term) + + # affine transform for internal (data) variables + # for now just use parametric in terms of + # (\bar{\beta}_E, X_{-E}^T(y-X_E\bar{\beta}_E) + # + # we have to reconstruct -X^TY from this pair + # + + active_term = -L_E # for \bar{\beta}_E + + if (sum(inactive) > 0) { + inactive_term = -subgrad_term + linear_term = cbind(active_term, + inactive_term) + } else { + linear_term = active_term + } + + offset_term = rep(0, p) + internal_transform = list(linear_term = linear_term, + offset_term = offset_term) + + # density for sampling optimization variables + + observed_raw = -t(X) %*% y + if (family=="binomial"){ + beta_E = result$soln[active_set] + observed_raw = observed_raw + t(X)%*%pi_fn(beta_E) - L_E %*% beta_E + } + inactive_lam = lam[inactive_set] + inactive_start = sum(unpenalized) + sum(active) + active_start = sum(unpenalized) + + + # XXX only for Gaussian so far + + log_optimization_density = function(opt_state) { + + if ((sum(abs(opt_state[(inactive_start + 1):p]) > inactive_lam) > 0) || + (sum(opt_state[(active_start + 1):inactive_start] < 0) > 0)) { + return(-Inf) + } + + use_C_code = TRUE + if (!use_C_code) { + A = opt_transform$linear_term %*% opt_state + observed_raw + 
opt_transform$offset_term + D = -apply(A^2, 2, sum) / noise_scale^2 + } else { + D = log_density_gaussian_conditional_(noise_scale, + opt_transform$linear_term, + as.matrix(opt_state), + observed_raw + opt_transform$offset_term) + } + return(D) + } + + return(list(active_set = active_set, + inactive_set = inactive_set, + unpenalized_set = unpenalized_set, + sign_soln = sign_soln, + optimization_transform = opt_transform, + internal_transform = internal_transform, + log_optimization_density = log_optimization_density, + observed_opt_state = observed_opt_state, + observed_raw = observed_raw, + noise_scale = noise_scale, + soln = result$soln, + perturb = perturb_ + )) + +} + +sample_opt_variables = function(randomizedLASSO_obj, jump_scale, nsample=10000) { + return(MCMC(randomizedLASSO_obj$log_optimization_density, + nsample, + randomizedLASSO_obj$observed_opt_state, + acc.rate=0.2, + scale=jump_scale)) +} + +# Carry out a linear decompositon of an internal +# representation with respect to a target + +# Returns an affine transform into raw coordinates (i.e. \omega or randomization coordinates) + +linear_decomposition = function(observed_target, + observed_internal, + var_target, + cov_target_internal, + internal_transform) { + var_target = as.matrix(var_target) + if (nrow(var_target) == 1) { + nuisance = observed_internal - cov_target_internal * observed_target / var_target + target_linear = internal_transform$linear_term %*% cov_target_internal / var_target[1,1] + } else { + nuisance = observed_internal - cov_target_internal %*% solve(var_target) %*% observed_target + target_linear = internal_transform$linear_term %*% cov_target_internal %*% solve(var_target) + } + target_offset = internal_transform$linear_term %*% nuisance + internal_transform$offset_term + return(list(linear_term=target_linear, + offset_term=target_offset)) +} + +# XXX only for Gaussian so far + +importance_weight = function(noise_scale, + target_sample, + opt_sample, + opt_transform, + target_transform, + observed_raw) { + + use_C_code = TRUE + if (!use_C_code) { + A = (opt_transform$linear_term %*% opt_sample + + target_transform$linear_term %*% target_sample) + A = apply(A, 2, function(x) {return(x + target_transform$offset_term + opt_transform$offset_term)}) + log_num = -apply(A^2, 2, sum) / noise_scale^2 + } else { + log_num = log_density_gaussian_(noise_scale, + target_transform$linear_term, + as.matrix(target_sample), + opt_transform$linear_term, + as.matrix(opt_sample), + target_transform$offset_term + opt_transform$offset_term) + } + + if (!use_C_code) { + A = opt_transform$linear_term %*% opt_sample + A = apply(A, 2, function(x) {return(x + observed_raw + opt_transform$offset_term)}) + log_den = -apply(A^2, 2, sum) / noise_scale^2 + } else { + log_den = log_density_gaussian_conditional_(noise_scale, + opt_transform$linear_term, + as.matrix(opt_sample), + observed_raw+opt_transform$offset_term) + } + W = log_num - log_den + W = W - max(W) + return(exp(W)) +} + +get_mean_cov = function(noise_scale, linear_term, offset_term){ + temp = solve(t(linear_term) %*% linear_term) + cov = noise_scale^2*temp + mean = temp %*% t(linear_term) %*% offset_term + return(list(mean=mean, cov=cov)) +} + + + +conditional_density = function(noise_scale, lasso_soln) { + + active_set = lasso_soln$active_set + observed_raw = lasso_soln$observed_raw + opt_linear = lasso_soln$optimization_transform$linear_term + opt_offset = lasso_soln$optimization_transform$offset_term + observed_opt_state = lasso_soln$observed_opt_state + + nactive = 
length(active_set) + B = opt_linear[,1:nactive,drop=FALSE] + beta_offset = opt_offset + p = length(observed_opt_state) + + if (nactive < p) { + beta_offset = beta_offset+(opt_linear[,(nactive+1):p] %*% observed_opt_state[(nactive+1):p]) + } + opt_transform = list(linear_term=B, + offset_term = beta_offset) + reduced_B = chol(t(B) %*% B) + beta_offset = beta_offset + observed_raw + reduced_beta_offset = solve(t(reduced_B)) %*% (t(B) %*% beta_offset) + + log_condl_optimization_density = function(opt_state) { + if (sum(opt_state < 0) > 0) { + return(-Inf) + } + + use_C_code = TRUE + if (!use_C_code) { + A = reduced_B %*% as.matrix(opt_state) + reduced_beta_offset + A = apply(A, 2, function(x) {x + reduced_beta_offset}) + log_den = -apply(A^2, 2, sum) / noise_scale^2 + } else { + log_den = log_density_gaussian_conditional_(noise_scale, + reduced_B, + as.matrix(opt_state), + reduced_beta_offset) + } + return(log_den) + } + lasso_soln$log_optimization_density = log_condl_optimization_density + lasso_soln$observed_opt_state = observed_opt_state[1:nactive] + lasso_soln$optimization_transform = opt_transform + reduced_opt_transform =list(linear_term = reduced_B, offset_term = reduced_beta_offset) + return(list(lasso_soln=lasso_soln, + reduced_opt_transform = reduced_opt_transform)) +} + +randomizedLassoInf = function(X, + y, + lam, + family=c("gaussian", "binomial"), + sigma=NULL, + noise_scale=NULL, + ridge_term=NULL, + condition_subgrad=TRUE, + level=0.9, + sampler=c("norejection", "adaptMCMC"), + nsample=10000, + burnin=2000, + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-8, # tolerance for relative convergence of parameter + objective_tol=1.e-8, # tolerance for relative decrease in objective + objective_stop=FALSE, + kkt_stop=TRUE, + parameter_stop=TRUE) + { + + n = nrow(X) + p = ncol(X) + + family = match.arg(family) + + lasso_soln = randomizedLasso(X, + y, + lam, + family=family, + noise_scale=noise_scale, + ridge_term=ridge_term, + max_iter=max_iter, + kkt_tol=kkt_tol, + parameter_tol=parameter_tol, + objective_tol=objective_tol, + objective_stop=objective_stop, + kkt_stop=kkt_stop, + parameter_stop=parameter_stop) + + active_set = lasso_soln$active_set + nactive = length(active_set) + + if (nactive==0){ + return (list(active_set=active_set, pvalues=c(), ci=c())) + } + inactive_set = lasso_soln$inactive_set + + noise_scale = lasso_soln$noise_scale # set to default value in randomizedLasso + + constraints = matrix(0,nactive,2) + constraints[,2] = Inf + if (condition_subgrad==TRUE){ + condl_lasso=conditional_density(noise_scale, lasso_soln) + lasso_soln = condl_lasso$lasso_soln + cur_opt_transform = condl_lasso$reduced_opt_transform + } else{ + if (nactive0 + tau_min = 0 + tau_max = 10 + if (sum(neg_velocity)>0){ + R = (-constraints[neg_velocity,1]+pos[neg_velocity])/(-velocity[neg_velocity]) + tau_max = min(tau_max, min(R)) + L = (-constraints[neg_velocity,2]+pos[neg_velocity])/(-velocity[neg_velocity]) + tau_min = max(tau_min, max(L)) + } + if (sum(pos_velocity)>0){ + R = (constraints[pos_velocity,2]-pos[pos_velocity])/velocity[pos_velocity] + tau_max = min(tau_max, min(R)) + L = (constraints[pos_velocity,1]-pos[pos_velocity])/velocity[pos_velocity] + tau_min = max(tau_min, max(L)) + } + + f=function(t){as.numeric(t(velocity) %*% grad_negative_log_density(pos+velocity*t))} + tau_star = tau_max + if (f(tau_min)*f(tau_max)<0){ + tau_star = uniroot(f, c(tau_min, tau_max))$root + } else{ + if 
(negative_log_density(pos+velocity*tau_min)= 2p, and using the standard deviation of y when n < 2p. In the latter case, the user -should use \code{\link{estimateSigma}} function for a more accurate estimate +should use the \code{\link{estimateSigma}} function for a more accurate estimate. +Not used for family="binomial" or "cox". } \item{alpha}{ Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) } \item{intercept}{ Was the lasso problem solved (e.g., by glmnet) with an intercept in the model? -Default is TRUE +Default is TRUE. Must be TRUE for the "binomial" family. Not used for the "cox" family, where no intercept is assumed. } +\item{add.targets}{Optional vector of predictors to be included as targets of inference, regardless of whether or not they are selected by the lasso. Default is NULL.} +\item{status}{Censoring status for the Cox model; 1=failure, 0=censored} \item{type}{Contrast type for p-values and confidence intervals: default is "partial"---meaning that the contrasts tested are the partial population regression coefficients, within the active set of predictors; the alternative is @@ -77,23 +85,40 @@ helpful (though computationally more costly). In particular, extra precision mig if the values in the output columns of \code{tailarea} differ noticeably from alpha/2. } \item{verbose}{ -Print out progress along the way? Default is FALSE} +Print out progress along the way? Default is FALSE +} +\item{linesearch.try}{ +When running type="full" (i.e., the debiased lasso), how many attempts to make in the line search? +} } \details{ This function computes selective p-values and confidence intervals for the lasso, given a fixed value of the tuning parameter lambda. +Three different response types are supported: gaussian, binomial and Cox. The confidence interval construction involves numerical search and can be fragile: if the observed statistic is too close to either end of the truncation interval (vlo and vup, see references), then one or possibly both endpoints of the interval of desired coverage cannot be computed, and default to +/- Inf. The output \code{tailarea} gives the achieved Gaussian tail areas for the reported intervals---these should be close to alpha/2, and can be used for error-checking purposes. + +Important: before running glmnet (or some other lasso solver), x should be centered, that is x <- scale(x,TRUE,FALSE). +In addition, if standardization of the predictors is desired, x should be scaled as well: x <- scale(x,TRUE,TRUE). +Then, when running glmnet, set standardize=FALSE. See the example below. + +The penalty.factor facility in glmnet, allowing a different penalty lambda for each predictor, +is not yet implemented in fixedLassoInf. However, you can finesse this; see the example below. One caveat: with this approach, a penalty factor of zero (forcing a predictor in) +is not allowed. + +Note that the coefficients and standard errors reported are unregularized. +For example, for the Gaussian family they are the usual least squares estimates and standard errors +for the model fit to the active set from the lasso.
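As a quick sanity check on the reported intervals, the realized tail areas can be inspected directly. The lines below are an illustrative sketch only, not part of the package examples; they reuse the objects x, y, beta, lambda and sigma from the first Gaussian example further down, and the 0.05 target assumes alpha = 0.1 is passed (alpha/2 in each tail).

out = fixedLassoInf(x, y, beta, lambda, sigma = sigma, alpha = 0.1)
# realized tail areas; each entry should be close to alpha/2 = 0.05
range(out$tailarea)
# a large deviation flags an interval whose endpoint ran into the
# truncation limits (vlo, vup) and defaulted to +/- Inf
max(abs(out$tailarea - 0.05))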
} \value{ \item{type}{Type of coefficients tested (partial or full)} \item{lambda}{Value of tuning parameter lambda used} -\item{pv}{P-values for active variables} +\item{pv}{One-sided P-values for active variables, uses the fact we have conditioned on the sign.} \item{ci}{Confidence intervals} \item{tailarea}{Realized tail areas (lower and upper) for each confidence interval} \item{vlo}{Lower truncation limits for statistics} @@ -108,53 +133,178 @@ to alpha/2, and can be used for error-checking purposes. } \references{ -Jason Lee, Dennis Sun, Yuekai Sun, Jonathan Taylor (2013). +Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013). Exact post-selection inference, with application to the lasso. arXiv:1311.6238. + + Jonathan Taylor and Robert Tibshirani (2016) Post-selection inference for L1-penalized likelihood models. +arXiv:1602.07358 + } \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x = scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) -#NOT RUN -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -# first run glmnet -#gfit=glmnet(x,y,standardize=F) -#lambda = .1 -#extract coef for a given lambda; Note the 1/n factor! -#beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +# first run glmnet +gfit = glmnet(x,y,standardize=FALSE) + +# extract coef for a given lambda; note the 1/n factor! +# (and we don't save the intercept term) +lambda = .8 +beta = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)[-1] + +# compute fixed lambda p-values and selection intervals +out = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +out + + +## as above, but use lar function instead to get initial +## lasso fit (should get same results) +lfit = lar(x,y,normalize=FALSE) +beta = coef(lfit, s=lambda, mode="lambda") +out2 = fixedLassoInf(x, y, beta, lambda, sigma=sigma) +out2 + +## mimic different penalty factors by first scaling x + set.seed(43) +n = 50 +p = 10 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) +pf=c(rep(1,7),rep(.1,3)) #define penalty factors +pf=p*pf/sum(pf) # penalty factors should be rescaled so they sum to p +xs=scale(x,FALSE,pf) #scale cols of x by penalty factors +# first run glmnet +gfit = glmnet(xs, y, standardize=FALSE) + +# extract coef for a given lambda; note the 1/n factor! 
+# (and we don't save the intercept term) +lambda = .8 +beta_hat = coef(gfit, x=xs, y=y, s=lambda/n, exact=TRUE)[-1] # compute fixed lambda p-values and selection intervals -#aa=fixedLassoInf(x,y,beta,lambda,sigma=sigma) -# -# as above, but use lar function instead to get initial lasso fit (should get same result) -# fit=lar(x,y,normalize=F) -# beta=coef(fit,s=lambda,mode="lambda") -# fixedLassoInf(x,y,beta,lambda,sigma=sigma) - - -## -# states data example: predict life expectancy -#x=state.x77[,-4] -#y=state.x77[,4] -#x=scale(x,T,T) -#n=nrow(x) -# estimate sigma from cross-validated lasso fit -# cvf=cv.glmnet(x,y) -# sigmahat=estimateSigma(x,y,stand=F)$sigmahat -# -# fit lasso -# lambda=n*cvf$lambda.min -# gfit=glmnet(x,y,standardize=F) -# bhat=coef(gfit, s=lambda/n, exact=TRUE)[-1] -# -# compute p-values and confidence intervals -# fixedLassoInf(x,y,bhat,lambda,sigma=sigmahat) +out = fixedLassoInf(xs,y,beta_hat,lambda,sigma=sigma) + +#rescale conf points to undo the penalty factor +out$ci=t(scale(t(out$ci),FALSE,pf[out$vars])) +out + +#logistic model +set.seed(43) + n = 50 + p = 10 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + y = x\%*\%beta + sigma*rnorm(n) + y=1*(y>mean(y)) + # first run glmnet + gfit = glmnet(x,y,standardize=FALSE,family="binomial") + + # extract coef for a given lambda; note the 1/n factor! + # (and here we DO include the intercept term) + lambda = .8 + beta_hat = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE) + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial") + out + + + # Cox model + + set.seed(43) + n = 50 + p = 10 + sigma = 1 + + x = matrix(rnorm(n*p), n, p) + x=scale(x, TRUE, TRUE) + + beta = c(3,2,rep(0,p-2)) + tim = as.vector(x\%*\%beta + sigma*rnorm(n)) + tim= tim-min(tim)+1 + status=sample(c(0,1),size=n,replace=TRUE) + # first run glmnet + + + y = Surv(tim,status) + gfit = glmnet(x, y, standardize=FALSE, family="cox") + + # extract coef for a given lambda; note the 1/n factor! + + lambda = 1.5 + beta_hat = as.numeric(coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)) + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x, tim, beta_hat, lambda, status=status, family="cox") + out + + # Debiased lasso or "full" + + n = 50 + p = 100 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x = scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + y = x\%*\%beta + sigma*rnorm(n) + + # first run glmnet + gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE) + + # extract coef for a given lambda; note the 1/n factor! + # (and we don't save the intercept term) + lambda = 2.8 + beta = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)[-1] + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE) + out + + # When n > p and "full" we use the full inverse + # instead of Javanmard and Montanari's approximate inverse + + n = 200 + p = 50 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x = scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + y = x\%*\%beta + sigma*rnorm(n) + + # first run glmnet + gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE) + + # extract coef for a given lambda; note the 1/n factor! 
+ # (and we don't save the intercept term) + lambda = 2.8 + beta = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)[-1] + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE) + out + } + \ No newline at end of file diff --git a/selectiveInference/man/forwardStop.Rd b/selectiveInference/man/forwardStop.Rd index 5ca36df..87eb7ab 100644 --- a/selectiveInference/man/forwardStop.Rd +++ b/selectiveInference/man/forwardStop.Rd @@ -27,27 +27,29 @@ Guarantees FDR control at the level alpha, for independent p-values. Step number for sequential stop. } \references{ -Max Grazier G'Sell, Stefan Wager, Alexandra Chouldechova, Rob Tibshirani (2014). +Max Grazier G'Sell, Stefan Wager, Alexandra Chouldechova, and Rob Tibshirani (2014). Sequential selection procedures and Fflse Discovery Rate Control. arXiv:1309.5352. To appear in Journal of the Royal Statistical Society: Series B. } \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(433) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(4,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#first run forward stepwise -# fsfit=fs(x,y) -# -# aa=fsInf(fsfit) -# forwardStop(aa, alpha=.10) +# run forward stepwise +fsfit = fs(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = fsInf(fsfit) +out + +# estimate optimal stopping point +forwardStop(out$pv, alpha=.10) } diff --git a/selectiveInference/man/fs.Rd b/selectiveInference/man/fs.Rd index 434edc4..2a61c83 100644 --- a/selectiveInference/man/fs.Rd +++ b/selectiveInference/man/fs.Rd @@ -32,12 +32,14 @@ the interface for the \code{lar} function) \details{ This function implements forward stepwise regression, adding the predictor at each -step that maximizes the absolute correlation between the predictors-- orthogonalized wrt the current model-- and the residual. This entry +step that maximizes the absolute correlation between the predictors---once +orthogonalized with respect to the current model---and the residual. This entry criterion is standard, and is equivalent to choosing the variable that achieves the biggest drop in RSS at each step; it is used, e.g., by the \code{step} function -in R. Note that, for example, the \code{lars} package implements a stepwise option (with type="step"), but -uses a (mildly) different entry criterion, based on maximal absolute correlation between the original (non-orthogonalized) -predictors and the residual. +in R. Note that, for example, the \code{lars} package implements a stepwise option +(with type="step"), but uses a (mildly) different entry criterion, based on maximal +absolute correlation between the original (non-orthogonalized) predictors and the +residual. 
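The equivalence between the two phrasings of the entry criterion is easy to check numerically. The following is an illustrative sketch only (not part of the package, all object names are local to the sketch): with standardized columns, the variable most correlated with the residual is also the one giving the biggest drop in RSS at the first step.

set.seed(1)
n = 30; p = 5
x = scale(matrix(rnorm(n * p), n, p))
y = x[, 1] + rnorm(n)
r = y - mean(y)
# absolute inner product with the residual (columns are standardized,
# so this ranks variables by absolute correlation)
score.cor = abs(crossprod(x, r))
# drop in RSS from adding each single predictor to the intercept-only model
score.rss = sapply(1:p, function(j) sum(r^2) - sum(resid(lm(r ~ x[, j]))^2))
which.max(score.cor) == which.max(score.rss)  # both criteria pick the same variable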
} \value{ \item{action}{Vector of predictors in order of entry} @@ -68,36 +70,22 @@ to enter along the path} } \examples{ -#NOT RUN -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,F) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) -#fsfit=fs(x,y) -# -#out=fsInf(x,y) -# -# states data example: predict life expectancy -#x=state.x77[,-4] -#y=state.x77[,4] -#x=scale(x,T,T) -#n=nrow(x) -# -# estimate sigma from cross-validated lasso fit -#cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y,stand=F)$sigmahat -# -# fit forwardStepwise -# fsfit=fs(x,y,normalize=F) -# -# compute p-values and confidence intervals -# fsInf(fsfit,sigma=sigmahat) +# run forward stepwise, plot results +fsfit = fs(x,y) +plot(fsfit) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = fsInf(fsfit) +out } diff --git a/selectiveInference/man/fsInf.Rd b/selectiveInference/man/fsInf.Rd index f8afc93..613bc5a 100644 --- a/selectiveInference/man/fsInf.Rd +++ b/selectiveInference/man/fsInf.Rd @@ -82,7 +82,7 @@ to alpha/2, and can be used for error-checking purposes. \item{khat}{When type is "active", this is an estimated stopping point declared by \code{\link{forwardStop}}; when type is "aic", this is the value chosen by the modified AIC scheme} -\item{pv}{P-values for active variables} +\item{pv}{One sided P-values for active variables, uses the sign that a variable entered the model with.} \item{ci}{Confidence intervals} \item{tailarea}{Realized tail areas (lower and upper) for each confidence interval} \item{vlo}{Lower truncation limits for statistics} @@ -97,7 +97,7 @@ value chosen by the modified AIC scheme} } \references{ -Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, Rob Tibshirani (2014). +Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014). Exact post-selection inference for sequential regression procedures. arXiv:1401.3889. Joshua Loftus and Jonathan Taylor (2014). A significance test for forward stepwise @@ -109,47 +109,27 @@ model selection. arXiv:1405.3920. 
\seealso{\code{\link{fs}}} \examples{ -#NOT RUN -#set.seed(433) +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) +# run forward stepwise +fsfit = fs(x,y) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(4,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#first run forward stepwise -# fsfit=fs(x,y) -# -# forward stepwise inference for each successive entry of a predictor; -# -# sigma estimated from mean squared residual -# -# aa=fsInf(fsfit) +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out.seq = fsInf(fsfit) +out.seq -# forward stepwise inference for fixed model of size 4, testing deletion of each predictor; -# known value of sigma used -# aa2=fsInf(fsfit,sigma=sigma,type="all",k=4) +# compute p-values and confidence intervals after AIC stopping +out.aic = fsInf(fsfit,type="aic") +out.aic -# forward stepwise inference for model chosen by AIC, testing deletion of each predictor; -# known value of sigma used -# aa3=fsInf(fsfit,sigma=sigma,type="aic") -# -# states data example: predict life expectancy -#x=state.x77[,-4] -#y=state.x77[,4] -#x=scale(x,T,T) -#n=nrow(x) -# estimate sigma from cross-validated lasso fit -#cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y,stand=F)$sigmahat -# -# run forward stepwise -#fsfit=fs(x,y,normalize=F) -# -# compute p-values and confidence intervals -# fsInf(fsfit,sigma=sigmahat) +# compute p-values and confidence intervals after 5 fixed steps +out.fix = fsInf(fsfit,type="all",k=5) +out.fix } diff --git a/selectiveInference/man/groupfs.Rd b/selectiveInference/man/groupfs.Rd index 4567c6f..5c2171a 100644 --- a/selectiveInference/man/groupfs.Rd +++ b/selectiveInference/man/groupfs.Rd @@ -1,45 +1,50 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.groupfs.R \name{groupfs} \alias{groupfs} \title{Select a model with forward stepwise.} \usage{ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, - normalize = TRUE, verbose = FALSE) + center = TRUE, normalize = TRUE, aicstop = 0, verbose = FALSE) } \arguments{ \item{x}{Matrix of predictors (n by p).} \item{y}{Vector of outcomes (length n).} -\item{index}{Group membership indicator of length p.} +\item{index}{Group membership indicator of length p. Check that \code{sort(unique(index)) = 1:G} where \code{G} is the number of distinct groups.} \item{maxsteps}{Maximum number of steps for forward stepwise.} -\item{sigma}{Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. See \code{\link{extractAIC}} for details.} +\item{sigma}{Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. When NULL, \code{link{groupfsInf}} performs truncated F inference instead of truncated \eqn{\chi}. See \code{\link[stats]{extractAIC}} for details on the AIC criterion.} -\item{k}{Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC.} +\item{k}{Multiplier of model size penalty, the default is \code{k = 2} for AIC. 
Use \code{k = log(n)} for BIC, or \code{k = 2log(p)} for RIC (best for high dimensions, when \eqn{p > n}). If \eqn{G < p} then RIC may be too restrictive and it would be better to use \code{log(G) < k < 2log(p)}.} -\item{intercept}{Should an intercept be included in the model? Default is TRUE.} +\item{intercept}{Should an intercept be included in the model? Default is TRUE. Does not count as a step.} + +\item{center}{Should the columns of the design matrix be centered? Default is TRUE.} \item{normalize}{Should the design matrix be normalized? Default is TRUE.} +\item{aicstop}{Early stopping if AIC increases. Default is 0 corresponding to no early stopping. Positive integer values specify the number of times the AIC is allowed to increase in a row, e.g. with \code{aicstop = 2} the algorithm will stop if the AIC criterion increases for 2 steps in a row. The default of \code{\link[stats]{step}} corresponds to \code{aicstop = 1}.} + \item{verbose}{Print out progress along the way? Default is FALSE.} } \value{ An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values. } \description{ -This function implements forward selection of linear models almost identically to \code{\link{stepAIC}} with \code{direction = "forward"}. The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. +This function implements forward selection of linear models almost identically to \code{\link[stats]{step}} with \code{direction = "forward"}. The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. } \examples{ x = matrix(rnorm(20*40), nrow=20) index = sort(rep(1:20, 2)) -y = rnorm(20) + 2 * (x[,1] - x[,2]) - (x[,3] - x[,4]) +y = rnorm(20) + 2 * x[,1] - x[,4] fit = groupfs(x, y, index, maxsteps = 5) -pvals = groupfsInf(fit) +out = groupfsInf(fit) +out } \seealso{ -\code{\link{groupfsInf}} +\code{\link{groupfsInf}}, \code{\link{factorDesign}}. } diff --git a/selectiveInference/man/groupfsInf.Rd b/selectiveInference/man/groupfsInf.Rd index 43cf25b..16efcab 100644 --- a/selectiveInference/man/groupfsInf.Rd +++ b/selectiveInference/man/groupfsInf.Rd @@ -1,51 +1,31 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.groupfs.R \name{groupfsInf} \alias{groupfsInf} \title{Compute selective p-values for a model fitted by \code{groupfs}.} \usage{ -groupfsInf(obj, sigma = NULL, projs = NULL, verbose = FALSE) +groupfsInf(obj, sigma = NULL, verbose = TRUE) } \arguments{ \item{obj}{Object returned by \code{\link{groupfs}} function} -\item{sigma}{Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and the mean squared residual of the selected model when n < 2p. In the latter case, the user should use \code{\link{estimateSigma}} function for a more accurate estimate.} +\item{sigma}{Estimate of error standard deviation. Default is NULL and in this case groupfsInf uses the value of sigma specified to \code{\link{groupfs}}.} -\item{projs}{Additional projections to define model selection event. 
For use with cross-validation. Default is NULL and it is not recommended to change this.} - -\item{verbose}{Print out progress along the way? Default is FALSE.} +\item{verbose}{Print out progress along the way? Default is TRUE.} } \value{ -An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. The default printing behavior should supply adequate information. +An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. \describe{ \item{vars}{Labels of the active groups in the order they were included.} \item{pv}{Selective p-values computed from appropriate truncated distributions.} \item{sigma}{Estimate of error variance used in computing p-values.} - \item{TC}{Observed value of truncated chi.} + \item{TC or TF}{Observed value of truncated \eqn{\chi} or \eqn{F}.} \item{df}{Rank of group of variables when it was added to the model.} - \item{support}{List of intervals defining the truncation region of the truncated chi.} + \item{support}{List of intervals defining the truncation region of the corresponding statistic.} } } \description{ -Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \code{chi^2} statistics to the regions implied by the model selection event. Details are provided in a forthcoming work. -} -\examples{ -#NOT RUN -#set.seed(1) -#n <- 40 -#p <- 20 -#index <- sort(rep(1:(p/2), 2)) -#steps <- 10 -#sparsity <- 5 -#snr <- 3 -# x <- matrix(rnorm(n*p), nrow=n) -# beta <- rep(0, p) -# beta[which(index %in% 1:sparsity)] <- snr -# y <- x %*% beta+rnorm(n) - -#fit <- groupfs(x, y, index=1:p, maxsteps = steps) - -#out<- groupfsInf(fit) +Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. If the \code{sigma} to \code{\link{groupfs}} was NULL then groupfsInf uses truncated \eqn{F} statistics instead of truncated \eqn{\chi}. The \code{sigma} argument to groupfsInf allows users to override and use \eqn{\chi}, but this is not recommended unless \eqn{\sigma} can be estimated well (i.e. \eqn{n > p}). } diff --git a/selectiveInference/man/lar.Rd b/selectiveInference/man/lar.Rd index 5bafbf9..590a663 100644 --- a/selectiveInference/man/lar.Rd +++ b/selectiveInference/man/lar.Rd @@ -62,10 +62,10 @@ to enter along the path} } \references{ -Brad Efron, Trevor Hastie, Iain Johnstone and Rob Tibshirani (2002). +Brad Efron, Trevor Hastie, Iain Johnstone, and Rob Tibshirani (2002). Least angle regression. Annals of Statistics (with discussion). -See also the descriptions in Trevor Hastie, Rob Tibshirani and +See also the descriptions in Trevor Hastie, Rob Tibshirani, and Jerome Friedman (2002, 2009). Elements of Statistical Learning. } @@ -76,17 +76,20 @@ Jerome Friedman (2002, 2009). Elements of Statistical Learning. 
} \examples{ -#NOT RUN -#set.seed(33) -#n=20 -#p=10 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) -#generate data -#beta=c(3,3,rep(0,p-2)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#run lar -#larfit=lar(x,y,verbose=TRUE) -#plot(larfit) +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run LAR, plot results +larfit = lar(x,y) +plot(larfit) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = larInf(larfit) +out } diff --git a/selectiveInference/man/larInf.Rd b/selectiveInference/man/larInf.Rd index 86de5d2..8e3b2d0 100644 --- a/selectiveInference/man/larInf.Rd +++ b/selectiveInference/man/larInf.Rd @@ -102,7 +102,7 @@ value chosen by the modified AIC scheme} } \references{ -Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, Rob Tibshirani (2014). +Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014). Exact post-selection inference for sequential regression procedures. arXiv:1401.3889. } @@ -113,37 +113,28 @@ Exact post-selection inference for sequential regression procedures. arXiv:1401. } \examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) -#NOT RUN -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#first run lar -# larfit=lar(x,y) -# -#lar inference for each successive entry of a predictor; sigma estimated -# from mean squared residual from least squares fit -# aa=larInf(larfit) -# -# states data example: predict life expectancy -#x=state.x77[,-4] -#y=state.x77[,4] -#x=scale(x,T,T) -#n=nrow(x) -# estimate sigma from cross-validated lasso fit -#cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y,stand=F)$sigmahat -# # run LAR -#larfit=lar(x,y,normalize=F) -# -# compute p-values and confidence intervals -# larInf(larfit,sigma=sigmahat) +larfit = lar(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out.seq = larInf(larfit) +out.seq + +# compute p-values and confidence intervals after AIC stopping +out.aic = larInf(larfit,type="aic") +out.aic + +# compute p-values and confidence intervals after 5 fixed steps +out.fix = larInf(larfit,type="all",k=5) +out.fix } diff --git a/selectiveInference/man/manyMeans.Rd b/selectiveInference/man/manyMeans.Rd index db2d992..57fc429 100644 --- a/selectiveInference/man/manyMeans.Rd +++ b/selectiveInference/man/manyMeans.Rd @@ -43,7 +43,7 @@ were selected by the procedure (either BH(q) or top-K). Labelled "Selind" in out } \references{ -Stephen Reid, Jonathan Taylor, Rob Tibshirani (2014). +Stephen Reid, Jonathan Taylor, and Rob Tibshirani (2014). Post-selection point and interval estimation of signal sizes in Gaussian samples. arXiv:1405.3340. } @@ -51,12 +51,10 @@ arXiv:1405.3340. 
\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ - -#NOT RUN -#set.seed(12345) -#n = 100 # sample size -#signal = 3 # signal size -#mu = c(rep(signal, floor (n/5)), rep(0, n-floor(n/5))) # 20% of elements get the signal; rest 0 -#y = mu + rnorm (n, 0, 1) -#mmObj = manyMeans(y, bh.q=0.1) +set.seed(12345) +n = 100 +mu = c(rep(3,floor(n/5)), rep(0,n-floor(n/5))) +y = mu + rnorm(n) +out = manyMeans(y, bh.q=0.1) +out } diff --git a/selectiveInference/man/plot.fs.Rd b/selectiveInference/man/plot.fs.Rd index 3c27ad1..4f77013 100644 --- a/selectiveInference/man/plot.fs.Rd +++ b/selectiveInference/man/plot.fs.Rd @@ -28,18 +28,15 @@ Default is TRUE} \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(33) -#n=200 -#p=20 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#generate y -#beta=c(3,-2,rep(0,p-2)) -#beta=c(rep(3,10),rep(0,p-10)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#fsfit=fs(x,y) -#plot(fsfit) +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise, plot results +fsfit = fs(x,y) +plot(fsfit) } diff --git a/selectiveInference/man/plot.lar.Rd b/selectiveInference/man/plot.lar.Rd index ebf6c0b..baa1195 100644 --- a/selectiveInference/man/plot.lar.Rd +++ b/selectiveInference/man/plot.lar.Rd @@ -32,18 +32,15 @@ Default is TRUE} \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(33) -#n=200 -#p=20 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#generate y -#beta=c(3,-2,rep(0,p-2)) -#beta=c(rep(3,10),rep(0,p-10)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#larfit=lar(x,y) -#plot(larfit) +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run LAR, plot results +larfit = lar(x,y) +plot(larfit) } diff --git a/selectiveInference/man/predict.fs.Rd b/selectiveInference/man/predict.fs.Rd index 32f1a44..5e50482 100644 --- a/selectiveInference/man/predict.fs.Rd +++ b/selectiveInference/man/predict.fs.Rd @@ -35,18 +35,15 @@ Either a vector/matrix of predictions, or a vector/matrix of coefficients. 
\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(33) -#n=200 -#p=20 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#generate y -#beta=c(3,-2,rep(0,p-2)) -#beta=c(rep(3,10),rep(0,p-10)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#obj=fs(x,y) -#fit=predict.fs(obj,x) +set.seed(33) +n = 200 +p = 20 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(rep(3,10),rep(0,p-10)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise and predict functions +obj = fs(x,y) +fit = predict(obj,x,s=3) } diff --git a/selectiveInference/man/predict.groupfs.Rd b/selectiveInference/man/predict.groupfs.Rd new file mode 100644 index 0000000..492b74f --- /dev/null +++ b/selectiveInference/man/predict.groupfs.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/funs.groupfs.R +\name{predict.groupfs} +\alias{predict.groupfs} +\title{Prediction and coefficient functions for \code{\link{groupfs}}.} +\description{ +Make predictions or extract coefficients from a groupfs forward stepwise object.} +\usage{ +\method{predict}{groupfs}(object, newx) +} +\arguments{ +\item{object}{Object returned by a call to \code{\link{groupfs}}.} + +\item{newx}{Matrix of x values at which the predictions are desired. If NULL, the x values from groupfs fitting are used.} +} +\value{ +A vector of predictions or a vector of coefficients. +} + diff --git a/selectiveInference/man/predict.lar.Rd b/selectiveInference/man/predict.lar.Rd index 340cd52..c91bed3 100644 --- a/selectiveInference/man/predict.lar.Rd +++ b/selectiveInference/man/predict.lar.Rd @@ -38,18 +38,15 @@ Either a vector/matrix of predictions, or a vector/matrix of coefficients. \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(33) -#n=200 -#p=20 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#generate y -#beta=c(3,-2,rep(0,p-2)) -#beta=c(rep(3,10),rep(0,p-10)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#larfit=lar(x,y) -#fit=predict.lar(larfit,x,type="fit") +set.seed(33) +n = 200 +p = 20 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(rep(3,10),rep(0,p-10)) +y = x\%*\%beta + sigma*rnorm(n) + +# run lar and predict functions +obj = lar(x,y) +fit = predict(obj,x,s=3) } diff --git a/selectiveInference/man/randomizedLassoInf.Rd b/selectiveInference/man/randomizedLassoInf.Rd new file mode 100644 index 0000000..351596e --- /dev/null +++ b/selectiveInference/man/randomizedLassoInf.Rd @@ -0,0 +1,165 @@ +\name{randomizedLassoInf} +\alias{randomizedLassoInf} + +\title{ +Inference for the randomized lasso, with a fixed lambda +} +\description{ +Compute p-values and confidence intervals based on selecting +an active set with the randomized lasso, at a +fixed value of the tuning parameter lambda and using Gaussian +randomization. +} +\usage{ +randomizedLassoInf(X, + y, + lam, + family=c("gaussian", "binomial"), + sigma=NULL, + noise_scale=NULL, + ridge_term=NULL, + condition_subgrad=TRUE, + level=0.9, + sampler=c("norejection", "adaptMCMC"), + nsample=10000, + burnin=2000, + max_iter=100, + kkt_tol=1.e-4, + parameter_tol=1.e-8, + objective_tol=1.e-8, + objective_stop=FALSE, + kkt_stop=TRUE, + parameter_stop=TRUE) +} +\arguments{ + \item{X}{ +Matrix of predictors (n by p); +} + \item{y}{ +Vector of outcomes (length n) +} + \item{lam}{ +Value of lambda used to compute beta. See the above warning + Be careful! 
This function uses the "standard" lasso objective + \deqn{ + 1/2 \|y - x \beta\|_2^2 + \lambda \|\beta\|_1. + } + In contrast, glmnet multiplies the first term by a factor of 1/n. + So after running glmnet, to extract the beta corresponding to a value lambda, + you need to use \code{beta = coef(obj, s=lambda/n)[-1]}, + where obj is the object returned by glmnet (and [-1] removes the intercept, + which glmnet always puts in the first component) +} +\item{family}{ +Response type: "gaussian" (default), "binomial". +} +\item{sigma}{ +Estimate of error standard deviation. If NULL (default), this is estimated +using the mean squared residual of the full least squares based on +selected active set. +} +\item{noise_scale}{ +Scale of Gaussian noise added to objective. Default is +0.5 * sd(y) times the sqrt of the mean of the trace of X^TX. +} +\item{ridge_term}{ +A small "elastic net" or ridge penalty is added to ensure +the randomized problem has a solution. +0.5 * sd(y) times the sqrt of the mean of the trace of X^TX divided by +sqrt(n). +} +\item{condition_subgrad}{ +In forming selective confidence intervals and p-values should we condition +on the inactive coordinates of the subgradient as well? +Default is TRUE. +} +\item{level}{ +Level for confidence intervals. +} +\item{sampler}{ +Which sampler to use -- default is a no-rejection sampler. Otherwise +use MCMC from the adaptMCMC package. +} +\item{nsample}{ +Number of samples of optimization variables to sample. +} +\item{burnin}{ +How many samples of optimization variable to discard (should be less than nsample). +} +\item{max_iter}{ +How many rounds of updates used of coordinate descent in solving randomized +LASSO. +} +\item{kkt_tol}{ +Tolerance for checking convergence based on KKT conditions. +} +\item{parameter_tol}{ +Tolerance for checking convergence based on convergence +of parameters. +} +\item{objective_tol}{ +Tolerance for checking convergence based on convergence +of objective value. +} +\item{kkt_stop}{ +Should we use KKT check to determine when to stop? +} +\item{parameter_stop}{ +Should we use convergence of parameters to determine when to stop? +} +\item{objective_stop}{ +Should we use convergence of objective value to determine when to stop? +} +} + +\details{ +This function computes selective p-values and confidence intervals for a +randomized version of the lasso, +given a fixed value of the tuning parameter lambda. + +} +\value{ +\item{type}{Type of coefficients tested (partial or full)} +\item{lambda}{Value of tuning parameter lambda used} +\item{pv}{One-sided P-values for active variables, uses the fact we have conditioned on the sign.} +\item{ci}{Confidence intervals} +\item{tailarea}{Realized tail areas (lower and upper) for each confidence interval} +\item{vlo}{Lower truncation limits for statistics} +\item{vup}{Upper truncation limits for statistics} +\item{vmat}{Linear contrasts that define the observed statistics} +\item{y}{Vector of outcomes} +\item{vars}{Variables in active set} +\item{sign}{Signs of active coefficients} +\item{alpha}{Desired coverage (alpha/2 in each tail)} +\item{sigma}{Value of error standard deviation (sigma) used} +\item{call}{The call to lassoInf} +} + +\references{ +Xiaoying Tian, and Jonathan Taylor (2015). +Selective inference with a randomized response. arxiv.org:1507.06739 + +Xiaoying Tian, Snigdha Panigrahi, Jelena Markovic, Nan Bi and Jonathan Taylor (2016). +Selective inference after solving a convex problem. 
+arXiv:1609.05609 + +} +\author{Jelena Markovic, Jonathan Taylor} + +\examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 0.2 +lam = 0.5 + +X = matrix(rnorm(n*p), n, p) +X = scale(X, TRUE, TRUE) / sqrt(n-1) + +beta = c(3,2,rep(0,p-2)) +y = X\%*\%beta + sigma*rnorm(n) + +result = randomizedLassoInf(X, y, lam) + +} + diff --git a/selectiveInference/man/scaleGroups.Rd b/selectiveInference/man/scaleGroups.Rd index 6e41ef6..4607890 100644 --- a/selectiveInference/man/scaleGroups.Rd +++ b/selectiveInference/man/scaleGroups.Rd @@ -1,10 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.groupfs.R \name{scaleGroups} \alias{scaleGroups} \title{Center and scale design matrix by groups} \usage{ -scaleGroups(x, index, center = TRUE, scale = TRUE) +scaleGroups(x, index, center = TRUE, normalize = TRUE) } \arguments{ \item{x}{Design matrix.} @@ -13,12 +13,16 @@ scaleGroups(x, index, center = TRUE, scale = TRUE) \item{center}{Center groups, default is TRUE.} -\item{scale}{Scale groups by Frobenius norm, default is TRUE.} +\item{normalize}{Scale groups by Frobenius norm, default is TRUE.} } \value{ -Scaled design matrix +\describe{ + \item{x}{Optionally centered/scaled design matrix.} + \item{xm}{Means of groups in original design matrix.} + \item{xs}{Frobenius norms of groups in original design matrix.} +} } \description{ -Center and scale design matrix by groups +For internal use by \code{\link{groupfs}}. } diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index e08f69a..6e038d4 100644 --- a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -7,7 +7,7 @@ Tools for selective inference \description{ Functions to perform post-selection inference for forward stepwise regression, least angle regression, the lasso and the -many normal means problem +many normal means problem. The lasso function also supports logistic regression and the Cox model. } \details{ \tabular{ll}{ @@ -20,7 +20,8 @@ This package provides tools for inference after selection, in forward stepwise regression, least angle regression, the lasso, and the many normal means problem. The functions compute p-values and selection intervals that properly account for the inherent selection carried out by the procedure. These have exact finite sample -type I error and coverage under Gaussian errors. +type I error and coverage under Gaussian errors. For the logistic and Cox families (fixedLassoInf), + the coverage is asymptotically valid. This R package was developed as part of the selective inference software project in Python and R: @@ -58,121 +59,146 @@ Maintainer: Rob Tibshirani } \references{ -Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, Rob Tibshirani (2014). +Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014). Exact post-selection inference for sequential regression procedures. arXiv:1401.3889. -Jason Lee, Dennis Sun, Yuekai Sun, Jonathan Taylor (2013). +Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013). Exact post-selection inference, with application to the lasso. arXiv:1311.6238. -Stephen Reid, Jonathan Taylor, Rob Tibshirani (2014). +Stephen Reid, Jonathan Taylor, and Rob Tibshirani (2014). Post-selection point and interval estimation of signal sizes in Gaussian samples. arXiv:1405.3340. + + +Jonathan Taylor and Robert Tibshirani (2016). Post-selection inference for L1-penalized likelihood models.
+arXiv:1602.07358 + } \examples{ -#NOT RUN -# forward stepwise: -# -#generate some data -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(4,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -# -#first run forward stepwise -# fsfit=fs(x,y) -# -# forward stepwise inference for each successive entry of a predictor; -# sigma estimated from mean squared residual -# -# aa=fsInf(fsfit) -## -# lasso with fixed lambda -# -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(4,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -# -# first run glmnet -#gfit=glmnet(x,y,standardize=F) -#lam = .1 -#extract coef for a given lam; Note the 1/n factor in s! -#bhat = coef(gfit, s=lam/n, exact=TRUE)[-1] - -# compute fixed lambda p-values and selection intervals -#aa=fixedLassoInf(x,y,bhat,lam,sigma=sigma) -# -##least angle regression from mean squared residual +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out.seq = fsInf(fsfit) +out.seq + +# compute p-values and confidence intervals after AIC stopping +out.aic = fsInf(fsfit,type="aic") +out.aic + +# compute p-values and confidence intervals after 5 fixed steps +out.fix = fsInf(fsfit,type="all",k=5) +out.fix + +## NOT RUN---lasso at fixed lambda- Gaussian family +## first run glmnet +# gfit = glmnet(x,y) + +## extract coef for a given lambda; note the 1/n factor! +## (and we don't save the intercept term) +# lambda = .1 +# beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + +## compute fixed lambda p-values and selection intervals +# out = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +# out + + +#lasso at fixed lambda- logistic family #set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#first run lar -# larfit=lar(x,y) -# -#lar inference for each successive entry of a predictor; sigma estimated -# from mean squared residual from least squares fit -# aa=larInf(larfit) - -## -##many normal means - -#set.seed(12345) -#n = 100 # sample size -#signal = 3 # signal size -#mu = c(rep(signal, floor (n/5)), rep(0, n-floor(n/5))) # 20% of elements get the signal; rest 0 -#y = mu + rnorm (n, 0, 1) -#mmObj = manyMeans(y, bh.q=0.1) - - -##estimation of sigma for use in fsInf or larInf or fixedLassoInf -# + # n = 50 + # p = 10 + # sigma = 1 + + # x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + # +# beta = c(3,2,rep(0,p-2)) + # y = x%*%beta + sigma*rnorm(n) + # y=1*(y>mean(y)) + # first run glmnet + # gfit = glmnet(x,y,standardize=FALSE,family="binomial") + + # extract coef for a given lambda; note the 1/n factor! 
+ # (and here we DO include the intercept term) + # lambda = .8 + # beta = coef(gfit, s=lambda/n, exact=TRUE) + + # # compute fixed lambda p-values and selection intervals + # out = fixedLassoInf(x,y,beta,lambda,family="binomial") + # out + +##lasso at fixed lambda- Cox family #set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,F) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) - -#out=estimateSigma(x,y) - -##estimation of lambda for use in fixedLassoInf -set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,F) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -# -#estimate lambda using the known value of sigma -#lamhat=estimateLambda(x,sigma=.7) -#first estimate sigma -#sigmahat=estimateSigma(x,y)$sigmahat -#lamhat=estimateLambda(x,sigma=sigmahat) -# - +# n = 50 + # p = 10 + # sigma = 1 + + # x = matrix(rnorm(n*p),n,p) + # x=scale(x,TRUE,TRUE) + + # beta = c(3,2,rep(0,p-2)) + # tim = as.vector(x\%*\%beta + sigma*rnorm(n)) + # tim= tim-min(tim)+1 +#status=sample(c(0,1),size=n,replace=T) + # first run glmnet + # gfit = glmnet(x,Surv(tim,status),standardize=FALSE,family="cox") + # extract coef for a given lambda; note the 1/n factor! + + # lambda = 1.5 + # beta = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) + + # compute fixed lambda p-values and selection intervals + # out = fixedLassoInf(x,tim,beta,lambda,status=status,family="cox") + # out +## NOT RUN---many normal means +# set.seed(12345) +# n = 100 +# mu = c(rep(3,floor(n/5)), rep(0,n-floor(n/5))) +# y = mu + rnorm(n) +# out = manyMeans(y, bh.q=0.1) +# out + +## NOT RUN---forward stepwise with groups +# set.seed(1) +# n = 20 +# p = 40 +# x = matrix(rnorm(n*p), nrow=n) +# index = sort(rep(1:(p/2), 2)) +# y = rnorm(n) + 2 * x[,1] - x[,4] +# fit = groupfs(x, y, index, maxsteps = 5) +# out = groupfsInf(fit) +# out + +## NOT RUN---estimation of sigma for use in fsInf +## (or larInf or fixedLassoInf) +# set.seed(33) +# n = 50 +# p = 10 +# sigma = 1 +# x = matrix(rnorm(n*p),n,p) +# beta = c(3,2,rep(0,p-2)) +# y = x\%*\%beta + sigma*rnorm(n) + +## run forward stepwise +# fsfit = fs(x,y) + +## estimate sigma +# sigmahat = estimateSigma(x,y)$sigmahat + +## run sequential inference with estimated sigma +# out = fsInf(fit,sigma=sigmahat) +# out } \keyword{ package } diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars new file mode 100644 index 0000000..22c4c7d --- /dev/null +++ b/selectiveInference/src/Makevars @@ -0,0 +1,11 @@ +PKG_CFLAGS= -I. -DCOLUMN_MAJOR_ORDER +PKG_CPPFLAGS= -I. -DCOLUMN_MAJOR_ORDER +PKG_LIBS=-L. + +$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o quadratic_program.o quadratic_program_wide.o + +clean: + rm -f *o + +Rcpp: + Rscript -e "library(Rcpp); Rcpp::compileAttributes('..')" \ No newline at end of file diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp new file mode 100644 index 0000000..f5160ba --- /dev/null +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -0,0 +1,191 @@ +#include // need to include the main Rcpp header file +#include // where solve_QP, solve_QP_wide are defined + +// Below, the gradient should be equal to Sigma * theta + linear_func!! +// No check is done on this. 
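+// (Editor's note, illustrative and not part of the original source: callers
+// are expected to pass a gradient that already satisfies this, e.g. from R
+//     gradient <- Sigma %*% theta + linear_func
+// solve_QP assumes the identity holds on entry and does not verify it.)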
+ +// [[Rcpp::export]] +Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, + double bound, + int maxiter, + Rcpp::NumericVector theta, + Rcpp::NumericVector linear_func, + Rcpp::NumericVector gradient, + Rcpp::IntegerVector ever_active, + Rcpp::IntegerVector nactive, + double kkt_tol, + double objective_tol, + double parameter_tol, + int max_active, + int kkt_stop, + int objective_stop, + int param_stop + ) { + + int nrow = Sigma.nrow(); // number of features + + // Active set + + int irow; + + // Extract the diagonal + Rcpp::NumericVector Sigma_diag(nrow); + double *sigma_diag_p = Sigma_diag.begin(); + + Rcpp::NumericVector theta_old(nrow); + + for (irow=0; irow= max_active); + + return(Rcpp::List::create(Rcpp::Named("soln") = theta, + Rcpp::Named("gradient") = gradient, + Rcpp::Named("linear_func") = linear_func, + Rcpp::Named("iter") = iter, + Rcpp::Named("kkt_check") = kkt_check, + Rcpp::Named("ever_active") = ever_active, + Rcpp::Named("nactive") = nactive, + Rcpp::Named("max_active_check") = max_active_check)); + +} + + +// [[Rcpp::export]] +Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, + Rcpp::NumericVector bound, + double ridge_term, + int maxiter, + Rcpp::NumericVector theta, + Rcpp::NumericVector linear_func, + Rcpp::NumericVector gradient, + Rcpp::NumericVector X_theta, + Rcpp::IntegerVector ever_active, + Rcpp::IntegerVector nactive, + double kkt_tol, + double objective_tol, + double parameter_tol, + int max_active, + int kkt_stop, + int objective_stop, + int param_stop + ) { + + int ncase = X.nrow(); // number of cases + int nfeature = X.ncol(); // number of features + + // Active set + + int icase, ifeature; + + // A vector to keep track of gradient updates + + Rcpp::IntegerVector need_update(nfeature); + + Rcpp::NumericVector theta_old(nfeature); + + // Extract the diagonal -- divide by ncase + + Rcpp::NumericVector nndef_diag(nfeature); + double *nndef_diag_p = nndef_diag.begin(); + + for (ifeature=0; ifeature= max_active); + + // Make sure gradient is updated -- essentially a matrix multiply + + update_gradient_wide((double *) gradient.begin(), + (double *) X_theta.begin(), + (double *) X.begin(), + (double *) linear_func.begin(), + (int *) need_update.begin(), + ncase, + nfeature); + + return(Rcpp::List::create(Rcpp::Named("soln") = theta, + Rcpp::Named("gradient") = gradient, + Rcpp::Named("X_theta") = X_theta, + Rcpp::Named("linear_func") = linear_func, + Rcpp::Named("iter") = iter, + Rcpp::Named("kkt_check") = kkt_check, + Rcpp::Named("ever_active") = ever_active, + Rcpp::Named("nactive") = nactive, + Rcpp::Named("max_active_check") = max_active_check)); + +} diff --git a/selectiveInference/src/Rcpp-matrixcomps.cpp b/selectiveInference/src/Rcpp-matrixcomps.cpp new file mode 100644 index 0000000..045b4b7 --- /dev/null +++ b/selectiveInference/src/Rcpp-matrixcomps.cpp @@ -0,0 +1,34 @@ +#include // need to include the main Rcpp header file +#include // where update1, downdate1 are defined + +// [[Rcpp::export]] +Rcpp::List update1_(Rcpp::NumericMatrix Q2, + Rcpp::NumericVector w, + int m, + int k) { + + update1(Q2.begin(), + w.begin(), + m, + k); + + return(Rcpp::List::create(Rcpp::Named("Q2") = Q2, + Rcpp::Named("w") = w)); +} + +// [[Rcpp::export]] +Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, + Rcpp::NumericMatrix R, + int j0, + int m, + int n) { + + downdate1(Q1.begin(), + R.begin(), + j0, + m, + n); + + return(Rcpp::List::create(Rcpp::Named("Q1") = Q1, + Rcpp::Named("R") = R)); +} diff --git a/selectiveInference/src/Rcpp-randomized.cpp 
b/selectiveInference/src/Rcpp-randomized.cpp new file mode 100644 index 0000000..cf59d77 --- /dev/null +++ b/selectiveInference/src/Rcpp-randomized.cpp @@ -0,0 +1,129 @@ +#include // need to include the main Rcpp header file +#include // where densities are defined +#include +// [[Rcpp::export]] +Rcpp::NumericVector log_density_gaussian_(double noise_scale, // Scale of randomization + Rcpp::NumericMatrix internal_linear, // A_D -- linear part for data + Rcpp::NumericMatrix internal_state, // D -- data state -- matrix of shape (nopt, npts) + Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables + Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) + Rcpp::NumericVector offset) { // h -- offset in affine transform -- "p" dimensional + + int npt = internal_state.ncol(); // Function is vectorized + if (optimization_state.ncol() != npt) { // Assuming each column is an internal or opt state because arrays are column major + Rcpp::stop("Number of optimization samples should equal the number of (internally represented) data."); + } + + int ndim = optimization_linear.nrow(); + if (internal_linear.nrow() != ndim) { + Rcpp::stop("Dimension of optimization range should be the same as the dimension of the data range."); + } + int ninternal = internal_linear.ncol(); + int noptimization = optimization_linear.ncol(); + + Rcpp::NumericVector result(npt); + + int ipt; + for (ipt=0; ipt 0) { + + # compute fixed lambda p-values and selection intervals + + aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) + bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) + pvs_new <- c(pvs_new, aa$pv, recursive=TRUE) + pvs_old <- c(pvs_old, bb$pv,recursive=TRUE) + + cat() + } +} + +#check uniformity + +png(paste('comparison_scaled', j, '.png', sep='')) +plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values') +plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE) +abline(0,1) +legend("bottomright", legend=c("Old", "New"), pch=c(23,24), pt.bg=c("green","purple")) +dev.off() +} \ No newline at end of file diff --git a/tests/debiased_lasso/comparison_unscaled.R b/tests/debiased_lasso/comparison_unscaled.R new file mode 100644 index 0000000..eebda68 --- /dev/null +++ b/tests/debiased_lasso/comparison_unscaled.R @@ -0,0 +1,71 @@ +source('javanmard_montanari.R') + +############################################## + +# Runs nsims simulations under the global null, computing p-values +# using both the old code (slow one using Adel's code) and the new +# code (faster using Jon's code), and produces qq-plots for both. 
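+# (Editor's note, not in the original script: under the global null the
+# selective p-values should be roughly Uniform(0,1), so both ECDF curves
+# plotted at the end should track the 45-degree line added by abline(0,1).)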
+# Runing 50 sims takes about 10-15 mins because old code is slow, so +# feel free to lower nsims if you want + + +library(selectiveInference) +library(glmnet) + +# set.seed(424) + +n=100 +p=200 + +sigma=.5 + +lambda=c(0.25, 0.5, 1) + +for (j in c(3,2,1)) { + +thresh = 1e-10 + +beta=rep(0,p) +type="full" + +nsim = 20 + +scaling = sqrt(n) +pvs_old = c() +pvs_new <- c() +pvs_old_0 = c() # don't add the offset correction +pvs_new_0 = c() # don't add the offset correction +for (i in 1:nsim) { + cat(i,fill=T) + x = matrix(rnorm(n*p),n,p) + x = scale(x,T,T) / scaling + mu = x%*%beta + y=mu+sigma*rnorm(n) + + # first run glmnet + gfit=glmnet(x,y,intercept=F,standardize=F,thresh=thresh) + + bhat = coef(gfit, s=lambda[j]/(sqrt(n) * scaling), exact=TRUE,x=x,y=y)[-1] + + if(sum(bhat != 0) > 0) { + + # compute fixed lambda p-values and selection intervals + + aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) + bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) + pvs_new <- c(pvs_new, aa$pv, recursive=TRUE) + pvs_old <- c(pvs_old, bb$pv,recursive=TRUE) + + cat() + } +} + +#check uniformity + +png(paste('comparison_unscaled', j, '.png', sep='')) +plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values') +plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE) +abline(0,1) +legend("bottomright", legend=c("Old", "New"), pch=c(23,24), pt.bg=c("green","purple")) +dev.off() +} \ No newline at end of file diff --git a/tests/debiased_lasso/javanmard_montanari.R b/tests/debiased_lasso/javanmard_montanari.R new file mode 100644 index 0000000..09f3355 --- /dev/null +++ b/tests/debiased_lasso/javanmard_montanari.R @@ -0,0 +1,770 @@ +# First part is only functions from the old code. 
At the bottom is +# the bit of code that actually compares the old vs new code + +###################################################### + +### Old code (using Adel's R code) + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(mu)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + mu.stop <- 0; + try.no <- 1; + incr <- 0; + while ((mu.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + mu.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + mu <- mu*resol; + } else { + incr <- 0; + mu <- mu/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + mu <- mu*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + mu.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + mu <- mu/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + mu <- mu*resol; + beta <- last.beta; + mu.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + mu0 <- rho/(1+rho); + beta <- rep(0,p); + + #if (mu >= mu0){ + # beta[i] <- (1-mu0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-mu0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, mu)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Functions borrowed from selective Inference (only fixedLassoInf and fixedLasso.poly are modified) + +# Special linear time order function, works only when x +# is a scrambled vector of integers. + +Order <- function(x) { + n = length(x) + o = numeric(n) + o[x] = Seq(1,n) + return(o) +} + +# Returns a sequence of integers from a to b if a <= b, +# otherwise nothing. You have no idea how important this +# function is... + +Seq <- function(a, b, ...) { + if (a<=b) return(seq(a,b,...)) + else return(numeric(0)) +} + +# Returns the sign of x, with Sign(0)=1. 
+ +Sign <- function(x) { + return(-1+2*(x>=0)) +} + +############################## + +# Centering and scaling convenience function + +standardize <- function(x, y, intercept, normalize) { + x = as.matrix(x) + y = as.numeric(y) + n = nrow(x) + p = ncol(x) + + if (intercept) { + bx = colMeans(x) + by = mean(y) + x = scale(x,bx,FALSE) + y = y-mean(y) + } else { + bx = rep(0,p) + by = 0 + } + if (normalize) { + sx = sqrt(colSums(x^2)) + x = scale(x,FALSE,sx) + } else { + sx = rep(1,p) + } + + return(list(x=x,y=y,bx=bx,by=by,sx=sx)) +} + +############################## + +# Interpolation function to get coefficients + +coef.interpolate <- function(betas, s, knots, dec=TRUE) { + # Sort the s values + o = order(s,dec=dec) + s = s[o] + + k = length(s) + mat = matrix(rep(knots,each=k),nrow=k) + if (dec) b = s >= mat + else b = s <= mat + blo = max.col(b,ties.method="first") + bhi = pmax(blo-1,1) + + i = bhi==blo + p = numeric(k) + p[i] = 0 + p[!i] = ((s-knots[blo])/(knots[bhi]-knots[blo]))[!i] + + beta = t((1-p)*t(betas[,blo,drop=FALSE]) + p*t(betas[,bhi,drop=FALSE])) + colnames(beta) = as.character(round(s,3)) + rownames(beta) = NULL + + # Return in original order + o = order(o) + return(beta[,o,drop=FALSE]) +} + +############################## + +checkargs.xy <- function(x, y) { + if (missing(x)) stop("x is missing") + if (is.null(x) || !is.matrix(x)) stop("x must be a matrix") + if (missing(y)) stop("y is missing") + if (is.null(y) || !is.numeric(y)) stop("y must be numeric") + if (ncol(x) == 0) stop("There must be at least one predictor [must have ncol(x) > 0]") + if (checkcols(x)) stop("x cannot have duplicate columns") + if (length(y) == 0) stop("There must be at least one data point [must have length(y) > 0]") + if (length(y)!=nrow(x)) stop("Dimensions don't match [length(y) != nrow(x)]") +} + +checkargs.misc <- function(sigma=NULL, alpha=NULL, k=NULL, + gridrange=NULL, gridpts=NULL, griddepth=NULL, + mult=NULL, ntimes=NULL, + beta=NULL, lambda=NULL, tol.beta=NULL, tol.kkt=NULL, + bh.q=NULL) { + + if (!is.null(sigma) && sigma <= 0) stop("sigma must be > 0") + if (!is.null(lambda) && lambda < 0) stop("lambda must be >= 0") + if (!is.null(alpha) && (alpha <= 0 || alpha >= 1)) stop("alpha must be between 0 and 1") + if (!is.null(k) && length(k) != 1) stop("k must be a single number") + if (!is.null(k) && (k < 1 || k != floor(k))) stop("k must be an integer >= 1") + if (!is.null(gridrange) && (length(gridrange) != 2 || gridrange[1] > gridrange[2])) + stop("gridrange must be an interval of the form c(a,b) with a <= b") + if (!is.null(gridpts) && (gridpts < 20 || gridpts != round(gridpts))) + stop("gridpts must be an integer >= 20") + if (!is.null(griddepth) && (griddepth > 10 || griddepth != round(griddepth))) + stop("griddepth must be an integer <= 10") + if (!is.null(mult) && mult < 0) stop("mult must be >= 0") + if (!is.null(ntimes) && (ntimes <= 0 || ntimes != round(ntimes))) + stop("ntimes must be an integer > 0") + if (!is.null(beta) && sum(beta!=0)==0) stop("Value of lambda too large, beta is zero") + # if (!is.null(lambda) && length(lambda) != 1) stop("lambda must be a single number") + if (!is.null(lambda) && length(lambda) != 1 && length(lambda) != length(beta)) stop("lambda must be a single number or equal to the length of beta") + if (!is.null(lambda) && lambda < 0) stop("lambda must be >=0") + if (!is.null(tol.beta) && tol.beta <= 0) stop("tol.beta must be > 0") + if (!is.null(tol.kkt) && tol.kkt <= 0) stop("tol.kkt must be > 0") +} + +# Make sure that no two columms of A are the 
same +# (this works with probability one). + +checkcols <- function(A) { + b = rnorm(nrow(A)) + a = sort(t(A)%*%b) + if (any(diff(a)==0)) return(TRUE) + return(FALSE) +} + +estimateSigma <- function(x, y, intercept=TRUE, standardize=TRUE) { + checkargs.xy(x,rep(0,nrow(x))) + if(nrow(x)<10) stop("Number of observations must be at least 10 to run estimateSigma") + cvfit=cv.glmnet(x,y,intercept=intercept,standardize=standardize) + lamhat=cvfit$lambda.min + fit=glmnet(x,y,standardize=standardize) + yhat=predict(fit,x,s=lamhat) + nz=sum(predict(fit,s=lamhat, type="coef")!=0) + sigma=sqrt(sum((y-yhat)^2)/(length(y)-nz-1)) + return(list(sigmahat=sigma, df=nz)) +} + +# Update the QR factorization, after a column has been +# added. Here Q1 is m x n, Q2 is m x k, and R is n x n. + +updateQR <- function(Q1,Q2,R,col) { + m = nrow(Q1) + n = ncol(Q1) + k = ncol(Q2) + + a = .C("update1", + Q2=as.double(Q2), + w=as.double(t(Q2)%*%col), + m=as.integer(m), + k=as.integer(k), + dup=FALSE, + package="selectiveInference") + + Q2 = matrix(a$Q2,nrow=m) + w = c(t(Q1)%*%col,a$w) + + # Re-structure: delete a column from Q2, add one to + # Q1, and expand R + Q1 = cbind(Q1,Q2[,1]) + Q2 = Q2[,-1,drop=FALSE] + R = rbind(R,rep(0,n)) + R = cbind(R,w[Seq(1,n+1)]) + + return(list(Q1=Q1,Q2=Q2,R=R)) +} + +# Moore-Penrose pseudo inverse for symmetric matrices + +pinv <- function(A, tol=.Machine$double.eps) { + e = eigen(A) + v = Re(e$vec) + d = Re(e$val) + d[d > tol] = 1/d[d > tol] + d[d < tol] = 0 + if (length(d)==1) return(v*d*v) + else return(v %*% diag(d) %*% t(v)) +} + +############################## + +# Assuming that grid is in sorted order from smallest to largest, +# and vals are monotonically increasing function values over the +# grid, returns the grid end points such that the corresponding +# vals are approximately equal to {val1, val2} + +grid.search <- function(grid, fun, val1, val2, gridpts=100, griddepth=2) { + n = length(grid) + vals = fun(grid) + + ii = which(vals >= val1) + jj = which(vals <= val2) + if (length(ii)==0) return(c(grid[n],Inf)) # All vals < val1 + if (length(jj)==0) return(c(-Inf,grid[1])) # All vals > val2 + # RJT: the above logic is correct ... but for simplicity, instead, + # we could just return c(-Inf,Inf) + + i1 = min(ii); i2 = max(jj) + if (i1==1) lo = -Inf + else lo = grid.bsearch(grid[i1-1],grid[i1],fun,val1,gridpts, + griddepth-1,below=TRUE) + if (i2==n) hi = Inf + else hi = grid.bsearch(grid[i2],grid[i2+1],fun,val2,gridpts, + griddepth-1,below=FALSE) + return(c(lo,hi)) +} + +# Repeated bin search to find the point x in the interval [left, right] +# that satisfies f(x) approx equal to val. If below=TRUE, then we seek +# x such that the above holds and f(x) <= val; else we seek f(x) >= val. 
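+# (Editor's note, illustrative and not in the original file: poly.int below
+# uses these helpers to invert a monotone function of the candidate mean, as in
+#   fun <- function(x) tnorm.surv(z, x - offset, sd, vlo, vup, bits)
+#   int <- grid.search(xg, fun, alpha/2, 1 - alpha/2)
+# which returns the selective confidence limits.)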
+ +grid.bsearch <- function(left, right, fun, val, gridpts=100, griddepth=1, below=TRUE) { + n = gridpts + depth = 1 + + while (depth <= griddepth) { + grid = seq(left,right,length=n) + vals = fun(grid) + + if (below) { + ii = which(vals >= val) + if (length(ii)==0) return(grid[n]) # All vals < val (shouldn't happen) + if ((i0=min(ii))==1) return(grid[1]) # All vals > val (shouldn't happen) + left = grid[i0-1] + right = grid[i0] + } + + else { + ii = which(vals <= val) + if (length(ii)==0) return(grid[1]) # All vals > val (shouldn't happen) + if ((i0=max(ii))==n) return(grid[n]) # All vals < val (shouldn't happen) + left = grid[i0] + right = grid[i0+1] + } + + depth = depth+1 + } + + return(ifelse(below, left, right)) +} + +# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector + +tnorm.surv <- function(z, mean, sd, a, b, bits=NULL) { + z = max(min(z,b),a) + + # Check silly boundary cases + p = numeric(length(mean)) + p[mean==-Inf] = 0 + p[mean==Inf] = 1 + + # Try the multi precision floating point calculation first + o = is.finite(mean) + mm = mean[o] + pp = mpfr.tnorm.surv(z,mm,sd,a,b,bits) + + # If there are any NAs, then settle for an approximation + oo = is.na(pp) + if (any(oo)) pp[oo] = bryc.tnorm.surv(z,mm[oo],sd,a,b) + + p[o] = pp + return(p) +} + +# Returns Prob(Z>z | Z in [a,b]), where mean cane be a vector, using +# multi precision floating point calculations thanks to the Rmpfr package + +mpfr.tnorm.surv <- function(z, mean=0, sd=1, a, b, bits=NULL) { + # If bits is not NULL, then we are supposed to be using Rmpf + # (note that this was fail if Rmpfr is not installed; but + # by the time this function is being executed, this should + # have been properly checked at a higher level; and if Rmpfr + # is not installed, bits would have been previously set to NULL) + if (!is.null(bits)) { + z = Rmpfr::mpfr((z-mean)/sd, precBits=bits) + a = Rmpfr::mpfr((a-mean)/sd, precBits=bits) + b = Rmpfr::mpfr((b-mean)/sd, precBits=bits) + return(as.numeric((Rmpfr::pnorm(b)-Rmpfr::pnorm(z))/ + (Rmpfr::pnorm(b)-Rmpfr::pnorm(a)))) + } + + # Else, just use standard floating point calculations + z = (z-mean)/sd + a = (a-mean)/sd + b = (b-mean)/sd + return((pnorm(b)-pnorm(z))/(pnorm(b)-pnorm(a))) +} + +# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector, based on +# A UNIFORM APPROXIMATION TO THE RIGHT NORMAL TAIL INTEGRAL, W Bryc +# Applied Mathematics and Computation +# Volume 127, Issues 23, 15 April 2002, Pages 365--374 +# https://math.uc.edu/~brycw/preprint/z-tail/z-tail.pdf + +bryc.tnorm.surv <- function(z, mean=0, sd=1, a, b) { + z = (z-mean)/sd + a = (a-mean)/sd + b = (b-mean)/sd + n = length(mean) + + term1 = exp(z*z) + o = a > -Inf + term1[o] = ff(a[o])*exp(-(a[o]^2-z[o]^2)/2) + term2 = rep(0,n) + oo = b < Inf + term2[oo] = ff(b[oo])*exp(-(b[oo]^2-z[oo]^2)/2) + p = (ff(z)-term2)/(term1-term2) + + # Sometimes the approximation can give wacky p-values, + # outside of [0,1] .. + #p[p<0 | p>1] = NA + p = pmin(1,pmax(0,p)) + return(p) +} + +ff <- function(z) { + return((z^2+5.575192695*z+12.7743632)/ + (z^3*sqrt(2*pi)+14.38718147*z*z+31.53531977*z+2*12.77436324)) +} + +############## MODIFIED FUNCTIONS ############### + +# Lasso inference function (for fixed lambda). 
Note: here we are providing inference +# for the solution of +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 + +oldFixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, status=NULL, + sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE, offset_correction=TRUE) { + + family = match.arg(family) + this.call = match.call() + type = match.arg(type) + + if(family=="binomial") { + if(type!="partial") stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + else if(family=="cox") { + if(type!="partial") stop("Only type= partial allowed with Cox family") + out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta, + tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + + else{ + + + + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (type == "full") { + if (p > n) { + # need intercept (if there is one) for debiased lasso + hbeta = beta + if (intercept == T) { + if (length(beta) != p + 1) { + stop("Since type='full', p > n, and intercept=TRUE, beta must have length equal to ncol(x)+1") + } + # remove intercept if included + beta = beta[-1] + } else if (length(beta) != p) { + stop("Since family='gaussian', type='full' and intercept=FALSE, beta must have length equal to ncol(x)") + } + } + } else if (length(beta) != p) { + stop("Since family='gaussian' and type='partial', beta must have length equal to ncol(x)") + } + + checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + # If glmnet was run with an intercept term, center x and y + if (intercept==TRUE) { + obj = standardize(x,y,TRUE,FALSE) + x = obj$x + y = obj$y + } + + # Check the KKT conditions + g = t(x)%*%(y-x%*%beta) / lambda + if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + tol.coef = tol.beta * sqrt(n^2 / colSums(x^2)) + # print(tol.coef) + vars = which(abs(beta) > tol.coef) + # print(beta) + # print(vars) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Get lasso polyhedral region, of form Gy >= u + if (type == 'full' & p > n) out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE) + else out = fixedLasso.poly(x,y,beta,lambda,vars) + G = out$G + u = out$u + + # Check polyhedral region + tol.poly = 0.01 + if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + k = length(vars) + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + sign = numeric(k) + + if (type=="full" & p > n) { + if (intercept == T) { + pp=p+1 + Xint <- cbind(rep(1,n),x) + # indices of selected predictors + S = c(1,vars + 1) + notS = which(abs(beta) <= tol.coef) + 1 + } else { + pp=p + Xint <- x + # indices of selected predictors + S = vars + notS = which(abs(beta) <= tol.coef) + } + + + XS = Xint[,S] + hbetaS = hbeta[S] + + # Reorder so that active set S is first + Xordered = Xint[,c(S,notS,recursive=T)] + + hsigma <- 1/n*(t(Xordered)%*%Xordered) + hsigmaS <- 1/n*(t(XS)%*%XS) # hsigma[S,S] + hsigmaSinv <- pinv(hsigmaS) # solve(hsigmaS) + + # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R + htheta <- InverseLinfty(hsigma, n, verbose=FALSE) + + # 0-padding matrix + FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) + ithetasigma = (diag(pp)-(htheta%*%hsigma)) + + M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) + # vector which is offset for testing debiased beta's + meanoffset <- -(((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) + if (intercept == T) { + M = M[-1,] # remove intercept row + meanoffset = meanoffset[-1] # remove intercept element + } + if (offset_correction == FALSE) { + meanoffset = 0 * meanoffset + } + } else if (type=="partial" || p > n) { + xa = x[,vars,drop=F] + M = pinv(crossprod(xa)) %*% t(xa) + meanoffset = rep(0,k) + } else { + M = pinv(crossprod(x)) %*% t(x) + M = M[vars,,drop=F] + meanoffset = rep(0,k) + } + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + + a = poly.pval(y,G,u,vj,offset=meanoffset[j],sigma,bits) + pv[j] = a$pv + vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) + + a = poly.int(y,G,u,vj,offset=meanoffset[j],sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea + } + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, + call=this.call,M=M) + class(out) = "fixedLassoInf" + return(out) + } +} + + +fixedLasso.poly= + function(x, y, beta, lambda, a, inactive = FALSE) { + xa = x[,a,drop=F] + xac = x[,!a,drop=F] + xai = pinv(crossprod(xa)) + xap = xai %*% t(xa) + za = sign(beta[a]) + if (length(za)>1) dz = diag(za) + if (length(za)==1) dz = matrix(za,1,1) + + if (inactive) { + P = diag(1,nrow(xa)) - xa %*% xap + + G = -rbind( + 1/lambda * t(xac) %*% P, + -1/lambda * t(xac) 
%*% P, + -dz %*% xap + ) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + 1 - t(xac) %*% t(xap) %*% za, + 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + } else { + G = -rbind( + # 1/lambda * t(xac) %*% P, + # -1/lambda * t(xac) %*% P, + -dz %*% xap + ) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + # 1 - t(xac) %*% t(xap) %*% za, + # 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + } + + return(list(G=G,u=u)) + } + + +# Main p-value function + +poly.pval <- function(y, G, u, v, sigma, offset=0, bits=NULL) { + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + pv = tnorm.surv(z,0-offset,sd,vlo,vup,bits) + return(list(pv=pv,vlo=vlo,vup=vup)) +} + +# Main confidence interval function + +poly.int <- function(y, G, u, v, sigma, alpha, offset=0, gridrange=c(-100,100), + gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { + + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) + fun = function(x) { tnorm.surv(z,x-offset,sd,vlo,vup,bits) } + + int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) + tailarea = c(fun(int[1]),1-fun(int[2])) + + if (flip) { + int = -int[2:1] + tailarea = tailarea[2:1] + } + + return(list(int=int,tailarea=tailarea)) +} diff --git a/tests/debiased_lasso/test_debiasing.R b/tests/debiased_lasso/test_debiasing.R new file mode 100644 index 0000000..8e81b16 --- /dev/null +++ b/tests/debiased_lasso/test_debiasing.R @@ -0,0 +1,203 @@ +library(selectiveInference) + + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, bound=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(bound)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + bound <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + bound.stop <- 0; + try.no <- 1; + incr <- 0; + while ((bound.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, bound, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + bound.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + bound <- bound*resol; + } else { + incr <- 0; + bound <- bound/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + bound <- bound*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + bound.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + bound <- bound/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + bound <- bound*resol; + beta <- last.beta; + bound.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, bound, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + bound0 <- rho/(1+rho); + beta <- rep(0,p); + + #if (bound >= bound0){ + # beta[i] <- (1-bound0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + 
iter.old <- 1; + beta[i] <- (1-bound0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,bound)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, bound)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Test + +n = 100; p = 50 + +X = matrix(rnorm(n * p), n, p) +S = t(X) %*% X / n + +debiasing_bound = 7.791408e-02 + +tol = 1.e-12 + +rows = as.integer(c(1:2)) +print('here') +print(rows) +A1 = debiasingMatrix(S, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) + +A2 = debiasingMatrix(S / n, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B1 = debiasingMatrix(X, TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) + +C1 = InverseLinfty(S, n, bound=debiasing_bound, maxiter=1000)[rows,] +C2 = InverseLinfty(S / n, n, bound=debiasing_bound, maxiter=1000)[rows,] + +par(mfrow=c(2,3)) + +plot(A1[1,], C1[1,]) +plot(A1[1,], B1[1,]) +plot(B1[1,], C1[1,]) + +plot(A1[1,], A2[1,]) +plot(B1[1,], B2[1,]) +plot(C1[1,], C2[1,]) + +print(c('A', sum(A1[1,] == 0))) +print(c('B', sum(B1[1,] == 0))) +print(c('C', sum(C1[1,] == 0))) + +## Are our points feasible + +feasibility = function(S, soln, j, debiasing_bound) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(max(abs(G)), debiasing_bound)) +} + +print(c('feasibility A', feasibility(S, A1[1,], 1, debiasing_bound))) +print(c('feasibility B', feasibility(S, B1[1,], 1, debiasing_bound))) +print(c('feasibility C', feasibility(S, C1[1,], 1, debiasing_bound))) + +active_KKT = function(S, soln, j, debiasing_bound) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + print(which(soln != 0)) + print(G[j]) + return(c(G[soln != 0] * sign(soln)[soln != 0], debiasing_bound)) +} + +print(c('active_KKT A', active_KKT(S, A1[1,], 1, debiasing_bound))) +print(c('active_KKT B', active_KKT(S, B1[1,], 1, debiasing_bound))) +print(c('active_KKT C', active_KKT(S, C1[1,], 1, debiasing_bound))) + + diff --git a/tests/debiased_lasso/test_debiasing_wide.R b/tests/debiased_lasso/test_debiasing_wide.R new file mode 100644 index 0000000..62801da --- /dev/null +++ b/tests/debiased_lasso/test_debiasing_wide.R @@ -0,0 +1,202 @@ +library(selectiveInference) + + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, bound=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(bound)){ + isgiven 
<- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + bound <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + bound.stop <- 0; + try.no <- 1; + incr <- 0; + while ((bound.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, bound, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + bound.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + bound <- bound*resol; + } else { + incr <- 0; + bound <- bound/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + bound <- bound*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + bound.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + bound <- bound/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + bound <- bound*resol; + beta <- last.beta; + bound.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, bound, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + bound0 <- rho/(1+rho); + beta <- rep(0,p); + + #if (bound >= bound0){ + # beta[i] <- (1-bound0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-bound0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,bound)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, bound)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Test + +n = 100; p = 250 + +X = matrix(rnorm(n * p), n, p) +S = t(X) %*% X / n + +debiasing_bound = 0.2 + +tol = 1.e-12 + +rows = as.integer(c(1:2)) + +A1 = debiasingMatrix(S, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +A2 = debiasingMatrix(S / n, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B1 = debiasingMatrix(X, TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) + +C1 = InverseLinfty(S, n, bound=debiasing_bound, maxiter=1000)[rows,] +C2 = InverseLinfty(S / n, n, bound=debiasing_bound, maxiter=1000)[rows,] + +par(mfrow=c(2,3)) + +plot(A1[1,], C1[1,]) +plot(A1[1,], B1[1,]) +plot(B1[1,], C1[1,]) + 
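+# (Editor's note, not in the original script: the three plots above compare
+# the two debiasingMatrix solutions (A1, B1) with the InverseLinfty reference
+# (C1); the three plots below show how each solution changes when its input
+# is rescaled (S/n or X/sqrt(n)) while the bound is held fixed.)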
+plot(A1[1,], A2[1,]) +plot(B1[1,], B2[1,]) +plot(C1[1,], C2[1,]) + +print(c('A', sum(A1[1,] == 0))) +print(c('B', sum(B1[1,] == 0))) +print(c('C', sum(C1[1,] == 0))) + +## Are our points feasible + +feasibility = function(S, soln, j, debiasing_bound) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(max(abs(G)), debiasing_bound)) +} + +print(c('feasibility A', feasibility(S, A1[1,], 1, debiasing_bound))) +print(c('feasibility B', feasibility(S, B1[1,], 1, debiasing_bound))) +print(c('feasibility C', feasibility(S, C1[1,], 1, debiasing_bound))) + +active_KKT = function(S, soln, j, debiasing_bound) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + print(which(soln != 0)) + print(G[j]) + return(c(G[soln != 0] * sign(soln)[soln != 0], debiasing_bound)) +} + +print(c('active_KKT A', active_KKT(S, A1[1,], 1, debiasing_bound))) +print(c('active_KKT B', active_KKT(S, B1[1,], 1, debiasing_bound))) +print(c('active_KKT C', active_KKT(S, C1[1,], 1, debiasing_bound))) + + +print(summary(lm(A1[1,] ~ C1[1,]))) \ No newline at end of file diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R new file mode 100644 index 0000000..8155e5f --- /dev/null +++ b/tests/randomized/test_instances.R @@ -0,0 +1,121 @@ +library(selectiveInference) + +get_instance = function(n, p, s, sigma=1, rho=0, signal=6, family="gaussian", + X=NA, random_signs=TRUE, scale=TRUE, center=TRUE, seed=NA){ + if (!is.na(seed)){ + set.seed(seed) + } + + if (is.na(X)){ + X = sqrt(1-rho)*matrix(rnorm(n*p),n, p) + sqrt(rho)*matrix(rep(rnorm(n), p), nrow = n) + X = scale(X)/sqrt(n) + } + beta = rep(0, p) + if (s>0){ + beta[1:s] = seq(3, 6, length.out=s) + } + beta = sample(beta) + if (random_signs==TRUE & s>0){ + signs = sample(c(-1,1), s, replace = TRUE) + beta = beta * signs + } + mu = X %*% beta + if (family=="gaussian"){ + y = mu + rnorm(n)*sigma + } else if (family=="binomial"){ + prob = exp(mu)/(1+exp(mu)) + y= rbinom(n,1, prob) + } + result = list(X=X,y=y,beta=beta) + return(result) +} + + + + +test_randomized_lasso = function(n=100,p=200,s=0){ + set.seed(1) + data = gaussian_instance(n=n,p=p,s=s, rho=0.3, sigma=3) + X=data$X + y=data$y + lam = 2. + noise_scale = 0.5 + ridge_term = 1./sqrt(n) + result = selectiveInference:::randomizedLasso(X,y,lam, noise_scale, ridge_term) + print(result$soln) + print(length(which(result$soln!=0))) + print(result$observed_opt_state) # compared with python code +} + +test_KKT=function(){ + set.seed(1) + n=200 + p=100 + data = gaussian_instance(n=n,p=p,s=0, rho=0.3, sigma=3) + X=data$X + y=data$y + lam = 2. 
+ noise_scale = 0.5 + ridge_term = 1./sqrt(n) + result = selectiveInference:::randomizedLasso(X,y,lam, noise_scale, ridge_term) + print("check KKT") + opt_linear = result$optimization_transform$linear_term + opt_offset = result$optimization_transform$offset_term + observed_opt_state=result$observed_opt_state + #print(dim(opt_linear)) + #print(opt_offset) + #print(result$perturb) + print(opt_linear %*% observed_opt_state+opt_offset+result$observed_raw-result$perturb) ## should be zero +} + + + +collect_results = function(n,p,s, nsim=100, level=0.9, + family = "binomial", + condition_subgrad=FALSE, lam=1.2){ + + rho=0.3 + sigma=1 + sample_pvalues = c() + sample_coverage = c() + for (i in 1:nsim){ + data = get_instance(n=n,p=p,s=s, rho=rho, sigma=sigma, family=family) + X=data$X + y=data$y + result = selectiveInference:::randomizedLassoInf(X, y, + lam, + family = family, + sampler = "adaptMCMC", + sigma=sigma, + level=level, + burnin=1000, + nsample=5000, + condition_subgrad=condition_subgrad) + if (length(result$active_set)>0){ + true_beta = data$beta[result$active_set] + coverage = rep(0, nrow(result$ci)) + for (i in 1:nrow(result$ci)){ + if (result$ci[i,1]true_beta[i]){ + coverage[i]=1 + } + print(paste("ci", toString(result$ci[i,]))) + } + sample_pvalues = c(sample_pvalues, result$pvalues) + sample_coverage = c(sample_coverage, coverage) + print(paste("coverage", mean(sample_coverage))) + } + } + if (length(sample_coverage)>0){ + print(paste("coverage", mean(sample_coverage))) + jpeg('pivots.jpg') + plot(ecdf(sample_pvalues), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf") + abline(0, 1, lty=2) + dev.off() + } +} + +#set.seed(1) +collect_results(n=500, p=200, s=0, lam=0.8) +#test_randomized_lasso() +#test_KKT() + diff --git a/tests/randomized/test_randomized.R b/tests/randomized/test_randomized.R new file mode 100644 index 0000000..1531d43 --- /dev/null +++ b/tests/randomized/test_randomized.R @@ -0,0 +1,119 @@ +library(selectiveInference) + +smoke_test = function() { + n = 100; p = 50 + X = matrix(rnorm(n * p), n, p) + y = rnorm(n) + lam = 20 / sqrt(n) + noise_scale = 0.01 * sqrt(n) + ridge_term = .1 / sqrt(n) + selectiveInference:::randomizedLasso(X, y, lam, noise_scale, ridge_term) +} + +A = smoke_test() + +sampler_test = function() { + + n = 100; p = 50 + X = matrix(rnorm(n * p), n, p) + y = rnorm(n) + lam = 20 / sqrt(n) + noise_scale = 0.01 * sqrt(n) + ridge_term = .1 / sqrt(n) + obj = selectiveInference:::randomizedLasso(X, y, lam, noise_scale, ridge_term) + S = selectiveInference:::sample_opt_variables(obj, jump_scale=rep(1/sqrt(n), p), nsample=10000) + return(S$samples[2001:10000,]) +} +B = sampler_test() + +gaussian_density_test = function() { + + noise_scale = 10. 
+ random_lasso = smoke_test() + p = nrow(random_lasso$internal_transform$linear_term) + internal_state = matrix(rnorm(p * 20), p, 20) + optimization_state = matrix(rnorm(p * 20), p, 20) + offset = rnorm(p) + + V1 = selectiveInference:::log_density_gaussian_(noise_scale, + random_lasso$internal_transform$linear_term, + internal_state, + random_lasso$optimization_transform$linear_term, + optimization_state, + offset) + A1 = random_lasso$internal_transform$linear_term + A2 = random_lasso$optimization_transform$linear_term + arg = A1 %*% internal_state + A2 %*% optimization_state + offset + V2 = -apply(arg^2, 2, sum) / (2 * noise_scale^2) + print(sqrt(sum((V1-V2)^2) / sum(V1^2))) + + U1 = selectiveInference:::log_density_gaussian_conditional_(noise_scale, + random_lasso$optimization_transform$linear_term, + optimization_state, + offset) + arg = A2 %*% optimization_state + offset + U2 = -apply(arg^2, 2, sum) / (2 * noise_scale^2) + print(sqrt(sum((U1-U2)^2) / sum(U1^2))) + + # test that a single column matrix works -- numeric should not + + print(selectiveInference:::log_density_gaussian_conditional_(noise_scale, + random_lasso$optimization_transform$linear_term, + optimization_state[,1,drop=FALSE], + offset)) + print(selectiveInference:::log_density_gaussian_(noise_scale, + random_lasso$internal_transform$linear_term, + internal_state[,1,drop=FALSE], + random_lasso$optimization_transform$linear_term, + optimization_state[,1,drop=FALSE], + offset)) + +} + +gaussian_density_test() + +laplace_density_test = function() { + + noise_scale = 10. + random_lasso = smoke_test() + p = nrow(random_lasso$internal_transform$linear_term) + internal_state = matrix(rnorm(p * 20), p, 20) + optimization_state = matrix(rnorm(p * 20), p, 20) + offset = rnorm(p) + + V1 = selectiveInference:::log_density_laplace_(noise_scale, + random_lasso$internal_transform$linear_term, + internal_state, + random_lasso$optimization_transform$linear_term, + optimization_state, + offset) + A1 = random_lasso$internal_transform$linear_term + A2 = random_lasso$optimization_transform$linear_term + arg = A1 %*% internal_state + A2 %*% optimization_state + offset + V2 = -apply(abs(arg), 2, sum) / noise_scale + print(sqrt(sum((V1-V2)^2) / sum(V1^2))) + + U1 = selectiveInference:::log_density_laplace_conditional_(noise_scale, + random_lasso$optimization_transform$linear_term, + optimization_state, + offset) + arg = A2 %*% optimization_state + offset + U2 = -apply(abs(arg), 2, sum) / noise_scale + print(sqrt(sum((U1-U2)^2) / sum(U1^2))) + + # test that a single column matrix works -- numeric should not + + print(selectiveInference:::log_density_laplace_conditional_(noise_scale, + random_lasso$optimization_transform$linear_term, + optimization_state[,1,drop=FALSE], + offset)) + print(selectiveInference:::log_density_laplace_(noise_scale, + random_lasso$internal_transform$linear_term, + internal_state[,1,drop=FALSE], + random_lasso$optimization_transform$linear_term, + optimization_state[,1,drop=FALSE], + offset)) + +} + +laplace_density_test() diff --git a/tests/randomized/test_sampler.R b/tests/randomized/test_sampler.R new file mode 100644 index 0000000..e19a53f --- /dev/null +++ b/tests/randomized/test_sampler.R @@ -0,0 +1,16 @@ + +test_log_concave_sampler = function(){ + samples = log_concave_sampler(negative_log_density= function(x){x^2/2}, + grad_negative_log_density=function(x){x}, + constraints = t(as.matrix(c(2,3))), + observed = 2, nsamples=10000) + mean(samples) + hist(samples) +} + + +test_gaussian_sampler =function(){ + samples = 
gaussian_sampler(1, 1, 1, 0,10000) + mean(samples) + hist(samples) +} diff --git a/tests/test.categorical.R b/tests/test.categorical.R new file mode 100644 index 0000000..2b5f4bd --- /dev/null +++ b/tests/test.categorical.R @@ -0,0 +1,66 @@ +#library(selectiveInference) +#library(lars) +library(intervals) +source("../selectiveInference/R/funs.groupfs.R") +source("../selectiveInference/R/funs.quadratic.R") +source("../selectiveInference/R/funs.common.R") + +set.seed(1) +n <- 100 +G <- 10 +maxsteps <- 10 +snr <- 1 +niter <- 100 + +check.mismatch <- function(fsfit, fit) { + fsnames <- names(fsfit$coefficients) + if (length(fsnames) > 0) { + fsnames <- unique(substr(fsnames, 1, nchar(fsnames) - 1)) + k <- length(fsnames) + fitnames <- attr(fit, "varnames")[fit$action][1:(length(fit$action)-attr(fit, "aicstop"))] + if (is.null(fit$sigma)) { + aicdiff <- AIC(fsfit) - fit$log$AIC[k] + } else { + aicdiff <- extractAIC(fsfit, scale = fit$sigma)[2] - fit$log$AIC[k] + } + if (length(fitnames) !=k || any(fsnames != fitnames)) { + print(paste("Mismatch at iteration", iter, ifelse(is.null(fit$sigma), "unknown", "known"))) + print(fsnames) + print(fitnames) + return(list(count = 1, aicdiff = aicdiff)) + } + return(list(count = 0, aicdiff = aicdiff)) + } + return(list(count = 0, aicdiff = 0)) +} + +print("Comparing step with groupfs on random categorical designs") +umismatchcount <- kmismatchcount <- 0 +uaicdiffs <- kaicdiffs <- numeric(niter) +for (iter in 1:niter) { + rles <- 2 + rpois(G, 2) + df <- data.frame(do.call(cbind, lapply(rles, function(g) { + sample(LETTERS[1:g], n, replace = TRUE, prob = runif(g)) + })), stringsAsFactors = TRUE) + if (any(apply(df, 2, function(col) length(unique(col))) == 1)) next + fd <- factorDesign(df) + if (any(duplicated(fd$x, MARGIN = 2))) next + y <- rnorm(n) + x1 <- fd$x[, fd$index == 1] + y <- y + x1 %*% c(snr, rep(0, ncol(x1) - 2), -snr) + y <- y - mean(y) + df$y <- y + capture.output(fsfit <- step(lm(y ~ 0, df), direction="forward", scope = formula(lm(y~.-1, df)), steps = maxsteps), file = "/dev/null") + fit <- groupfs(fd$x, df$y, fd$index, maxsteps = 10, intercept = F, center = F, normalize = F, aicstop = 1) + mismatches <- check.mismatch(fsfit, fit) + umismatchcount <- umismatchcount + mismatches$count + uaicdiffs[iter] <- mismatches$aicdiff + capture.output(fsfit <- step(lm(y ~ 0, df), scale = 1, direction="forward", scope = formula(lm(y~.-1, df)), steps = maxsteps), file = "/dev/null") + fit <- groupfs(fd$x, df$y, fd$index, maxsteps = 10, sigma = 1, intercept = F, center = F, normalize = F, aicstop = 1) + mismatches <- check.mismatch(fsfit, fit) + kmismatchcount <- kmismatchcount + mismatches$count + kaicdiffs[iter] <- mismatches$aicdiff +} +print(paste("Mismatches:", umismatchcount, "for unknown sigma and", kmismatchcount, "for known")) +summary(uaicdiffs) +summary(kaicdiffs) diff --git a/tests/test.fixed.R b/tests/test.fixed.R index e214fe9..9a09072 100644 --- a/tests/test.fixed.R +++ b/tests/test.fixed.R @@ -1,10 +1,135 @@ -#library(selectiveInference) -library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(selectiveInference) +#library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(glmnet) +library(MASS) +library(scalreg) #options(error=dump.frames) #attach("/Users/tibs/dropbox/PAPERS/lasso/lasso3/.RData") -c +##### +#gaussian +n=50 +p=10 +sigma=.7 +beta=c(3,2,0,0,rep(0,p-4)) +set.seed(43) +nsim = 200 +pvals <- matrix(NA, nrow=nsim, ncol=p) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = 
x%*%beta +for (i in 1:nsim) { + cat(i) +y=mu+sigma*rnorm(n) +#y=y-mean(y) +# first run glmnet +gfit=glmnet(x,y,intercept=F,standardize=F,thresh=1e-8) +lambda = 1 +#extract coef for a given lambda; Note the 1/n factor! +beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +# compute fixed lambda p-values and selection intervals +aa = fixedLassoInf(x,y,beta,lambda,intercept=F,sigma=sigma) +pvals[i, which(beta != 0)] <- aa$pv +} +nulls = which(!is.na(pvals[,1]) & !is.na(pvals[,2])) +np = pvals[nulls,-(1:2)] +mean(np[!is.na(np)] < 0.1) +o=!is.na(np) +plot((1:sum(o))/sum(o),sort(np)) +abline(0,1) +##### + + +S <- diag(10) +n <- 100 +p <- 10 +pval <- matrix(1, nrow = 100, ncol = p) +for(i in 1:100){ + cat(i) + X <- mvrnorm(n = n, mu = rep(0, p), Sigma = S) + Y <- X[, 1] + X[, 2] + rnorm(n) + sig.L <- scalreg(X, Y)$hsigma + + lam <- cv.glmnet(X, Y, standardize = FALSE, intercept = FALSE)$lambda.min + bl <- glmnet(X, Y, lambda = lam, standardize = FALSE, intercept = FALSE)$beta[, 1] + idx <- which(bl != 0) + pval[i, idx] <- fixedLassoInf(X, Y, beta = bl, lambda = lam * n, intercept = FALSE, sigma = sig.L, alpha = 0.05)$pv +} + +p <- pval[, -(1:2)] +mean(p[p < 1] < 0.05) + +##logistic + +n=50 +p=10 +beta=c(3,2,0,0,rep(0,p-4)) +beta=rep(0,p) +set.seed(3) +nsim = 200 +pvals=matrix(NA, nrow=nsim, ncol=p) +ci=array(NA,c(nsim,p,2)) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta +for (ii in 1:nsim) { + cat(ii) +y=mu+rnorm(n) +y=1*(y>mean(y)) +# first run glmnet +gfit=glmnet(x,y,standardize=F,thresh=1e-8,family="binomial") +lambda = .25 +#extract coef for a given lambda; Note the 1/n factor! +beta = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) +# compute fixed lambda p-values and selection intervals + aa = fixedLassoInf(x,y,beta,lambda,family="binomial") + pvals[ii, which(beta[-1] != 0)] <- aa$pv + ci[ii,which(beta[-1] != 0),]=aa$ci +} + +o=!is.na(pvals) +plot((1:sum(o))/sum(o),sort(pvals)) +abline(0,1) +o=ci[,1,1]>0 | ci[,1,2]<0 +mean(o,na.rm=T) + + +## cox + +n=50 +p=10 +#beta=c(6,6,0,0,rep(0,p-4)) +beta=rep(0,p) +set.seed(3) +nsim = 200 +pvals=matrix(NA, nrow=nsim, ncol=p) +ci=array(NA,c(nsim,p,2)) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta +for (ii in 1:nsim) { + cat(ii) +tim=as.vector(mu+rnorm(n))+10 +status=sample(c(0,1),size=n,replace=T) + lambda=0.2 + y=cbind(time=tim,status=status) + gfit=glmnet(x,y,family="cox",standardize=FALSE) + b=as.numeric(coef(gfit,s=lambda/n,exact=TRUE)) + + aa= fixedLassoInf(x,tim,b,lambda,status=status,family="cox") + +pvals[ii, which(b != 0)] <- aa$pv[1:sum(!is.na(aa$pv))] + ci[ii,which(b != 0),]=aa$ci +} + +o=!is.na(pvals) +plot((1:sum(o))/sum(o),sort(pvals)) +abline(0,1) + + +#####more Gaussian + a=lar(x,y) aa=larInf(a) @@ -19,13 +144,14 @@ set.seed(3) n=50 p=10 sigma=2 +nsim=100 x=matrix(rnorm(n*p),n,p) #x=scale(x,T,T)/sqrt(n-1) #try with and without standardization beta=c(5,4,3,2,1,rep(0,p-5)) -nsim=100 + seeds=sample(1:9999,size=nsim) pv=rep(NA,nsim) ci=matrix(NA,nsim,2) @@ -42,12 +168,12 @@ for(ii in 1:nsim){ bhat = predict(gfit, s=lambda/n,type="coef",exact=F)[-1] junk= fixedLassoInf(x,y,bhat,lambda,sigma=sigma) - pv[ii]=junk$pv[1] - # oo=junk$pred # for old package - oo=junk$var # for new package - btrue[ii]=lsfit(x[,oo],mu)$coef[2] - ci[ii,]=junk$ci[1,] + pvals[ii, which(bhat != 0)] <- aa$pv[1:sum(!is.na(aa$pv))] + ci[ii,which(bhat != 0),]=aa$ci + } +o=!is.na(pvals) +plot((1:sum(o))/sum(o),sort(pvals)) sum(ci[,1]> btrue) sum(ci[,2]< btrue) @@ -156,6 +282,35 @@ fixedLassoInf(x,y,beta,lambda,sigma=sigma) 
beta=coef(fit,s=lambda,mode="lambda") fixedLassoInf(x,y,beta,lambda,sigma=sigma) +# now try penalty factors + set.seed(43) + n = 50 + p = 10 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x=scale(x,T,T) + + beta = c(3,2,rep(0,p-2)) + y = x%*%beta + sigma*rnorm(n) + + pf=c(rep(1,7),rep(.1,p-7)) + pf=p*pf/sum(pf) # penalty factors should be rescaled so they sum to p + xs=scale(x,F,pf) #scale cols of x + # first run glmnet + gfit = glmnet(xs,y,standardize=F) + + # extract coef for a given lambda; note the 1/n factor! + # (and we don't save the intercept term) + lambda = .8 + beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(xs,y,beta,lambda,sigma=sigma) + #rescale conf points to undo the penalty factor + out$ci=t(scale(t(out$ci),F,pf[out$vars])) + + ### x=state.x77[,-4] y=state.x77[,4] @@ -281,3 +436,121 @@ gfit = glmnet(X,y,standardize=F) coef = coef(gfit, s=lam/n, exact=T)[-1] sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha,type="partial") # Error in v %*% diag(d) : non-conformable arguments + +## lucas again + +load("params_for_Rob.rdata") #variables: X, y, coef, lam, sigma, alpha + +sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha,type="partial") + + +#### bug from Sen at UW + +library(MASS) +library(scalreg) + +S <- diag(10) +n <- 100 +p <- 10 +pval <- matrix(1, nrow = 100, ncol = p) +for(i in 1:100){ + cat(i) + X <- mvrnorm(n = n, mu = rep(0, p), Sigma = S) + Y <- X[, 1] + X[, 2] + rnorm(n) + sig.L <- scalreg(X, Y)$hsigma + + lam <- cv.glmnet(X, Y, standardize = FALSE, intercept = FALSE)$lambda.min + bl <- glmnet(X, Y, lambda = lam, standardize = FALSE, intercept = FALSE)$beta[, 1] + idx <- which(bl != 0) + pval[i, idx] <- fixedLassoInf(X, Y, beta = bl, lambda = lam * n, intercept = FALSE, sigma = sig.L, alpha = 0.05)$pv +} + +p <- pval[, -(1:2)] +mean(p[p < 1] < 0.05) + + +#test from Chong + + +library(selectiveInference) + +library(glmnet);library(MASS);#library(grplasso);library(gvlma);library(grpreg) +library(penalized) +load("fooXY.RData") + +#d=read.csv("DesignMatrixX_and_y.csv");dim(d); head(d) + +#source("temp.R") +n=length(Y) +p=ncol(X) +#X=scale(X,T,F) +X=X+.01*matrix(rnorm(n*p),n,p) # I added noise to avoid collinearity +#X=scale(X,T,T)/sqrt(n-1) + +Y=Y-mean(Y) + +#X=as.matrix(d[,1:192]); ### design matrix, no intercept +#Y=d$y; ### Response variable Y. 
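# ------------------------------------------------------------------
# Illustrative sketch (hedged; not part of the original test script):
# the lines below fit glmnet and penalized() and then inspect the lasso
# subgradient g = t(X) %*% (Y - X %*% b) / lam2. At an exact solution of
# min_b 0.5*||Y - X b||^2 + lam2*||b||_1, g[j] should equal sign(b[j])
# on the active set and lie in [-1, 1] elsewhere. A small helper for
# that check (the name lasso_kkt_check is assumed, not a package function):
lasso_kkt_check = function(X, Y, b, lam2, tol = 1e-4) {
  # subgradient implied by the candidate solution b
  g = as.numeric(t(X) %*% (Y - X %*% b)) / lam2
  active = which(b != 0)
  inactive = setdiff(seq_along(g), active)
  list(subgrad = g,
       active_ok = all(abs(g[active] - sign(b[active])) < tol),
       inactive_ok = all(abs(g[inactive]) <= 1 + tol))
}
# e.g. lasso_kkt_check(X, Y, beta, lam*n) after the glmnet fit below,
# since glmnet's lambda is on the 1/n scale used throughout this file.
# ------------------------------------------------------------------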
+fit = glmnet(x=X, y=Y, family="gaussian",alpha = 1, thresh = 1e-9, standardize=F) +set.seed(39) +lam= fit$lambda[30]; +#lam= fit$lambda[15]## Try getting coefficient at this lambda +beta = coef(fit, s=lam, exact=TRUE)[-1];length(beta);table(beta!=0) + + aa=penalized(Y~X,lambda1=lam*n,model="linear",standardize=F) + b=coef(aa,which="all")[-1] + +lam2=n*lam + +g=t(X)%*%(Y-X%*%beta)/lam2 + +g[beta!=0] + +g=t(X)%*%(Y-X%*%b)/lam2 +out = fixedLassoInf(X,Y,beta,lam*n) + + + + + +# + +#gaussian +n=50 +p=10 +sigma=.7 +beta=c(0,0,0,0,rep(0,p-4)) +set.seed(43) +nsim = 1000 +pvals <- matrix(NA, nrow=nsim, ncol=p) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta +for (i in 1:nsim) { + cat(i) +y=mu+sigma*rnorm(n) +#y=y-mean(y) +# first run glmnet + pf=c(rep(.001,4),rep(1,p-4)) + xs=scale(x,FALSE,pf) #scale cols of x by penalty factors + # first run glmnet + gfit = glmnet(xs,y,standardize=FALSE) + + + lambda = .8 + beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + + # compute fixed lambda p-values and selection intervals + aa = fixedLassoInf(xs,y,beta,lambda,sigma=sigma) + +pvals[i, which(beta != 0)] <- aa$pv +} +nulls = 1:nsim +np = pvals[nulls,-(1:4)] +mean(np[!is.na(np)] < 0.1) +o=!is.na(np) +plot((1:sum(o))/sum(o),sort(np)) +abline(0,1) +##### + + diff --git a/tests/test.fs.R b/tests/test.fs.R index fbe50ce..b7a6733 100644 --- a/tests/test.fs.R +++ b/tests/test.fs.R @@ -1,5 +1,7 @@ -#library(selectiveInference) -library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(selectiveInference) + +options(error=dump.frames) + library(lars) @@ -24,7 +26,7 @@ max(abs(obj$action-unlist(obj2$action))) # These don't always match ... what is the lars function doing? # Checks -max(abs(obj$action-unlist(obj2$action)) +max(abs(obj$action-unlist(obj2$action))) max(abs(coef(obj,s=4.5,mode="step")- lars::predict.lars(obj2,s=4.5,type="coef",mode="step")$coef)) max(abs(predict(obj,s=4.5,mode="step")- @@ -113,8 +115,8 @@ x=matrix(rnorm(n*p),n,p) #x=scale(x,T,T)/sqrt(n-1) #try with and without standardization beta=c(5,4,3,2,1,rep(0,p-5)) - -nsim=100 +beta=rep(0,p) +nsim=500 seeds=sample(1:9999,size=nsim) pv=rep(NA,nsim) ci=matrix(NA,nsim,2) @@ -134,7 +136,10 @@ for(ii in 1:nsim){ btrue[ii]=lsfit(x[,oo],mu)$coef[2] ci[ii,]=junk$ci[1,] } - +plot((1:nsim)/nsim,sort(pv)) + abline(0,1) + + sum(ci[,1]> btrue) sum(ci[,2]< btrue) @@ -171,3 +176,21 @@ out3 = fsInf(obj,sigma=sigma,k=k,type="all") out3 out4 = fsInf(obj,sigma=sigma,k=k,type="all",bits=200) +##plot + + library(selectiveInference) + +options(error=dump.frames) + + + set.seed(33) + n = 50 + p = 10 + sigma = 1 + x = matrix(rnorm(n*p),n,p) + beta = c(3,2,rep(0,p-2)) + y = x%*%beta + sigma*rnorm(n) + + # run forward stepwise, plot results + fsfit = fs(x,y) + plot(fsfit) diff --git a/tests/test.fs.selected.R b/tests/test.fs.selected.R new file mode 100644 index 0000000..37c5512 --- /dev/null +++ b/tests/test.fs.selected.R @@ -0,0 +1,20 @@ +library(selectiveInference) +library(lars) + +set.seed(32) + +n=50 +p=10 +sigma=1 + +x = as.matrix(read.table("X.csv", sep=',', header=FALSE)) +Y = as.numeric(read.table("Y.csv", sep=',', header=FALSE)[,1]) + +beta=c(5,4,3,2,1,rep(0,p-5)) +mu=x%*%beta + +y=mu+Y +fsfit=fs(x,y,norm=TRUE, intercept=TRUE) +out = fsInf_maxZ(fsfit,sigma=sigma) + + diff --git a/tests/test.fs_maxZ.R b/tests/test.fs_maxZ.R new file mode 100644 index 0000000..11e89aa --- /dev/null +++ b/tests/test.fs_maxZ.R @@ -0,0 +1,36 @@ +library(selectiveInference) +options(error=dump.frames) + +set.seed(0) +n = 20 +p = 5 +s = 3 +size = 
5 + +sigma = 1.5 +x = matrix(rnorm(n*p),n,p) + + +b = c(sample(c(-1,1),s,replace=T)*rep(size,s),rep(0,p-s)) +b=rep(0,p) +mu = x%*%b +nsim=200 +pv=matrix(NA,nsim,p) +for(ii in 1:nsim){ + cat(ii) +y = mu + sigma*rnorm(n) + +obj = fs(x,y,verb=T,intercept=T,norm=T, maxsteps=p) + + +# Sequential inference +out = fsInf_maxZ(obj,sigma=sigma, ndraw=5000, burnin=1000) +pv[ii,]=out$pv +} + + +par(mfrow=c(3,3)) +for(j in 1:p){ +plot((1:nsim)/nsim,sort(pv[,j])) +abline(0,1) +} diff --git a/tests/test.groupfs.R b/tests/test.groupfs.R index 167dd0a..0895712 100644 --- a/tests/test.groupfs.R +++ b/tests/test.groupfs.R @@ -1,29 +1,27 @@ +#library(selectiveInference) +#library(lars) library(intervals) -source("../selectiveInference/R/funs.common.R") source("../selectiveInference/R/funs.groupfs.R") source("../selectiveInference/R/funs.quadratic.R") -source("../selectiveInference/R/funs.fs.R") -source("../selectiveInference/R/funs.lar.R") -#library(selectiveInference) -#library(lars) +source("../selectiveInference/R/funs.common.R") set.seed(1) n <- 40 p <- 80 index <- sort(rep(1:(p/2), 2)) -maxsteps <- 10 -sparsity <- 5 -snr <- 3 +maxsteps <- 8 +sparsity <- 4 +snr <- 2 system.time({ -for (iter in 1:10) { +for (iter in 1:100) { y <- rnorm(n) x <- matrix(rnorm(n*p), nrow=n) beta <- rep(0, p) beta[which(index %in% 1:sparsity)] <- snr y <- y + x %*% beta fit <- groupfs(x, y, index, maxsteps = maxsteps) - pvals <- groupfsInf(fit) + pvals <- groupfsInf(fit, verbose = T) } }) @@ -65,8 +63,8 @@ for (j in 1:ncol(state.x77)) { states[,j] <- var } states <- cbind(states, state.division) -x <- factor_design(states)$x -X <- scale_groups(x, index)$x +x <- factorDesign(states)$x +X <- scaleGroups(x, index)$x p <- ncol(x) y <- rnorm(n) @@ -78,7 +76,7 @@ y <- y + x %*% beta y <- y-mean(y) df <- data.frame(y = y, states) fsfit <- step(lm(y ~ 0, df), direction="forward", scope = formula(lm(y~., df)), steps = maxsteps, k = 2) -fit <- groupfs(x, y, index, maxsteps, k = 2, normalize = T) +fit <- groupfs(x, y, index, maxsteps, k = 2, intercept = F, center = F, normalize = T) # names(fsfit$coefficients)[-1] if (length(fsfit$coefficients) > 0) { fsnames <- cnames[which(!is.na(charmatch(cnames,names(fsfit$coefficients)[-1])))][order(unlist(lapply(cnames, function(cn) { @@ -92,13 +90,10 @@ cnames[fit$action]#[1:length(fsnames)] print("empty") } -<<<<<<< HEAD:forLater/josh/tests/test.groupfs.R -======= -set.seed(1) n = 100 p = 120 maxsteps = 9 -niter = 50 +niter = 500 # 10 groups of size 10, 10 groups of size 2 index = sort(c(c(1, 1), rep(2:11, 10), rep(12:20, 2))) pvalm = pvalmk = matrix(NA, nrow=niter, ncol=maxsteps) @@ -108,7 +103,7 @@ for (iter in 1:niter) { y = rnorm(n) fit = groupfs(x, y, index, maxsteps) pvals = groupfsInf(fit) - pvalm[iter, ] = pvals$pv + pvalm[iter, ] = pvals$pv fitk = groupfs(x, y, index, maxsteps, sigma = 1) pvalsk = groupfsInf(fitk) pvalmk[iter, ] = pvalsk$pv @@ -133,29 +128,4 @@ print(colMeans(pvalmk)) print(mean(pvalm)) print(mean(pvalmk)) -<<<<<<< HEAD ->>>>>>> 5c372de287b5ff9455ddce7dd513fb868d09e7e6:tests/test.groupfs.R -======= - - - set.seed(1) - n <- 40 - p <- 20 - index <- sort(rep(1:(p/2), 2)) - steps <- 10 - sparsity <- 5 - snr <- 3 - sigma=3 - - y <- rnorm(n)*sigma - x <- matrix(rnorm(n*p), nrow=n) - - - beta <- rep(0, p) - beta[which(index %in% 1:sparsity)] <- snr - y <- y + x %*% beta - - fit <- groupfs(x, y, index=index, maxsteps = steps) - foo=groupfsInf(fit) ->>>>>>> bc141572a79c89a69ca48574049b3006fa4b38ca diff --git a/tests/test.lar.R b/tests/test.lar.R index 6edaa45..be26428 100644 --- 
a/tests/test.lar.R +++ b/tests/test.lar.R @@ -1,5 +1,5 @@ -#library(selectiveInference) -library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(selectiveInference) +#library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") library(lars) set.seed(0) diff --git a/tests/test_QP.R b/tests/test_QP.R new file mode 100644 index 0000000..852a9b2 --- /dev/null +++ b/tests/test_QP.R @@ -0,0 +1,18 @@ +library(selectiveInference) +### Test +n = 80; p = 50 + + +X = matrix(rnorm(n * p), n, p) +Y = rnorm(n) +lam = 2 + +soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 0, 0)$soln +G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) +soln2 = coef(G, s=lam/n, exact=TRUE, x=X, y=Y)[-1] + +print(soln1) +print(soln2) +plot(soln1, soln2) +print(summary(lm(soln1 ~ soln2))) + diff --git a/tests/unifTest.R b/tests/unifTest.R new file mode 100644 index 0000000..3f251ca --- /dev/null +++ b/tests/unifTest.R @@ -0,0 +1,114 @@ + +library(selectiveInference, lib.loc="/Users/tibs/dropbox/git/R-software/mylib") + +library(glmnet) + +set.seed(424) + +n=100 +p=30 + +n=100 +p=200 + +sigma=.4 +beta=c(3,2,-1,4,-2,2,rep(0,p-6)) +#beta=rep(0,p) + +tr=beta!=0 + +#type="full" +type="partial" + +nsim = 1000 +lambda=.3 +nzb=0 +pvals <- matrix(NA, nrow=nsim, ncol=p) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta + +for (i in 1:nsim) { + cat(i,fill=T) +y=mu+sigma*rnorm(n) +y=y-mean(y) +# first run glmnet +gfit=glmnet(x,y,intercept=F,standardize=F,thresh=1e-8) + +#extract coef for a given lambda; Note the 1/n factor! + bhat = coef(gfit, s=lambda/n, exact=TRUE,x=x,y=y)[-1] + nzb=nzb+sum(bhat!=0) +# compute fixed lambda p-values and selection intervals +aa = fixedLassoInf(x,y,bhat,lambda,intercept=F,sigma=sigma,type=type) +pvals[i, aa$vars] <- aa$pv +} + +# summarize results + +if(type=="partial"){ +nulls=rowSums(is.na(pvals[,tr]))==0 # for type=partial, nonnull setting +np = pvals[nulls,-(1:sum(beta!=0))] +} + +if(type=="full"){ +nulls=1:nrow(pvals) # for type=full non null setting +np = pvals[nulls,-(1:sum(beta!=0))] +} + + + +#np=pvals #for null setting + +o=!is.na(np) + +#check uniformity + +plot((1:sum(o))/sum(o),sort(np[o]),xlab="Expected pvalue",ylab="Observed pvalue") +abline(0,1) + + + # estimate and plot FDR + +pvadj=pvadj.by=matrix(NA,nsim,p) +for(ii in 1:nsim){ + oo=!is.na(pvals[ii,]) + pvadj[ii,oo]=p.adjust(pvals[ii,oo],method="BH") + pvadj.by[ii,oo]=p.adjust(pvals[ii,oo],method="BY") + + } +qqlist=c(.05, .1,.15,.2,.25,.3) +fdr=se=fdr.by=se.by=rep(NA,length(qqlist)) +jj=0 +for(qq in qqlist){ + jj=jj+1 + +r=v=r.by=v.by=rep(NA,nsim) +for(ii in 1:nsim){ + v[ii]=sum( (pvadj[ii,]