Merge pull request #28 from jonathan-taylor/export_lasso_poly

jonathan-taylor · web-flow · commit 5a0884b26e33 · 2017-09-25T16:39:57.000-07:00
exporting function to construct polyhedral constraints of LASSO
diff --git a/forLater/fixedLassoPoly.Rd b/forLater/fixedLassoPoly.Rd
@@ -0,0 +1,81 @@
+\name{fixedLassoPoly}
+\alias{fixedLassoPoly}
+
+\title{
+Compute polyhedral constraints for a LASSO problem with
+a fixed value of lambda.
+}
+\description{
+Compute polyhedral representation of the selection region of Lee et al. (2016).
+By construction, y should satisfy A \%*\% y elementwise less than or equal to b.
+}
+\usage{
+fixedLassoPoly(X, y, lambda, beta, active, inactive = FALSE)
+}
+\arguments{
+\item{X}{
+Design matrix of LASSO problem.
+}
+\item{y}{
+Response of LASSO problem.
+}
+\item{lambda}{
+Value of regularization parameter.
+}      
+\item{beta}{
+Solution of LASSO problem with regularization parameter set to lambda.
+}      
+\item{active}{
+Active set of the LASSO problem as a boolean vector. Should correspond
+to the non-zeros of beta.
+} 
+\item{inactive}{
+Form the inactive constraints as well?
+} 
+}
+\details{
+This function computes
+the  polyhedral representation of the selection region of Lee et al. (2016).
+}
+
+\value{  
+\item{A}{Linear part of the affine inequalities.}
+\item{b}{RHS offset of the affine inequalities.}
+}
+
+\references{
+Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2016). 
+Exact post-selection inference, with application to the lasso. Annals of Statistics, 44(3), 907-927.
+
+Jonathan Taylor and Robert Tibshirani (2017). Post-selection inference for L1-penalized likelihood models.
+Canadian Journal of Statistics, xx, 1-21. (Volume still not posted)
+}
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\examples{
+
+set.seed(43)
+n = 50
+p = 10
+sigma = 1
+
+x = matrix(rnorm(n*p),n,p)
+x = scale(x,TRUE,TRUE)
+
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# first run glmnet
+gfit = glmnet(x,y,standardize=FALSE)
+
+# extract coef for a given lambda; note the 1/n factor!
+# (and we don't save the intercept term)
+lambda = .8
+beta = coef(gfit, s=lambda/n, exact=TRUE)[-1]
+active = (beta != 0)
+
+fixedLassoPoly(x, y, lambda, beta, active)
+fixedLassoPoly(x, y, lambda, beta, active, inactive=TRUE)
+
+}
+ 
diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R
@@ -93,14 +93,14 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co
                     "'thresh' parameter, for a more accurate convergence."))
     
     # Get lasso polyhedral region, of form Gy >= u
-    if (type == 'full' & p > n) out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE)
-    else out = fixedLasso.poly(x,y,beta,lambda,vars)
-    G = out$G
-    u = out$u
+    if (type == 'full' & p > n) out = fixedLassoPoly(x,y,lambda,beta,vars,inactive=TRUE)
+    else out = fixedLassoPoly(x,y,lambda,beta,vars)
+    A = out$A
+    b = out$b
     
     # Check polyhedral region
     tol.poly = 0.01
-    if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2)))
+    if (max(A %*% y - b) > tol.poly * sqrt(sum(y^2)))
       stop(paste("Polyhedral constraints not satisfied; you must recompute beta",
                  "more accurately. With glmnet, make sure to use exact=TRUE in coef(),",
                  "and check whether the specified value of lambda is too small",
@@ -191,7 +191,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co
     sign[j] = sign(sum(vj*y))
     vj = sign[j] * vj
 
-    limits.info = TG.limits(y, -G, -u, vj, Sigma=diag(rep(sigma^2, n)))
+    limits.info = TG.limits(y, A, b, vj, Sigma=diag(rep(sigma^2, n)))
     a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits)
     pv[j] = a$pv
     vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj)
@@ -221,45 +221,39 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co
 #############################
 
 
-fixedLasso.poly=
-  function(x, y, beta, lambda, a, inactive = FALSE) {
-    xa = x[,a,drop=F]
-    xac = x[,!a,drop=F]
-    xai = pinv(crossprod(xa))
-    xap = xai %*% t(xa)
-    za = sign(beta[a])
+fixedLassoPoly =
+  function(X, y, lambda, beta, active, inactive = FALSE) {
+    Xa = X[,active,drop=F]
+    Xac = X[,!active,drop=F]
+    Xai = pinv(crossprod(Xa))
+    Xap = Xai %*% t(Xa)
+
+    za = sign(beta[active])
     if (length(za)>1) dz = diag(za)
     if (length(za)==1) dz = matrix(za,1,1)
     
-    if (inactive) {
-      P = diag(1,nrow(xa)) - xa %*% xap
+    if (inactive) { # should we include the inactive constraints?
+      R = diag(1,nrow(Xa)) - Xa %*% Xap # R is residual forming matrix of selected model
       
-      G = -rbind(
-        1/lambda * t(xac) %*% P,
-        -1/lambda * t(xac) %*% P,
-        -dz %*% xap
+      A = rbind(
+        1/lambda * t(Xac) %*% R,
+        -1/lambda * t(Xac) %*% R,
+        -dz %*% Xap
       )
       lambda2=lambda
-      if(length(lambda)>1) lambda2=lambda[a]
-      u = -c(
-        1 - t(xac) %*% t(xap) %*% za,
-        1 + t(xac) %*% t(xap) %*% za,
-        -lambda2 * dz %*% xai %*% za)
+      if(length(lambda)>1) lambda2=lambda[active]
+      b = c(
+        1 - t(Xac) %*% t(Xap) %*% za,
+        1 + t(Xac) %*% t(Xap) %*% za,
+        -lambda2 * dz %*% Xai %*% za)
     } else {
-      G = -rbind(
-        #   1/lambda * t(xac) %*% P,
-        # -1/lambda * t(xac) %*% P,
-        -dz %*% xap
-      )
+      A = -dz %*% Xap
       lambda2=lambda
-      if(length(lambda)>1) lambda2=lambda[a]
-      u = -c(
-        #   1 - t(xac) %*% t(xap) %*% za,
-        #   1 + t(xac) %*% t(xap) %*% za,
-        -lambda2 * dz %*% xai %*% za)
+      if(length(lambda)>1) lambda2=lambda[active]
+      b = -lambda2 * dz %*% Xai %*% za
     }
     
-    return(list(G=G,u=u))
+    return(list(A=A, b=b))
   }
 
 ##############################
diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R
@@ -295,13 +295,13 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic
     for (j in 1:k) {
       if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j]))
 
-      Gj = G[1:nconstraint[j],]
-      uj = rep(0,nconstraint[j])
+      Aj = -G[1:nconstraint[j],]
+      bj = -rep(0,nconstraint[j])
       vj = vreg[j,]
       mj = sqrt(sum(vj^2)) 
       vj = vj / mj              # Standardize (divide by norm of vj)
 
-      limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n)))
+      limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n)))
       a = TG.pvalue.base(limits.info, bits=bits)
 
       pv[j] = a$pv
@@ -353,10 +353,10 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic
       vj = vj / mj              # Standardize (divide by norm of vj)
       sign[j] = sign(sum(vj*y))
       vj = sign[j] * vj
-      Gj = rbind(G,vj)
-      uj = c(u,0)
+      Aj = -rbind(G,vj)
+      bj = -c(u,0)
 
-      limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n)))
+      limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n)))
       a = TG.pvalue.base(limits.info, bits=bits)
       pv[j] = a$pv
       sxj = sx[vars[j]]
diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R
@@ -367,13 +367,13 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai
     for (j in 1:k) {
       if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j]))
 
-      Gj = G[1:nk[j],]
-      uj = rep(0,nk[j])
+      Aj = -G[1:nk[j],]
+      bj = -rep(0,nk[j])
       vj = vreg[j,]
       mj = sqrt(sum(vj^2))
       vj = vj / mj              # Standardize (divide by norm of vj)
 
-      limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n)))
+      limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n)))
       a = TG.pvalue.base(limits.info, bits=bits)
       pv[j] = a$pv
       sxj = sx[vars[j]]
@@ -428,10 +428,10 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai
       vj = vj / mj             # Standardize (divide by norm of vj)
       sign[j] = sign(sum(vj*y))
       vj = sign[j] * vj
-      Gj = rbind(G,vj)
-      uj = c(u,0)
+      Aj = -rbind(G,vj)
+      bj = -c(u,0)
 
-      limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n)))
+      limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n)))
       a = TG.pvalue.base(limits.info, bits=bits)
 
       pv[j] = a$pv