Commit 322e7f0

rob put all of maxZ stuff into a folder for later use

1 parent 739de67 commit 322e7f0
5 files changed: +1284 -0 lines changed

forLater/estimateLambda.Rd

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
\name{estimateLambda}
\alias{estimateLambda}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
Estimates the lasso tuning parameter lambda.
}
\description{
Estimates the lasso tuning parameter lambda, for use in the selectiveInference
package.
}
\usage{
estimateLambda(x, sigma, nsamp=1000)
}
\arguments{
\item{x}{
Matrix of predictors (n by p)
}
\item{sigma}{
Estimate of error standard deviation
}
\item{nsamp}{Number of Monte Carlo samples used for the estimation.}
}
\details{
This function estimates the lasso tuning parameter lambda, using the estimate
2*E(||X^T eps||_infty), where eps ~ N(0, sigma^2 I) is a vector of length n.
This estimate was proposed by Negahban et al. (2012).
}
\value{
\item{lambda}{The estimated value of the tuning parameter lambda}
}
\references{
Negahban, S. N., Ravikumar, P., Wainwright, M. J. and Yu, B. (2012).
A unified framework for high-dimensional analysis of M-estimators with
decomposable regularizers. Statistical Science 27, 538-557.
}

\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}

\examples{
#NOT RUN
#set.seed(43)
#n=50
#p=10
#sigma=.7
#x=matrix(rnorm(n*p),n,p)
#x=scale(x,T,F)
#beta=c(3,2,0,0,rep(0,p-4))
#y=x%*%beta+sigma*rnorm(n)
#y=y-mean(y)
#
#estimate lambda using the known value of sigma
#lamhat=estimateLambda(x,sigma=.7)
#
#first estimate sigma
#sigmahat=estimateSigma(x,y)$sigmahat
#lamhat=estimateLambda(x,sigma=sigmahat)

#compare to estimate from cv
#out=cv.glmnet(x,y)
#out$lambda.min*n  #remember that the value from glmnet must be
#                  #multiplied by n, to make it comparable
}
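The estimator described in \details takes only a few lines to compute by Monte Carlo. A minimal sketch, not part of this commit (the name estimateLambdaMC is hypothetical; it mirrors the commented-out implementation at the bottom of forLater/funs.fixed.R below, with abs() making the infinity norm explicit):

# Monte Carlo estimate of 2*E(||X^T eps||_inf), with eps ~ N(0, sigma^2 I)
estimateLambdaMC <- function(x, sigma, nsamp=1000) {
  n = nrow(x)
  eps = sigma * matrix(rnorm(nsamp*n), n, nsamp)  # each column is one draw of eps
  2 * mean(apply(abs(t(x) %*% eps), 2, max))      # average infinity norm, doubled
}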

forLater/funs.fixed.R

Lines changed: 198 additions & 0 deletions
@@ -0,0 +1,198 @@
# Lasso inference function (for fixed lambda). Note: here we are providing inference
# for the solution of
# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1

fixedLassoInf <- function(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha=0.1,
                          type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1,
                          gridrange=c(-100,100), bits=NULL, verbose=FALSE) {

  this.call = match.call()
  type = match.arg(type)
  checkargs.xy(x,y)
  if (missing(beta) || is.null(beta)) stop("Must supply the solution beta")
  if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda")
  checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha,
                 gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt)
  if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) {
    warning("Package Rmpfr is not installed, reverting to standard precision")
    bits = NULL
  }

  n = nrow(x)
  p = ncol(x)
  beta = as.numeric(beta)
  if (length(beta) != p) stop("beta must have length equal to ncol(x)")

  # If glmnet was run with an intercept term, center x and y
  if (intercept==TRUE) {
    obj = standardize(x,y,TRUE,FALSE)
    x = obj$x
    y = obj$y
  }

  # Check the KKT conditions
  g = t(x)%*%(y-x%*%beta) / lambda
  if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2))))
    warning(paste("Solution beta does not satisfy the KKT conditions",
                  "(to within specified tolerances)"))

  vars = which(abs(beta) > tol.beta / sqrt(colSums(x^2)))
  if (length(vars)==0) {
    cat("Empty model",fill=TRUE)
    return()
  }
  if (any(sign(g[vars]) != sign(beta[vars])))
    warning(paste("Solution beta does not satisfy the KKT conditions",
                  "(to within specified tolerances). You might try rerunning",
                  "glmnet with a lower setting of the",
                  "'thresh' parameter, for a more accurate convergence."))

  # Get lasso polyhedral region, of form G y >= u
  out = fixedLasso.poly(x,y,beta,lambda,vars)
  G = out$G
  u = out$u

  # Check polyhedral region
  tol.poly = 0.01
  if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2)))
    stop(paste("Polyhedral constraints not satisfied; you must recompute beta",
               "more accurately. With glmnet, make sure to use exact=TRUE in coef(),",
               "and check whether the specified value of lambda is too small",
               "(beyond the grid of values visited by glmnet).",
               "You might also try rerunning glmnet with a lower setting of the",
               "'thresh' parameter, for a more accurate convergence."))

  # Estimate sigma
  if (is.null(sigma)) {
    if (n >= 2*p) {
      oo = intercept
      sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo))
    }
    else {
      sigma = sd(y)
      warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma),
                    "you may want to use the estimateSigma function"))
    }
  }

  k = length(vars)
  pv = vlo = vup = numeric(k)
  vmat = matrix(0,k,n)
  ci = tailarea = matrix(0,k,2)
  sign = numeric(k)

  if (type=="full" && p > n)
    warning(paste("type='full' does not make sense when p > n;",
                  "switching to type='partial'"))

  if (type=="partial" || p > n) {
    xa = x[,vars,drop=FALSE]
    M = pinv(crossprod(xa)) %*% t(xa)
  }
  else {
    M = pinv(crossprod(x)) %*% t(x)
    M = M[vars,,drop=FALSE]
  }

  for (j in 1:k) {
    if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j]))

    vj = M[j,]
    mj = sqrt(sum(vj^2))
    vj = vj / mj              # Standardize (divide by norm of vj)
    sign[j] = sign(sum(vj*y))
    vj = sign[j] * vj
    a = poly.pval(y,G,u,vj,sigma,bits)
    pv[j] = a$pv
    vlo[j] = a$vlo * mj       # Unstandardize (mult by norm of vj)
    vup[j] = a$vup * mj       # Unstandardize (mult by norm of vj)
    vmat[j,] = vj * mj        # Unstandardize (mult by norm of vj)

    a = poly.int(y,G,u,vj,sigma,alpha,gridrange=gridrange,
                 flip=(sign[j]==-1),bits=bits)
    ci[j,] = a$int * mj       # Unstandardize (mult by norm of vj)
    tailarea[j,] = a$tailarea
  }

  out = list(type=type,lambda=lambda,pv=pv,ci=ci,
             tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y,
             vars=vars,sign=sign,sigma=sigma,alpha=alpha,
             call=this.call)
  class(out) = "fixedLassoInf"
  return(out)
}
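A usage sketch, not part of this commit: compute the lasso solution with glmnet and hand it to fixedLassoInf. It assumes the package's other helpers (checkargs.xy, checkargs.misc, standardize, poly.pval, poly.int) are available, e.g. with the selectiveInference package loaded, plus glmnet. Since glmnet minimizes (1/(2n))*RSS + lambda*||beta||_1, its s argument must be lambda/n, matching the factor-of-n note in the .Rd example above.

# library(glmnet); library(selectiveInference)   # assumed available
set.seed(43)
n = 50; p = 10; sigma = 0.7
x = matrix(rnorm(n*p), n, p)
y = x %*% c(3, 2, rep(0, p-2)) + sigma * rnorm(n)

lambda = 10
gfit = glmnet(x, y, standardize=FALSE)
bhat = coef(gfit, s=lambda/n, exact=TRUE, x=x, y=y)[-1]  # exact solution, drop intercept

out = fixedLassoInf(x, y, bhat, lambda, sigma=sigma)
out   # dispatches to print.fixedLassoInf below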

##############################

fixedLasso.poly <- function(x, y, beta, lambda, a) {
  xa = x[,a,drop=FALSE]
  xac = x[,-a,drop=FALSE]   # a holds the active indices, so -a selects the inactive columns
  xai = pinv(crossprod(xa))
  xap = xai %*% t(xa)
  za = sign(beta[a])
  if (length(za)>1) dz = diag(za)
  if (length(za)==1) dz = matrix(za,1,1)

  P = diag(1,nrow(xa)) - xa %*% xap
  G = -rbind(1/lambda * t(xac) %*% P,
             -1/lambda * t(xac) %*% P,
             -dz %*% xap)
  u = -c(1 - t(xac) %*% t(xap) %*% za,
         1 + t(xac) %*% t(xap) %*% za,
         -lambda * dz %*% xai %*% za)

  return(list(G=G,u=u))
}
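For readers reconstructing where G and u come from: the three row blocks of G y >= u restate the lasso KKT conditions, for the fixed active set A (the index vector a) and sign vector z_A = sign(beta_A), as linear constraints on y. This is a reading of the code above, not text from this commit. Writing X_A^+ = (X_A^T X_A)^{-1} X_A^T (xap) and P = I - X_A X_A^+, the blocks are:

\frac{1}{\lambda} X_{A^c}^\top P\,y \;\le\; \mathbf{1} - X_{A^c}^\top (X_A^+)^\top z_A \quad \text{(inactive subgradients, upper)}

-\frac{1}{\lambda} X_{A^c}^\top P\,y \;\le\; \mathbf{1} + X_{A^c}^\top (X_A^+)^\top z_A \quad \text{(inactive subgradients, lower)}

\operatorname{diag}(z_A)\,X_A^+\,y \;\ge\; \lambda\,\operatorname{diag}(z_A)\,(X_A^\top X_A)^{-1} z_A \quad \text{(active coefficients keep their signs)}

The first two blocks say |X_{A^c}^T (y - X_A \hat\beta_A)| \le \lambda; the third says \operatorname{sign}(\hat\beta_A) = z_A, after substituting \hat\beta_A = X_A^+ y - \lambda (X_A^T X_A)^{-1} z_A.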

# Moore-Penrose pseudoinverse for symmetric matrices

pinv <- function(A, tol=.Machine$double.eps) {
  e = eigen(A)
  v = Re(e$vec)
  d = Re(e$val)
  d = ifelse(d > tol, 1/d, 0)   # invert eigenvalues above tol, zero out the rest
  if (length(d)==1) return(v*d*v)
  else return(v %*% diag(d) %*% t(v))
}
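A quick illustrative check (hypothetical, not from this commit) of the Moore-Penrose identity A A^+ A = A on a singular input; the looser tol ensures the numerically-zero eigenvalue is dropped rather than inverted:

set.seed(1)
A = crossprod(matrix(rnorm(6), 2, 3))   # 3 x 3 symmetric PSD, rank 2 (singular)
Ai = pinv(A, tol=1e-10)
max(abs(A %*% Ai %*% A - A))            # ~ 1e-16, so the pseudoinverse identity holds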

##############################

print.fixedLassoInf <- function(x, tailarea=TRUE, ...) {
  cat("\nCall:\n")
  dput(x$call)

  cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n",
              x$sigma))

  cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha))
  cat("",fill=TRUE)
  tab = cbind(x$vars,
              round(x$sign*x$vmat%*%x$y,3),
              round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3),
              round(x$pv,3),round(x$ci,3))
  colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt")
  if (tailarea) {
    tab = cbind(tab,round(x$tailarea,3))
    colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea")
  }
  rownames(tab) = rep("",nrow(tab))
  print(tab)

  cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n",
              ifelse(x$type=="partial","partial","full")))
  invisible()
}

#estimateLambda <- function(x, sigma, nsamp=1000){
#  checkargs.xy(x,rep(0,nrow(x)))
#  if (nsamp < 10) stop("More Monte Carlo samples required for estimation")
#  if (length(sigma)!=1) stop("sigma should be a number > 0")
#  if (sigma<=0) stop("sigma should be a number > 0")
#
#  n = nrow(x)
#  eps = sigma*matrix(rnorm(nsamp*n),n,nsamp)
#  lambda = 2*mean(apply(abs(t(x)%*%eps),2,max))   # abs() so this is the infinity norm
#  return(lambda)
#}
