
Commit 6dc80be

Merge pull request #26 from jonathan-taylor/using_Rcpp

Use Rcpp to speed up the debiased LASSO. Still to decide: a strategy to limit the size of the active set.
2 parents 2bd33e8 + acbf82c commit 6dc80be

19 files changed · +1013 −325

.travis.yml

Lines changed: 3 additions & 1 deletion
```diff
@@ -7,8 +7,10 @@ r:
 - devel
 addons:
   apt:
-    packages: libmpfr-dev
+    packages: libmpfr-dev
 warnings_are_errors: true
 before_install:
 - tlmgr install index # for texlive and vignette?
+- R -e 'install.packages("Rcpp", repos="http://cloud.r-project.org")'
+- make Rcpp
 - cd selectiveInference
```

(The `packages: libmpfr-dev` line is deleted and re-added differing only in whitespace.)

Makefile

Lines changed: 14 additions & 0 deletions
```diff
@@ -0,0 +1,14 @@
+Rcpp:
+	- rm -f selectiveInference/src/RcppExports.cpp
+	- rm -f selectiveInference/R/RcppExports.R
+	Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
+
+install: Rcpp
+	R CMD INSTALL selectiveInference
+
+build:
+	R CMD build selectiveInference
+
+check: Rcpp build
+	R CMD build selectiveInference
+	R CMD check selectiveInference_1.2.2.tar.gz # fix this to be a script variable
```

(The subcommand is `R CMD INSTALL`, upper case; lower-case `install` fails on case-sensitive systems.)
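For context on the `Rcpp` target: `compileAttributes()` scans `src/` for `// [[Rcpp::export]]` tags and rewrites the two generated glue files, which is why the target deletes them first. A minimal sketch of the equivalent R session (assuming the repository root as the working directory):

```r
# Sketch: what `make Rcpp` does from inside R.
library(Rcpp)
Rcpp::compileAttributes("selectiveInference", verbose = TRUE)
# regenerates: selectiveInference/src/RcppExports.cpp
#              selectiveInference/R/RcppExports.R
```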

README.md

Lines changed: 8 additions & 0 deletions
````diff
@@ -21,3 +21,11 @@ The latest release of the package can be installed through CRAN:
 install.packages("selectiveInference")
 ```
 Code in repo is under development and may be unstable.
+
+## For development
+
+You will have to run
+
+```
+make Rcpp
+```
````

selectiveInference/DESCRIPTION

Lines changed: 2 additions & 0 deletions
```diff
@@ -18,3 +18,5 @@ Description: New tools for post-selection inference, for use with forward
   models.
 License: GPL-2
 RoxygenNote: 5.0.1
+LinkingTo: Rcpp
+Imports: Rcpp
```

selectiveInference/NAMESPACE

Lines changed: 1 addition & 1 deletion
```diff
@@ -44,5 +44,5 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict,
            qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq)
 importFrom("stats", "coef", "df", "lm", "pf")
 importFrom("stats", "glm", "residuals", "vcov")
-
+importFrom("Rcpp", "sourceCpp")
```
selectiveInference/R/funs.common.R

Lines changed: 1 addition & 7 deletions
```diff
@@ -152,13 +152,7 @@ updateQR <- function(Q1,Q2,R,col) {
   n = ncol(Q1)
   k = ncol(Q2)

-  a = .C("update1",
-         Q2=as.double(Q2),
-         w=as.double(t(Q2)%*%col),
-         m=as.integer(m),
-         k=as.integer(k),
-         dup=FALSE,
-         package="selectiveInference")
+  a = update1_(as.matrix(Q2), t(Q2)%*%col, m, k) # Rcpp call

   Q2 = matrix(a$Q2,nrow=m)
   w = c(t(Q1)%*%col,a$w)
```
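The switch from `.C()` to an attribute-generated wrapper changes the calling convention: arguments no longer need manual `as.double`/`as.integer` coercion, and results come back as a named list rather than a copied argument list. A minimal sketch of the pattern (the body is a placeholder, and `update1_demo` is hypothetical, not the package's actual kernel):

```r
# Hypothetical illustration of the Rcpp-attributes pattern behind
# update1_: the C++ side takes typed R objects directly and returns a
# named list with the pieces updateQR() reads back ($Q2 and $w).
library(Rcpp)
cppFunction('
List update1_demo(NumericMatrix Q2, NumericVector w, int m, int k) {
  // a real implementation would apply Givens rotations to Q2 and w here
  return List::create(Named("Q2") = Q2, Named("w") = w);
}')
a <- update1_demo(diag(3), rnorm(3), 3, 3)
str(a)  # list with $Q2 and $w, matching what the caller expects
```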

selectiveInference/R/funs.fixed.R

Lines changed: 57 additions & 105 deletions
```diff
@@ -3,9 +3,9 @@
 # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1

 fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, add.targets=NULL, status=NULL,
-                          sigma=NULL, alpha=0.1,
-                          type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1,
-                          gridrange=c(-100,100), bits=NULL, verbose=FALSE) {
+                          sigma=NULL, alpha=0.1,
+                          type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1,
+                          gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) {

 family = match.arg(family)
 this.call = match.call()
```

(The three re-wrapped argument lines differ only in leading whitespace; the substantive change is the new `linesearch.try` argument.)
```diff
@@ -26,8 +26,6 @@ sigma=NULL, alpha=0.1,

 else{

-
-
   checkargs.xy(x,y)
   if (missing(beta) || is.null(beta)) stop("Must supply the solution beta")
   if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda")
```
```diff
@@ -159,8 +157,8 @@
 hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS)

 # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R
-useC = TRUE
-htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, useC=useC)
+
+htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, max.try=linesearch.try)
 # htheta <- InverseLinfty(hsigma, n, verbose=FALSE)

 FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S)))
```
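As background for the call above: `InverseLinfty` builds a matrix whose rows approximately invert the sample covariance `hsigma` in the l-infinity sense. A toy illustration of the quantity being controlled (a sketch, not package code):

```r
# With an exactly invertible Sigma, the exact inverse drives the
# constraint residual to zero; InverseLinfty only asks for it to be
# small (bounded by the tuning level mu).
p <- 4
Sigma <- crossprod(matrix(rnorm(40), 10, p)) / 10
M <- solve(Sigma)                 # what the approximation targets
max(abs(M %*% Sigma - diag(p)))   # ~0 here; <= mu for the approximation
```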
```diff
@@ -270,8 +268,7 @@ fixedLasso.poly=
 ### Functions borrowed and slightly modified from lasso_inference.R

 ## Approximates inverse covariance matrix theta
-InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, useC = FALSE) {
-# InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) {
+InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, max.try=10) {
 isgiven <- 1;
 if (is.null(mu)){
   isgiven <- 0;
```
```diff
@@ -293,16 +290,15 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold
   mu.stop <- 0;
   try.no <- 1;
   incr <- 0;
-  while ((mu.stop != 1)&&(try.no<10)){
+
+  output = NULL
+
+  while ((mu.stop != 1) && (try.no<max.try) ){
     last.beta <- beta
-    if (useC == FALSE) {
-      output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold)
-    } else {
-      output <- InverseLinftyOneRowC(sigma, i, mu, maxiter=maxiter)
-    }
-    beta <- output$optsol
+    output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start
+    beta <- output$soln
     iter <- output$iter
-    if (isgiven==1){
+    if (isgiven==1) {
       mu.stop <- 1
     }
     else{
```
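The new loop threads the previous `output` back in as `soln_result`, so each retry of the line search over `mu` resumes from the last active set instead of solving from scratch. The pattern in isolation (a sketch with a hypothetical stopping rule, not the package's exact control flow):

```r
# Hypothetical skeleton of the warm-started line search over mu:
# the solver's return value doubles as the warm start for the next try.
output <- NULL
for (try.no in 1:max.try) {
  output <- InverseLinftyOneRow(Sigma, i, mu, soln_result = output)
  if (output$kkt_check) break   # accept the first feasible solution
  mu <- mu * resol              # relax the bound and retry, warm-started
}
```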
```diff
@@ -339,98 +335,54 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold
   return(M)
 }

-InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) {
-
-  p = nrow(Sigma)
-  basis_vector = rep(0, p)
-  basis_vector[i] = 1.
-  theta = rep(0, p)
-
-  val = .C("find_one_row",
-           Sigma=as.double(Sigma),
-           Sigma_diag=as.double(diag(Sigma)),
-           Sigma_theta=as.double(rep(0, p)),
-           nrow=as.integer(p),
-           bound=as.double(mu),
-           theta=as.double(theta),
-           maxiter=as.integer(50),
-           row=as.integer(i-1),
-           coord=as.integer(i-1),
-           dup=FALSE,
-           package="selectiveInference")
-
-  # Check feasibility
-
-  if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) {
-    warning("Solution for row of M does not seem to be feasible")
-  }
-
-  return(val$theta)
-}
-
-InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) {
-  p <- nrow(sigma);
-  rho <- max(abs(sigma[i,-i])) / sigma[i,i];
-  mu0 <- rho/(1+rho);
-  beta <- rep(0,p);
-
-  #if (mu >= mu0){
-  #  beta[i] <- (1-mu0)/sigma[i,i];
-  #  returnlist <- list("optsol" = beta, "iter" = 0);
-  #  return(returnlist);
-  #}
-
-  diff.norm2 <- 1;
-  last.norm2 <- 1;
-  iter <- 1;
-  iter.old <- 1;
-  beta[i] <- (1-mu0)/sigma[i,i];
-  beta.old <- beta;
-  sigma.tilde <- sigma;
-  diag(sigma.tilde) <- 0;
-  vs <- -sigma.tilde%*%beta;
-
-  while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){
-
-    for (j in 1:p){
-      oldval <- beta[j];
-      v <- vs[j];
-      if (j==i)
-        v <- v+1;
-      beta[j] <- SoftThreshold(v,mu)/sigma[j,j];
-      if (oldval != beta[j]){
-        vs <- vs + (oldval-beta[j])*sigma.tilde[,j];
-      }
-    }
-
-    iter <- iter + 1;
-    if (iter==2*iter.old){
-      d <- beta - beta.old;
-      diff.norm2 <- sqrt(sum(d*d));
-      last.norm2 <- sqrt(sum(beta*beta));
-      iter.old <- iter;
-      beta.old <- beta;
-      if (iter>10)
-        vs <- -sigma.tilde%*%beta;
-    }
-  }
-
-  returnlist <- list("optsol" = beta, "iter" = iter)
-  return(returnlist)
-}
-
-SoftThreshold <- function( x, lambda ) {
-  #
-  # Standard soft thresholding
-  #
-  if (x>lambda){
-    return (x-lambda);}
-  else {
-    if (x< (-lambda)){
-      return (x+lambda);}
-    else {
-      return (0); }
-  }
-}
+InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt_tol=1.e-6, objective_tol=1.e-6,
+                                 use_QP=TRUE) {
+
+  # If soln_result is not NULL, it is used as a warm start.
+  # It should be a list
+  # with entries "soln", "gradient", "ever_active", "nactive"
+
+  p = nrow(Sigma)
+
+  if (is.null(soln_result)) {
+    soln = rep(0, p)
+    ever_active = rep(0, p)
+    ever_active[1] = i # 1-based
+    ever_active = as.integer(ever_active)
+    nactive = as.integer(1)
+    if (use_QP) {
+      linear_func = rep(0, p)
+      linear_func[i] = -1
+      linear_func = as.numeric(linear_func)
+      gradient = 1. * linear_func
+    } else {
+      gradient = rep(0, p)
+    }
+  }
+  else {
+    soln = soln_result$soln
+    gradient = soln_result$gradient
+    ever_active = as.integer(soln_result$ever_active)
+    nactive = as.integer(soln_result$nactive)
+    if (use_QP) {
+      linear_func = soln_result$linear_func
+    }
+  }
+
+  if (use_QP) {
+    result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol)
+  } else {
+    result = find_one_row_debiasingM(Sigma, i, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 1-based indexing for active set
+  }
+
+  # Check feasibility
+
+  if (!result$kkt_check) {
+    warning("Solution for row of M does not seem to be feasible")
+  }
+
+  return(result)
 }

 ##############################
```
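The feasibility warning above mirrors the check the removed `InverseLinftyOneRowC` did explicitly: row `i` of the debiasing matrix must satisfy the l-infinity constraint. A standalone sketch of that condition (the `slack` factor follows the removed code's `1.01 * mu` tolerance; `feasible_row` is illustrative, not a package function):

```r
# The constraint behind result$kkt_check, written out: for row i,
# || Sigma %*% theta - e_i ||_inf must not exceed mu (plus slack).
feasible_row <- function(Sigma, theta, i, mu, slack = 1.01) {
  e_i <- replace(rep(0, nrow(Sigma)), i, 1)
  max(abs(Sigma %*% theta - e_i)) <= slack * mu
}
```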

selectiveInference/R/funs.lar.R

Lines changed: 1 addition & 8 deletions
```diff
@@ -254,14 +254,7 @@ downdateQR <- function(Q1,Q2,R,col) {
   m = nrow(Q1)
   n = ncol(Q1)

-  a = .C("downdate1",
-         Q1=as.double(Q1),
-         R=as.double(R),
-         col=as.integer(col-1),
-         m=as.integer(m),
-         n=as.integer(n),
-         dup=FALSE,
-         package="selectiveInference")
+  a = downdate1_(as.matrix(Q1), R, col, m, n) # Rcpp call

   Q1 = matrix(a$Q1,nrow=m)
   R = matrix(a$R,nrow=n)
```

selectiveInference/man/fixedLassoInf.Rd

Lines changed: 37 additions & 8 deletions
```diff
@@ -10,9 +10,9 @@ fixed value of the tuning parameter lambda
 }
 \usage{
 fixedLassoInf(x, y, beta, lambda, family = c("gaussian", "binomial",
-       "cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1,
+       "cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1,
        type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1,
-       gridrange=c(-100,100), bits=NULL, verbose=FALSE)
+       gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10)
 }
 \arguments{
 \item{x}{
@@ -85,7 +85,11 @@ helpful (though computationally more costly). In particular, extra precision might help
 if the values in the output columns of \code{tailarea} differ noticeably from alpha/2.
 }
 \item{verbose}{
-Print out progress along the way? Default is FALSE}
+Print out progress along the way? Default is FALSE
+}
+\item{linesearch.try}{
+When running type="full" (i.e., the debiased LASSO), how many attempts in the line search?
+}
 }

 \details{
@@ -145,7 +149,7 @@ p = 10
 sigma = 1

 x = matrix(rnorm(n*p),n,p)
-x=scale(x,TRUE,TRUE)
+x = scale(x,TRUE,TRUE)

 beta = c(3,2,rep(0,p-2))
 y = x\%*\%beta + sigma*rnorm(n)
@@ -165,10 +169,10 @@ out

 ## as above, but use lar function instead to get initial
 ## lasso fit (should get same results)
-lfit = lar(x,y,normalize=FALSE)
-beta = coef(lfit,s=lambda,mode="lambda")
-out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma)
-out2
+lfit = lar(x,y,normalize=FALSE)
+beta = coef(lfit,s=lambda,mode="lambda")
+out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma)
+out2

 ## mimic different penalty factors by first scaling x
 set.seed(43)
@@ -249,5 +253,30 @@ status=sample(c(0,1),size=n,replace=TRUE)
 # compute fixed lambda p-values and selection intervals
 out = fixedLassoInf(x,tim,beta_hat,lambda,status=status,family="cox")
 out
+
+# Debiased lasso or "full"
+
+n = 50
+p = 100
+sigma = 1
+
+x = matrix(rnorm(n*p),n,p)
+x = scale(x,TRUE,TRUE)
+
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# first run glmnet
+gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE)
+
+# extract coef for a given lambda; note the 1/n factor!
+# (and we don't save the intercept term)
+lambda = 2.8
+beta = coef(gfit, s=lambda/n, exact=TRUE)[-1]
+
+# compute fixed lambda p-values and selection intervals
+out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE)
+out
+
 }
```

(Several −/+ pairs above, such as the re-wrapped usage line and the lar example, differ only in whitespace.)
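Where the new argument surfaces for users: if the debiased-lasso fit warns that rows of the debiasing matrix are infeasible, one knob to try is a larger `linesearch.try`. A hedged usage sketch, reusing `x`, `y`, `beta`, `lambda`, and `sigma` from the "full" example above (the value 25 is illustrative, not a recommendation):

```r
# Sketch: same call as the "full" example, allowing more line-search
# attempts per row of the debiasing matrix.
out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full',
                    intercept=FALSE, linesearch.try=25)
out
```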

selectiveInference/src/Makevars

Lines changed: 11 additions & 0 deletions
```diff
@@ -0,0 +1,11 @@
+PKG_CFLAGS= -I.
+PKG_CPPFLAGS= -I.
+PKG_LIBS=-L.
+
+$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o debias.o
+
+clean:
+	rm -f *o
+
+Rcpp:
+	Rscript -e "library(Rcpp); Rcpp::compileAttributes('..')"
```
