
Commit 7faeba8

working on cvlar, found possible issue with cvRSSquads
1 parent 9de1b7c commit 7faeba8

3 files changed: 96 additions & 34 deletions


.gitignore

Lines changed: 2 additions & 1 deletion
@@ -6,4 +6,5 @@
 **.RData
 **.o
 **.so
-forLater/josh/**
+forLater/josh/**
+.Rproj.user

forLater/josh/selectiveInference/R/cv.R

Lines changed: 57 additions & 24 deletions
@@ -1,5 +1,8 @@
-# ------------------------------------------------
-# Cross-validation, preliminary
+# Cross-validation
+
+#--------------------------------------
+# Functions for creating cv folds
+# -------------------------------------
 
 cvMakeFolds <- function(x, nfolds = 5) {
   inds <- sample(1:nrow(x), replace=FALSE)
@@ -15,9 +18,23 @@ cvMakeFolds <- function(x, nfolds = 5) {
   return(folds)
 }
 
-############################################
-# Can this be optimized using svdu_thresh? #
-############################################
+# To interface with glmnet
+foldid <- function(folds) {
+  n <- sum(sapply(folds, length))
+  glmnetfoldid <- rep(0, n)
+  for (ind in 1:length(folds)) {
+    glmnetfoldid[folds[[ind]]] <- ind
+  }
+  glmnetfoldid
+}
+
+#--------------------------------------
+# Functions for computing quadratic form for cv-error
+#--------------------------------------
+
+# There seems to be a problem here, picking overly conservative models
+
+# Can this be optimized using svdu_thresh?
 cvHatMatrix <- function(x, folds, active.sets) {
   nfolds <- length(folds)
   lapply(1:nfolds, function(f) {
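
The commit message flags a possible issue with the cv-error quadratic forms. For reference, here is a minimal sketch of why the cross-validated RSS is a quadratic form t(y) %*% Q %*% y in the response when each fold's fit is a least-squares refit of its active set on the 0-padded design. This is not the package's cvRSSquad (whose body is unchanged and not shown in this diff); the function name and the use of MASS::ginv are illustrative only.

library(MASS)  # for ginv()

cvRSSquadSketch <- function(x, folds, active.sets) {
  n <- nrow(x)
  Q <- matrix(0, n, n)
  for (f in seq_along(folds)) {
    fold <- folds[[f]]
    A <- active.sets[[f]]
    x.train <- x
    x.train[fold, ] <- 0  # same 0-padding device used by cvlar() below
    # Held-out predictions are linear in y: yhat = x[fold, A] %*% ginv(x.train[, A]) %*% y,
    # since the zeroed rows of x.train make the fold's own responses drop out of the fit.
    H <- x[fold, A, drop = FALSE] %*% ginv(x.train[, A, drop = FALSE])
    D <- -H
    D[cbind(seq_along(fold), fold)] <- D[cbind(seq_along(fold), fold)] + 1  # rows of (E_fold - H)
    Q <- Q + crossprod(D)  # this fold contributes t(y) %*% t(D) %*% D %*% y, its held-out RSS
  }
  Q  # total cv-RSS for these active sets is t(y) %*% Q %*% y
}

If the selections look overly conservative, comparing t(y) %*% Q %*% y from a sketch like this against a held-out RSS computed directly for a few folds is one way to localize the problem.
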
@@ -61,6 +78,11 @@ cvRSSquad <- function(x, folds, active.sets) {
   return(Q)
 }
 
+
+#--------------------------------------
+# Functions for forward stepwise
+#--------------------------------------
+
 cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, intercept = TRUE, center = TRUE, normalize = TRUE, nfolds = 5) {
 
   n <- nrow(x)
@@ -131,47 +153,58 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, intercept = TR
 }
 
 
-cvlar <- function(x, y) { # other args
+#--------------------------------------
+# Functions for lar
+#--------------------------------------
+
+cvlar <- function(x, y, maxsteps) { # other args
   folds <- cvMakeFolds(x)
   models <- lapply(folds, function(fold) {
-    x.train <- X
-    y.train <- Y
+    x.train <- x
+    y.train <- y
     x.train[fold,] <- 0
     y.train[fold] <- 0
-    x.test <- X[fold,]
-    y.test <- Y[fold]
-    larpath.train <- lar(x.train, y.train, maxsteps = maxsteps, intercept = F, normalize = F)
-    return(lff)
+    x.test <- x[fold,]
+    y.test <- y[fold]
+    larpath.train <- lar(x.train, y.train, maxsteps = maxsteps, intercept = F, normalize = F)
+    return(larpath.train)
   })
 
   active.sets <- lapply(models, function(model) model$action)
   lambdas <- lapply(models, function(model) model$lambda)
   lmin <- min(unlist(lambdas))
 
-  # Interpolate lambda grid or parametrize by steps?
-  # interpolation probably requires re-writing cvRSSquads for
-  # penalized fits in order to make sense
+  # Interpolate lambda grid or parametrize by steps?
+  # interpolation probably requires re-writing cvRSSquads for
+  # penalized fits in order to make sense
 
-  # do steps for now just to have something that works?
+  # do steps for now just to have something that works?
 
   RSSquads <- list()
   for (s in 1:maxsteps) {
     initial.active <- lapply(active.sets, function(a) a[1:s])
-    RSSquads[[s]] <- cvRSSquad(X, folds, initial.active)
+    RSSquads[[s]] <- cvRSSquad(x, folds, initial.active)
   }
 
-  RSSs <- lapply(RSSquads, function(Q) t(Y) %*% Q %*% Y)
+  RSSs <- lapply(RSSquads, function(Q) t(y) %*% Q %*% y)
   sstar <- which.min(RSSs)
   quadstar <- RSSquads[sstar][[1]]
 
-  RSSquads <- lapply(RSSquads, function(quad) quad - quadstar)
-  RSSquads[[sstar]] <- NULL # remove the all zeroes case
+  # Need to add these later?
+  #RSSquads <- lapply(RSSquads, function(quad) quad - quadstar)
+  #RSSquads[[sstar]] <- NULL # remove the all zeroes case
 
-  fit <- lar(X, Y, maxsteps=sstar, intercept = F, normalize = F)
+  fit <- lar(x, y, maxsteps=sstar, intercept = F, normalize = F)
 
-  # Very tall Gamma encoding all cv-model paths
+  # Very tall Gamma encoding all cv-model paths
   Gamma <- do.call(rbind, lapply(models, function(model) return(model$Gamma)))
-
-  # more to do here
+  fit$Gamma <- rbind(fit$Gamma, Gamma)
+  fit$khat <- sstar
+  fit$folds <- folds
+  # more to do here
+  return(fit)
 }
 
+cvlarInf <- function(obj) {
+  larInf(obj, type = "all", k = obj$khat)
+}
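
For orientation, a hypothetical end-to-end call of the two new functions, assuming the lar()/larInf() interface from the selectiveInference package as used elsewhere in this repo; the data, dimensions, and source() path are illustrative only.

library(selectiveInference)
source("forLater/josh/selectiveInference/R/cv.R")  # path relative to the repo root; adjust as needed

set.seed(1)
x <- matrix(rnorm(100 * 20), nrow = 100)
y <- x[, 1] - x[, 2] + rnorm(100)
y <- y - mean(y)                  # cvlar() fits with intercept = F, so center the response first

fit <- cvlar(x, y, maxsteps = 8)  # per-fold lar paths, cv-RSS quadratics, selected step in fit$khat
fit$action                        # variables entering the CV-selected model
pv <- cvlarInf(fit)               # larInf at k = fit$khat; fit$Gamma carries the stacked fold constraints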

forLater/josh/sim.cvlar.R

Lines changed: 37 additions & 9 deletions
@@ -36,23 +36,51 @@
 # pass 0-padded x[-fold] and y[-fold] to lar?
 
 library(selectiveInference)
-setwd("/Users/joftius/Dropbox/work/R-software/forLater/josh")
 source("selectiveInference/R/cv.R")
 
+library(glmnet)
+
 set.seed(1)
 n <- 100
 p <- 50
 maxsteps <- 10
 sparsity <- 3
 snr <- 2
-rho <- 0.1
+#rho <- 0.1
 nfolds <- 5
+niters <- 200
+
+instance <- function(n, p, maxsteps, sparsity, snr, nfolds) {
+
+  x <- matrix(rnorm(n*p), nrow=n)
+  y <- rnorm(n)
+  beta <- rep(0, p)
+  beta[1:sparsity] <- snr * sqrt(2*log(p)/n) * sample(c(-1,1), sparsity, replace=T)
+  y <- y + x %*% beta
+  my <- mean(y)
+  y <- y - my
+
+  fit <- cvlar(x, y, maxsteps)
+  pv <- cvlarInf(fit)
+
+  # Validate with glmnet
+
+  glmnetfit <- cv.glmnet(fit$x, fit$y, intercept = FALSE, foldid = foldid(fit$folds))
+
+  nzglmnet <- which(coef(glmnetfit) != 0)
+  nzcvlar <- fit$action
+  cat("glmnet:", nzglmnet, "\n")
+  cat("cvlar:", nzcvlar, "\n")
+  return(list(nzglmnet, nzcvlar))
+}
+
+
+time <- system.time({
+  output <- replicate(niters, instance(n, p, maxsteps, sparsity, snr, nfolds))
+})
 
-x <- matrix(rnorm(n*p), nrow=n)
-y <- rnorm(n)
-beta <- rep(0, p)
-beta[1:sparsity] <- 2* sqrt(2*log(p)/n) * sample(c(-1,1), sparsity, replace=T)
-y <- y + x %*% beta
-my <- mean(y)
-y <- y - my
+vglmnet <- do.call(c, list(output[1,]))
+vcvlar <- do.call(c, list(output[2,]))
 
+mean(sapply(vglmnet, length))
+mean(sapply(vcvlar, length))
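
Once the loop finishes, replicate() leaves output as a 2 x niters list matrix (cv.glmnet supports in row 1, cvlar supports in row 2), so the selected supports can be tabulated directly. A hedged sketch of one way to summarize them; the object names are illustrative only.

sizes <- data.frame(
  glmnet = sapply(output[1, ], length),  # size of each cv.glmnet support
  cvlar  = sapply(output[2, ], length)   # size of each cvlar support
)
summary(sizes)
table(sizes$cvlar)  # distribution of cvlar model sizes (cf. the "overly conservative models" note in cv.R)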
