
Commit c467ba2 (1 parent: c6015aa)
Author: Joshua Loftus

Beginning cvlar implementation

File tree: 2 files changed, +103 −0 lines changed

forLater/josh/selectiveInference/R/cv.R

Lines changed: 45 additions & 0 deletions
@@ -130,3 +130,48 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, intercept = TR
invisible(fit)
}


cvlar <- function(x, y, maxsteps) { # other args
  folds <- cvMakeFolds(x)
  models <- lapply(folds, function(fold) {
    # zero-pad the held-out fold so the training fit ignores those rows
    x.train <- x
    y.train <- y
    x.train[fold, ] <- 0
    y.train[fold] <- 0
    x.test <- x[fold, ]
    y.test <- y[fold]
    larpath.train <- lar(x.train, y.train, maxsteps = maxsteps, intercept = FALSE, normalize = FALSE)
    return(larpath.train)
  })

  active.sets <- lapply(models, function(model) model$action)
  lambdas <- lapply(models, function(model) model$lambda)
  lmin <- min(unlist(lambdas))

  # Interpolate lambda grid or parametrize by steps?
  # Interpolation probably requires re-writing cvRSSquads for
  # penalized fits in order to make sense.

  # Do steps for now just to have something that works.

  # Quadratic form giving the CV RSS at each step
  RSSquads <- list()
  for (s in 1:maxsteps) {
    initial.active <- lapply(active.sets, function(a) a[1:s])
    RSSquads[[s]] <- cvRSSquad(x, folds, initial.active)
  }

  RSSs <- sapply(RSSquads, function(Q) t(y) %*% Q %*% y)
  sstar <- which.min(RSSs)
  quadstar <- RSSquads[[sstar]]

  # Differences of quadratics encode the event that step sstar won the CV comparison
  RSSquads <- lapply(RSSquads, function(quad) quad - quadstar)
  RSSquads[[sstar]] <- NULL # remove the all-zeroes case

  # Refit on the full data for the CV-selected number of steps
  fit <- lar(x, y, maxsteps = sstar, intercept = FALSE, normalize = FALSE)

  # Very tall Gamma encoding all cv-model paths
  Gamma <- do.call(rbind, lapply(models, function(model) model$Gamma))

  # more to do here
}
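The helper cvRSSquad is not shown in this commit. For orientation, here is a minimal sketch of the quantity it appears to compute, assuming least-squares refits on each training fold: with a fixed active set, the training coefficients are linear in y, so each fold's test residual is a linear map R_k of y, and the total CV RSS is the quadratic form t(y) %*% (sum_k t(R_k) %*% R_k) %*% y. The name cvRSSquadSketch and its signature are hypothetical.

# Hypothetical sketch (not the commit's cvRSSquad): build Q so that
# t(y) %*% Q %*% y equals the CV RSS for the given per-fold active sets.
cvRSSquadSketch <- function(x, folds, active.sets) {
  n <- nrow(x)
  Q <- matrix(0, n, n)
  for (k in seq_along(folds)) {
    fold <- folds[[k]]
    A <- active.sets[[k]]
    xA.train <- x[-fold, A, drop = FALSE]
    # least-squares coefficients on the training rows are linear in y
    coef.map <- matrix(0, length(A), n)
    coef.map[, -fold] <- solve(crossprod(xA.train), t(xA.train))
    # test residuals y[fold] - x[fold, A] %*% betahat.train, also linear in y
    R <- matrix(0, length(fold), n)
    R[cbind(seq_along(fold), fold)] <- 1
    R <- R - x[fold, A, drop = FALSE] %*% coef.map
    Q <- Q + crossprod(R)  # accumulate t(R) %*% R
  }
  Q
}

Under this reading, the differences quad - quadstar formed above encode the selection event that step sstar minimized the CV RSS: t(y) %*% (Q_s - Q_sstar) %*% y >= 0 for every other s.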

forLater/josh/sim.cvlar.R

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
# Choices

# RSS: least-squares or penalized beta?
# depends on final model. Go with least-squares for now

# fixed vs lar? (lar, apparently)
# fixed probably slower, but advantage of same lambda grid?
# is same lambda grid necessary? -- doesn't the lar algorithm give all possible models anyway?
# i.e. for a non-knot lambda, just find where it sits in the lar path and take the corresponding model
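# [illustration only, not part of this commit] Re: the question above -- the
# active set is piecewise constant in lambda, so the model at a non-knot
# lambda can be read off the path's knots; a hypothetical helper:
#   activeAt <- function(path, lam) path$action[seq_len(sum(path$lambda > lam))]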

# groups? later

# TODO

# copy larInf or groupfsInf?
# larInf: add CV quadratic constraints* & break/fix p-value computation
# -------- *but can we even use the ydecomp we use for quadratic?
# groupfsInf: some ugly rewriting, no cumprojs etc, but straightforward
# -------- downside: need to implement larInf basically

# larInf
# [ ] is.null(sigma) don't estimate it

# plan:
# expand Gamma for [-fold] indices?
# stack all the Gammas? or iterate through them?
# work backward from poly.pval <- larInf
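# [illustration only, not part of this commit] One way to expand a Gamma
# computed from x[-fold, ] into a constraint on the full-length y is to
# insert zero columns at the held-out indices:
#   G.full <- matrix(0, nrow(G), n); G.full[, -fold] <- G
# Zero-padding the training data, as cv.R does, yields n columns directly.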

# big picture / long term
# what kind of OOP design would lend itself to easily implementing more cv things?

# Gamma: something x n
# Gamma %*% y >= 0

# pass 0-padded x[-fold] and y[-fold] to lar?

library(selectiveInference)
setwd("/Users/joftius/Dropbox/work/R-software/forLater/josh")
source("selectiveInference/R/cv.R")

set.seed(1)
n <- 100
p <- 50
maxsteps <- 10
sparsity <- 3
snr <- 2
rho <- 0.1
nfolds <- 5

x <- matrix(rnorm(n*p), nrow=n)
y <- rnorm(n)
beta <- rep(0, p)
beta[1:sparsity] <- 2 * sqrt(2*log(p)/n) * sample(c(-1,1), sparsity, replace=TRUE)
y <- y + x %*% beta
my <- mean(y)
y <- y - my
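A hedged sketch of how the script might continue, exercising the selection event noted above. The Gamma component is returned by this repo's lar (cv.R relies on model$Gamma), while the cvlar call is only indicative, since its return value is still unfinished in this commit:

# Hypothetical continuation (not part of this commit):
fit <- lar(x, y, maxsteps = maxsteps, intercept = FALSE, normalize = FALSE)
# The polyhedral selection event from the notes: Gamma is (something x n)
# and the observed response satisfies Gamma %*% y >= 0 up to numerical error.
stopifnot(all(fit$Gamma %*% y >= -1e-8))

# cvlar(x, y, maxsteps)  # draft; body still ends with "more to do here"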
