
Commit 4dac202 (merge commit, 2 parents: 6c78851 + 80db795)

File tree: 13 files changed (+477 / -209 lines)

forLater/josh/selectiveInference/R/cv.R

Lines changed: 57 additions & 4 deletions
@@ -1,11 +1,18 @@
 # ------------------------------------------------
 # Cross-validation, preliminary

-cvMakeFolds <- function(x, nfolds = 10) {
-  #inds <- sample(1:nrow(x), replace=FALSE)
-  inds <- 1:nrow(x)
+cvMakeFolds <- function(x, nfolds = 5) {
+  inds <- sample(1:nrow(x), replace=FALSE)
+  #inds <- 1:nrow(x)
   foldsize <- floor(nrow(x)/nfolds)
-  lapply(1:nfolds, function(f) return(inds[1:foldsize+(f-1)*foldsize]))
+  folds <- lapply(1:nfolds, function(f) return(inds[1:foldsize+(f-1)*foldsize]))
+  if (nfolds*foldsize < nrow(x)) {
+    # remainder observations added to first several folds
+    for (i in 1:(nrow(x) - nfolds*foldsize)) {
+      folds[[i]] <- c(folds[[i]], inds[nfolds*foldsize + i])
+    }
+  }
+  return(folds)
 }

 ############################################
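A quick illustration of the new remainder handling (not part of the commit; it assumes the updated cv.R above has been sourced, so the default is nfolds = 5):

# With 23 rows, foldsize = floor(23/5) = 4, so the 3 leftover observations
# are appended one each to the first three folds.
set.seed(1)
x <- matrix(rnorm(23 * 2), nrow = 23)
folds <- cvMakeFolds(x)      # new default nfolds = 5
sapply(folds, length)        # 5 5 5 4 4
sort(unlist(folds))          # 1:23 -- every row lands in exactly one fold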
@@ -87,6 +94,7 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, intercept = TR
     fold <- folds[[f]]
     fit <- groupfs(X[-fold,], Y[-fold], index=index, maxsteps=maxsteps, sigma=sigma, intercept=FALSE, center=FALSE, normalize=FALSE)
     fit$fold <- fold
+    # Why is this commented out?
     ## projections[[f]] <- lapply(fit$projections, function(step.projs) {
     ##   lapply(step.projs, function(proj) {
     ##     # Reduce from n by n matrix to svdu_thresh
@@ -122,3 +130,48 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, intercept = TR
   invisible(fit)
 }

+
+cvlar <- function(x, y) { # other args
+  folds <- cvMakeFolds(x)
+  models <- lapply(folds, function(fold) {
+    x.train <- X
+    y.train <- Y
+    x.train[fold,] <- 0
+    y.train[fold] <- 0
+    x.test <- X[fold,]
+    y.test <- Y[fold]
+    larpath.train <- lar(x.train, y.train, maxsteps = maxsteps, intercept = F, normalize = F)
+    return(lff)
+  })
+
+  active.sets <- lapply(models, function(model) model$action)
+  lambdas <- lapply(models, function(model) model$lambda)
+  lmin <- min(unlist(lambdas))
+
+  # Interpolate lambda grid or parametrize by steps?
+  # interpolation probably requires re-writing cvRSSquads for
+  # penalized fits in order to make sense
+
+  # do steps for now just to have something that works?
+
+  RSSquads <- list()
+  for (s in 1:maxsteps) {
+    initial.active <- lapply(active.sets, function(a) a[1:s])
+    RSSquads[[s]] <- cvRSSquad(X, folds, initial.active)
+  }
+
+  RSSs <- lapply(RSSquads, function(Q) t(Y) %*% Q %*% Y)
+  sstar <- which.min(RSSs)
+  quadstar <- RSSquads[sstar][[1]]
+
+  RSSquads <- lapply(RSSquads, function(quad) quad - quadstar)
+  RSSquads[[sstar]] <- NULL # remove the all zeroes case
+
+  fit <- lar(X, Y, maxsteps=sstar, intercept = F, normalize = F)
+
+  # Very tall Gamma encoding all cv-model paths
+  Gamma <- do.call(rbind, lapply(models, function(model) return(model$Gamma)))
+
+  # more to do here
+}
+
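The cvlar draft above is still a work in progress (it reads X, Y and maxsteps from the enclosing scope, and return(lff) is a placeholder); it picks the step count minimizing cross-validated RSS written as a quadratic form t(Y) %*% Q %*% Y. cvRSSquad itself is not part of this diff, so the sketch below is only an assumption about what such a quadratic could look like: with per-fold active sets fixed, each fold's held-out residual is linear in y, so the total CV RSS is y' Q y.

# Hypothetical sketch (not the repo's cvRSSquad): build Q so that
# t(y) %*% Q %*% y equals the cross-validated RSS for fixed per-fold active sets.
library(MASS)  # ginv, aliased as pinv elsewhere in this repo

cvRSSquadSketch <- function(x, folds, active.sets) {
  n <- nrow(x)
  Q <- matrix(0, n, n)
  for (f in seq_along(folds)) {
    fold <- folds[[f]]
    A <- active.sets[[f]]
    S.test  <- diag(n)[fold, , drop = FALSE]   # selects held-out rows of y
    S.train <- diag(n)[-fold, , drop = FALSE]  # selects training rows of y
    # Held-out residual is linear in y: y[fold] - x[fold, A] %*% betahat(y[-fold])
    M <- S.test - x[fold, A, drop = FALSE] %*% ginv(x[-fold, A, drop = FALSE]) %*% S.train
    Q <- Q + crossprod(M)                      # accumulate M'M over folds
  }
  Q
}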

forLater/josh/sim.cvlar.R

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
+# Choices
+
+# RSS: least-squares or penalized beta?
+# depends on final model. Go with least-squares for now
+
+# fixed vs lar? (lar, apparently)
+# fixed probably slower, but advantage of same lambda grid?
+# is same lambda grid necessary? -- doesn't lar algorithm give all possible models anyway?
+# i.e. for non-knot lambda just find where it is in lar path, take corresponding model
+
+# groups? later
+
+# TODO
+
+# copy larInf or groupfsInf?
+# larInf: add CV quadratic constraints* & break/fix p-value computation
+# -------- *but can we even use the ydecomp we use for quadratic?
+# groupfsInf: some ugly rewriting, no cumprojs etc, but straightforward
+# -------- downside: need to implement larInf basically
+
+# larInf
+# [ ] is.null(sigma) don't estimate it
+
+# plan:
+# expand Gamma for [-fold] indices?
+# stack all the Gammas? or iterate through them?
+# work backward from poly.pval <- larInf
+
+
+# big picture / long term
+# what OOP kind of design would lend itself to easily implementing more cv things?
+
+# Gamma: something x n
+# Gamma %*% y >= 0
+
+# pass 0-padded x[-fold] and y[-fold] to lar?
+
+library(selectiveInference)
+setwd("/Users/joftius/Dropbox/work/R-software/forLater/josh")
+source("selectiveInference/R/cv.R")
+
+set.seed(1)
+n <- 100
+p <- 50
+maxsteps <- 10
+sparsity <- 3
+snr <- 2
+rho <- 0.1
+nfolds <- 5
+
+x <- matrix(rnorm(n*p), nrow=n)
+y <- rnorm(n)
+beta <- rep(0, p)
+beta[1:sparsity] <- 2* sqrt(2*log(p)/n) * sample(c(-1,1), sparsity, replace=T)
+y <- y + x %*% beta
+my <- mean(y)
+y <- y - my
+
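The planning comments at the top of this file describe the selection event as a stacked polyhedron Gamma %*% y >= 0, where each fold's lar fit on the 0-padded training data contributes rows with n columns. A minimal sketch of that check, assuming each fitted model carries a $Gamma component as in the cvlar draft:

# Sketch only: verify the observed y satisfies every fold's selection event.
# Assumes models is a list of lar fits whose $Gamma matrices all have n columns
# (the point of 0-padding x[-fold], y[-fold] rather than dropping rows).
checkSelection <- function(models, y, tol = 1e-10) {
  Gamma <- do.call(rbind, lapply(models, function(model) model$Gamma))
  all(Gamma %*% y >= -tol)
}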

forLater/josh/sim.datasplit.R

Lines changed: 23 additions & 18 deletions
@@ -4,30 +4,31 @@ source("../../selectiveInference/R/funs.groupfs.R")
 source("../../selectiveInference/R/funs.quadratic.R")
 source("../../selectiveInference/R/funs.common.R")

-set.seed(1)
-niters <- 400
+set.seed(19)
+niters <- 500
 known <- FALSE
-n <- 100
+n <- 50
 p <- 100
-maxsteps <- 10
+maxsteps <- 8
 sparsity <- 5
-snr <- 1
+snr <- 2
 rho <- 0.1
-ratio <- 0.7
-ratio2 <- 0.85
+ratio <- 0.6
+ratio2 <- 0.8
 train <- 1:(ratio*n)
 test <- setdiff(1:n, train)
 train2 <- 1:(ratio2*n)
-test <- setdiff(1:n, train2)
+test2 <- setdiff(1:n, train2)
 index <- 1:p

-instance <- function(n, p, sparsity, snr, maxsteps, rho) {
-
-x <- matrix(rnorm(n*p), nrow=n)
+x <- matrix(rnorm(n*p), nrow=n)
 if (rho != 0) {
   z <- matrix(rep(t(rnorm(n)), p), nrow = n)
   x <- sqrt(1-rho)*x + sqrt(rho)*z
 }
+
+instance <- function(n, p, sparsity, snr, maxsteps, rho) {
+
 y <- rnorm(n)

 if (sparsity > 0) {
@@ -47,23 +48,24 @@ instance <- function(n, p, sparsity, snr, maxsteps, rho) {
 xte2 <- x[test2, ]

 if (known) {
-  trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p))
-  fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p))
+  trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = log(length(train)))
+  fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = log(length(train2)))
 } else {
-  trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = log(length(train)))
-  fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, aicstop=1, k = log(length(train2)))
+  trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = 2*log(p))
+  fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, aicstop=1, k = 2*log(p))
 }

 trcols <- which(1:p %in% trfit$action)
 tr2cols <- which(1:p %in% fit$action)
 tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4]
 tepv2 <- summary(lm(yte2~xte2[, tr2cols]-1))$coefficients[,4]
 names(tepv) <- as.character(sort(trfit$action))
-names(tepv2) <- as.character(sort(trfit$action))
+names(tepv2) <- as.character(sort(fit$action))
 pv <- groupfsInf(fit)
 trpv <- groupfsInf(trfit)
 return(list(vars = fit$action, pvals = pv$pv,
             splitvars = sort(trfit$action), splitpvals = tepv,
+            splitvars2 = sort(fit$action), splitpvals2 = tepv2,
             trpvals = trpv$pv))
 }

@@ -75,9 +77,12 @@ vars <- do.call(c, list(output[1,]))
 pvals <- do.call(c, list(output[2,]))
 splitvars <- do.call(c, list(output[3,]))
 splitpvals <- do.call(c, list(output[4,]))
-trpvals <- do.call(c, list(output[5,]))
+splitvars2 <- do.call(c, list(output[5,]))
+splitpvals2 <- do.call(c, list(output[6,]))
+trpvals <- do.call(c, list(output[7,]))

-save(vars, pvals, splitvars, splitpvals, trpvals,
+save(vars, pvals, splitvars, splitpvals,
+     splitvars2, splitpvals2, trpvals,
      file = paste0("results/datasplit",
        "_", ifelse(known, "TC", "TF"),
        "_n", n,

forLater/josh/sim.selectedmodel.R

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+library(selectiveInference)
+library(intervals)
+setwd("~/Dropbox/work/R-software/forLater/josh")
+source("selectiveInference/R/cv.R")
+source("../../selectiveInference/R/funs.groupfs.R")
+source("../../selectiveInference/R/funs.quadratic.R")
+source("../../selectiveInference/R/funs.common.R")
+source("../../selectiveInference/R/funs.fs.R")
+source("../../selectiveInference/R/funs.lar.R")
+source("../../selectiveInference/R/funs.inf.R")
+library(MASS)
+pinv = ginv
+
+set.seed(19)
+niters <- 500
+known <- TRUE
+n <- 50
+p <- 100
+maxsteps <- 8
+sparsity <- 5
+snr <- 2
+index <- 1:p
+
+x <- matrix(rnorm(n*p), nrow=n)
+
+instance <- function(n, p, sparsity, snr, maxsteps) {
+  y <- rnorm(n)
+  if (sparsity > 0) {
+    beta <- rep(0, p)
+    beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T)
+    y <- y + x %*% beta
+  }
+  y <- y - mean(y)
+  fit <- groupfs(x, y, index, maxsteps=maxsteps, sigma=1, intercept=F, center=F, normalize=F)
+  fitfs <- fs(x, y, maxsteps=maxsteps, intercept=F, normalize=F)
+  if (any(fit$action != fitfs$action)) stop("Model paths did not agree")
+  pvfs <- fsInf(fitfs, sigma=1, k = maxsteps, type = "all")
+  pv <- groupfsInf(fit)
+  return(list(vars = fit$action, pvals = pv$pv, selpvals = pvfs$pv))
+}
+
+time <- system.time({
+  output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps))
+})
+
+vars <- do.call(c, list(output[1,]))
+pvals <- do.call(c, list(output[2,]))
+selpvals <- do.call(c, list(output[3,]))
+
+save(vars, pvals, selpvals,
+     file = paste0("results/selected",
+       "_", ifelse(known, "TC", "TF"),
+       "_n", n,
+       "_p", p,
+       "_sparsity", sparsity,
+       "_snr", as.character(snr),
+       ".RData"))
+
+print(time)
+
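This simulation saves the selected-model p-values from groupfsInf (pvals) alongside the saturated-model p-values from fsInf (selpvals). A hypothetical follow-up, not part of the commit, would load the saved results and compare the two empirical distributions against Uniform(0,1); the file name below assumes the settings above (known = TRUE, n = 50, p = 100, sparsity = 5, snr = 2):

# Hypothetical downstream check of the saved results.
load("results/selected_TC_n50_p100_sparsity5_snr2.RData")
pv  <- unlist(pvals)      # selected-model (groupfsInf) p-values
spv <- unlist(selpvals)   # saturated-model (fsInf, type = "all") p-values
plot(ecdf(pv), main = "Empirical CDFs of p-values", xlab = "p-value")
lines(ecdf(spv), col = "red")
abline(0, 1, lty = 2)     # Uniform(0,1) reference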
