Skip to content

Commit 1c538ed

Browse files
Merge branch 'jelena-markovic-randomized_jelena'
2 parents 95b98c2 + df777e3 commit 1c538ed

File tree

3 files changed

+92
-45
lines changed

3 files changed

+92
-45
lines changed

selectiveInference/R/funs.randomized.R

Lines changed: 73 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
#
44
# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
55

6-
randomizedLASSO = function(X,
6+
randomizedLasso = function(X,
77
y,
88
lam,
9-
noise_scale,
10-
ridge_term,
9+
noise_scale=NULL,
10+
ridge_term=NULL,
1111
noise_type=c('gaussian', 'laplace'),
1212
max_iter=100, # how many iterations for each optimization problem
1313
kkt_tol=1.e-4, # tolerance for the KKT conditions
@@ -20,6 +20,21 @@ randomizedLASSO = function(X,
2020

2121
n = nrow(X); p = ncol(X)
2222

23+
mean_diag = mean(apply(X^2, 2, sum))
24+
25+
# default ridge term
26+
27+
if (is.null(ridge_term)) {
28+
ridge_term = sqrt(mean_diag) * sd(y) / sqrt(n)
29+
}
30+
31+
# default noise level
32+
33+
if (is.null(noise_scale)) {
34+
noise_scale = 0.5 * sd(y) * sqrt(mean_diag)
35+
}
36+
37+
print(c(noise_scale, ridge_term))
2338
noise_type = match.arg(noise_type)
2439

2540
if (noise_scale > 0) {
@@ -246,10 +261,10 @@ conditional_density = function(noise_scale, lasso_soln) {
246261
if (sum(opt_state < 0) > 0) {
247262
return(-Inf)
248263
}
249-
D = selectiveInference:::log_density_gaussian_conditional_(noise_scale,
250-
reduced_B,
251-
as.matrix(opt_state),
252-
reduced_beta_offset)
264+
D = log_density_gaussian_conditional_(noise_scale,
265+
reduced_B,
266+
as.matrix(opt_state),
267+
reduced_beta_offset)
253268
return(D)
254269
}
255270
lasso_soln$log_optimization_density = log_condl_optimization_density
@@ -258,23 +273,42 @@ conditional_density = function(noise_scale, lasso_soln) {
258273
return(lasso_soln)
259274
}
260275

261-
randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term){
276+
randomizedLassoInf = function(X,
277+
y,
278+
lam,
279+
sigma=NULL,
280+
noise_scale=NULL,
281+
ridge_term=NULL,
282+
condition_subgrad=TRUE,
283+
level=0.9) {
262284

263285
n = nrow(X)
264286
p = ncol(X)
265-
lasso_soln = selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term)
287+
lasso_soln = randomizedLasso(X, y, lam, noise_scale, ridge_term)
266288
active_set = lasso_soln$active_set
267289
inactive_set = lasso_soln$inactive_set
268290
nactive = length(active_set)
269-
291+
292+
if (condition_subgrad==TRUE){
293+
lasso_soln=conditional_density(noise_scale,lasso_soln)
294+
}
295+
270296
dim = length(lasso_soln$observed_opt_state)
271297
print(paste("chain dim", dim))
272-
S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000)
298+
S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000)
273299
opt_samples = S$samples[2001:10000,]
274300
print(paste("dim opt samples", toString(dim(opt_samples))))
275301

276302
X_E = X[, active_set]
277303
X_minusE = X[, inactive_set]
304+
305+
# if no sigma given, use OLS estimate
306+
307+
if (is.null(sigma)) {
308+
lm_y = lm(y ~ X_E - 1)
309+
sigma = sqrt(sum(resid(lm_y)^2) / lm_y$df.residual)
310+
}
311+
print(c(sigma, 'sigma'))
278312
target_cov = solve(t(X_E) %*% X_E)*sigma^2
279313
cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive))
280314
observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y
@@ -283,37 +317,46 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term){
283317
opt_transform = lasso_soln$optimization_transform
284318
observed_raw = lasso_soln$observed_raw
285319

286-
pvalus = rep(0, nactive)
320+
pvalues = rep(0, nactive)
287321
ci = matrix(0, nactive, 2)
288322
for (i in 1:nactive){
289-
target_transform = selectiveInference:::linear_decomposition(observed_target[i],
290-
observed_internal,
291-
target_cov[i,i],
292-
cov_target_internal[,i],
293-
internal_transform)
323+
target_transform = linear_decomposition(observed_target[i],
324+
observed_internal,
325+
target_cov[i,i],
326+
cov_target_internal[,i],
327+
internal_transform)
294328
target_sample = rnorm(nrow(opt_samples)) * sqrt(target_cov[i,i])
295329

296330
pivot = function(candidate){
297-
weights = selectiveInference:::importance_weight(noise_scale,
298-
t(as.matrix(target_sample)) + candidate,
299-
t(opt_samples),
300-
opt_transform,
301-
target_transform,
302-
observed_raw)
303-
return(mean((target_sample<observed_target[i])*weights)/mean(weights))
331+
weights = importance_weight(noise_scale,
332+
t(as.matrix(target_sample)) + candidate,
333+
t(opt_samples),
334+
opt_transform,
335+
target_transform,
336+
observed_raw)
337+
return(mean((target_sample+candidate<observed_target[i])*weights)/mean(weights))
304338
}
305-
level = 0.9
306339
rootU = function(candidate){
307340
return (pivot(observed_target[i]+candidate)-(1-level)/2)
308341
}
309342
rootL = function(candidate){
310343
return (pivot(observed_target[i]+candidate)-(1+level)/2)
311344
}
312345
pvalues[i] = pivot(0)
313-
line_min = -10*sd(target_sample)
314-
line_max = 10*sd(target_sample)
315-
ci[i,1] = uniroot(rootU, c(line_min, line_max))$root+observed_target[i]
316-
ci[i,2] = uniroot(rootL, c(line_min, line_max))$root+observed_target[i]
346+
line_min = -20*sd(target_sample)
347+
line_max = 20*sd(target_sample)
348+
if (rootU(line_min)*rootU(line_max)<0){
349+
ci[i,2] = uniroot(rootU, c(line_min, line_max))$root+observed_target[i]
350+
} else{
351+
print("non inv u")
352+
ci[i,2]=line_max
353+
}
354+
if (rootL(line_min)*rootL(line_max)<0){
355+
ci[i,1] = uniroot(rootL, c(line_min, line_max))$root+observed_target[i]
356+
} else{
357+
print("non inv l")
358+
ci[i,1] = line_min
359+
}
317360
}
318-
return(list(pvalues=pvalues, ci=ci))
361+
return(list(active_set=active_set, pvalues=pvalues, ci=ci))
319362
}

tests/randomized/test_instances.R

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,28 +25,32 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA,
2525
}
2626

2727

28-
collect_results = function(n,p,s, nsim=10){
28+
collect_results = function(n,p,s, nsim=100, level=0.9){
2929
rho=0.3
3030
lam=1.
3131
sigma=1
32-
sample_pivots = c()
32+
sample_pvalues = c()
33+
sample_coverage = c()
3334
for (i in 1:nsim){
3435
data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma)
3536
X=data$X
3637
y=data$y
37-
ridge_term=sd(y)/sqrt(n)
38-
noise_scale = sd(y)/2
39-
#X = matrix(rnorm(n * p), n, p)
40-
#y = rnorm(n)
41-
#lam = 20 / sqrt(n)
42-
#noise_scale = 0.01 * sqrt(n)
43-
#ridge_term = .1 / sqrt(n)
44-
result = selectiveInference:::randomized_inference(X,y,sigma,lam,noise_scale,ridge_term)
45-
sample_pivots = c(sample_pivots, result$pivots)
38+
beta=data$beta
39+
result = selectiveInference:::randomizedLassoInf(X, y, sigma, lam, level=level)
40+
true_beta = beta[result$active_set]
41+
coverage = rep(0, nrow(result$ci))
42+
for (i in 1:nrow(result$ci)){
43+
if (result$ci[i,1]<true_beta[i] & result$ci[i,2]>true_beta[i]){
44+
coverage[i]=1
45+
}
46+
print(paste("ci", toString(result$ci[i,])))
47+
}
48+
sample_pvalues = c(sample_pvalues, result$pvalues)
49+
sample_coverage = c(sample_coverage, coverage)
4650
}
47-
51+
print(paste("coverage", mean(sample_coverage)))
4852
jpeg('pivots.jpg')
49-
plot(ecdf(sample_pivots), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf")
53+
plot(ecdf(sample_pvalues), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf")
5054
abline(0, 1, lty=2)
5155
dev.off()
5256
}

tests/randomized/test_randomized.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ smoke_test = function() {
77
lam = 20 / sqrt(n)
88
noise_scale = 0.01 * sqrt(n)
99
ridge_term = .1 / sqrt(n)
10-
selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term)
10+
selectiveInference:::randomizedLasso(X, y, lam, noise_scale, ridge_term)
1111
}
1212

1313
A = smoke_test()
@@ -20,7 +20,7 @@ sampler_test = function() {
2020
lam = 20 / sqrt(n)
2121
noise_scale = 0.01 * sqrt(n)
2222
ridge_term = .1 / sqrt(n)
23-
obj = selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term)
23+
obj = selectiveInference:::randomizedLasso(X, y, lam, noise_scale, ridge_term)
2424
S = selectiveInference:::sample_opt_variables(obj, jump_scale=rep(1/sqrt(n), p), nsample=10000)
2525
return(S$samples[2001:10000,])
2626
}

0 commit comments

Comments
 (0)