Skip to content

Commit a2e32dc

Browse files
authored
Merge pull request #41 from jonathan-taylor/master
Some bug fixes of randomizedLasso, most of framework for randomized logistic
2 parents 9e7a081 + af3b818 commit a2e32dc

File tree

5 files changed

+347
-69
lines changed

5 files changed

+347
-69
lines changed

selectiveInference/R/funs.randomized.R

Lines changed: 156 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
randomizedLasso = function(X,
77
y,
88
lam,
9+
family=c("gaussian","binomial"),
910
noise_scale=NULL,
1011
ridge_term=NULL,
1112
noise_type=c('gaussian', 'laplace'),
@@ -17,6 +18,7 @@ randomizedLasso = function(X,
1718
kkt_stop=TRUE,
1819
parameter_stop=TRUE)
1920
{
21+
family = match.arg(family)
2022

2123
n = nrow(X); p = ncol(X)
2224

@@ -33,7 +35,7 @@ randomizedLasso = function(X,
3335
if (is.null(noise_scale)) {
3436
noise_scale = 0.5 * sd(y) * sqrt(mean_diag)
3537
}
36-
38+
3739
noise_type = match.arg(noise_type)
3840

3941
if (noise_scale > 0) {
@@ -65,8 +67,8 @@ randomizedLasso = function(X,
6567
nactive = as.integer(0)
6668

6769
result = solve_QP_wide(X, # design matrix
68-
lam / n, # vector of Lagrange multipliers
69-
ridge_term / n, # ridge_term
70+
lam / n, # vector of Lagrange multipliers
71+
ridge_term / n, # ridge_term
7072
max_iter,
7173
soln,
7274
linear_func,
@@ -76,14 +78,14 @@ randomizedLasso = function(X,
7678
nactive,
7779
kkt_tol,
7880
objective_tol,
79-
parameter_tol,
81+
parameter_tol,
8082
p,
81-
objective_stop, # objective_stop
82-
kkt_stop, # kkt_stop
83-
parameter_stop) # param_stop
84-
83+
objective_stop, # objective_stop
84+
kkt_stop, # kkt_stop
85+
parameter_stop) # param_stop
86+
8587
sign_soln = sign(result$soln)
86-
88+
8789
unpenalized = lam == 0
8890
active = (!unpenalized) & (sign_soln != 0)
8991
inactive = (!unpenalized) & (sign_soln == 0)
@@ -96,7 +98,11 @@ randomizedLasso = function(X,
9698

9799
observed_scalings = abs(result$soln)[active]
98100
observed_unpen = result$soln[unpenalized]
99-
observed_subgrad = result$gradient[inactive]
101+
observed_subgrad = -n*result$gradient[inactive]
102+
103+
if (sum(abs(observed_subgrad)>lam[inactive]*(1.001)) > 0){
104+
stop("subgradient eq not satisfied")
105+
}
100106

101107
observed_opt_state = c(observed_unpen, observed_scalings, observed_subgrad)
102108

@@ -106,27 +112,49 @@ randomizedLasso = function(X,
106112
I = inactive_set
107113
X_E = X[,E]
108114
X_I = X[,I]
109-
L_E = t(X) %*% X[,E]
110-
115+
116+
if (length(E)==0){
117+
return(list(active_set=c()))
118+
}
119+
120+
if (family=="binomial"){
121+
unpen_reg = glm(y~X_E-1, family="binomial")
122+
unpen_est = unpen_reg$coefficients
123+
pi_fn = function(beta){
124+
temp = X_E %*% as.matrix(beta)
125+
return(as.vector(exp(temp)/(1+exp(temp)))) # n-dimensional
126+
}
127+
pi_vec = pi_fn(unpen_est)
128+
W_E = diag(pi_vec*(1-pi_vec))
129+
} else if (family=="gaussian"){
130+
W_E = diag(rep(1,n))
131+
}
132+
133+
L_E = t(X) %*% W_E %*% X[,E]
134+
111135
coef_term = L_E
112136

113137
signs_ = c(rep(1, sum(unpenalized)), sign_soln[active])
138+
139+
coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term
140+
114141
if (length(signs_) == 1) {
115-
coef_term = coef_term * signs_
142+
coef_term = coef_term * signs_
116143
} else {
117-
coef_term = coef_term %*% diag(signs_) # scaligns are non-negative
144+
coef_term = coef_term %*% diag(signs_) # scaligns are non-negative
118145
}
119-
120-
coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term
121-
122-
subgrad_term = matrix(0, p, sum(inactive)) # for subgrad
123-
for (i in 1:sum(inactive)) {
124-
subgrad_term[inactive_set[i], i] = 1
125-
}
126-
127-
linear_term = cbind(coef_term,
128-
subgrad_term)
129-
146+
147+
if (sum(inactive) > 0) {
148+
subgrad_term = matrix(0, p, sum(inactive)) # for subgrad
149+
for (i in 1:sum(inactive)) {
150+
subgrad_term[inactive_set[i], i] = 1
151+
}
152+
153+
linear_term = cbind(coef_term,
154+
subgrad_term)
155+
} else {
156+
linear_term = coef_term
157+
}
130158
offset_term = rep(0, p)
131159
offset_term[active] = lam[active] * sign_soln[active]
132160

@@ -142,20 +170,30 @@ randomizedLasso = function(X,
142170

143171
active_term = -L_E # for \bar{\beta}_E
144172

145-
inactive_term = -subgrad_term
146-
linear_term = cbind(active_term,
147-
inactive_term)
173+
if (sum(inactive) > 0) {
174+
inactive_term = -subgrad_term
175+
linear_term = cbind(active_term,
176+
inactive_term)
177+
} else {
178+
linear_term = active_term
179+
}
180+
148181
offset_term = rep(0, p)
149182
internal_transform = list(linear_term = linear_term,
150183
offset_term = offset_term)
151184

152185
# density for sampling optimization variables
153-
186+
154187
observed_raw = -t(X) %*% y
188+
if (family=="binomial"){
189+
beta_E = result$soln[active_set]
190+
observed_raw = observed_raw + t(X)%*%pi_fn(beta_E) - L_E %*% beta_E
191+
}
155192
inactive_lam = lam[inactive_set]
156193
inactive_start = sum(unpenalized) + sum(active)
157194
active_start = sum(unpenalized)
158-
195+
196+
159197
# XXX only for Gaussian so far
160198

161199
log_optimization_density = function(opt_state) {
@@ -185,9 +223,11 @@ randomizedLasso = function(X,
185223
optimization_transform = opt_transform,
186224
internal_transform = internal_transform,
187225
log_optimization_density = log_optimization_density,
188-
observed_opt_state = observed_opt_state,
226+
observed_opt_state = observed_opt_state,
189227
observed_raw = observed_raw,
190-
noise_scale = noise_scale
228+
noise_scale = noise_scale,
229+
soln = result$soln,
230+
perturb = perturb_
191231
))
192232

193233
}
@@ -239,7 +279,6 @@ importance_weight = function(noise_scale,
239279
A = apply(A, 2, function(x) {return(x + target_transform$offset_term + opt_transform$offset_term)})
240280
log_num = -apply(A^2, 2, sum) / noise_scale^2
241281
} else {
242-
243282
log_num = log_density_gaussian_(noise_scale,
244283
target_transform$linear_term,
245284
as.matrix(target_sample),
@@ -262,6 +301,15 @@ importance_weight = function(noise_scale,
262301
W = W - max(W)
263302
return(exp(W))
264303
}
304+
305+
get_mean_cov = function(noise_scale, linear_term, offset_term){
306+
temp = solve(t(linear_term) %*% linear_term)
307+
cov = noise_scale^2*temp
308+
mean = temp %*% t(linear_term) %*% offset_term
309+
return(list(mean=mean, cov=cov))
310+
}
311+
312+
265313

266314
conditional_density = function(noise_scale, lasso_soln) {
267315

@@ -306,19 +354,23 @@ conditional_density = function(noise_scale, lasso_soln) {
306354
lasso_soln$log_optimization_density = log_condl_optimization_density
307355
lasso_soln$observed_opt_state = observed_opt_state[1:nactive]
308356
lasso_soln$optimization_transform = opt_transform
309-
return(lasso_soln)
357+
reduced_opt_transform =list(linear_term = reduced_B, offset_term = reduced_beta_offset)
358+
return(list(lasso_soln=lasso_soln,
359+
reduced_opt_transform = reduced_opt_transform))
310360
}
311361

312362
randomizedLassoInf = function(X,
313363
y,
314364
lam,
365+
family=c("gaussian", "binomial"),
315366
sigma=NULL,
316367
noise_scale=NULL,
317368
ridge_term=NULL,
318369
condition_subgrad=TRUE,
319370
level=0.9,
320-
nsample=10000,
321-
burnin=2000,
371+
sampler=c("norejection", "adaptMCMC"),
372+
nsample=10000,
373+
burnin=2000,
322374
max_iter=100, # how many iterations for each optimization problem
323375
kkt_tol=1.e-4, # tolerance for the KKT conditions
324376
parameter_tol=1.e-8, # tolerance for relative convergence of parameter
@@ -330,10 +382,13 @@ randomizedLassoInf = function(X,
330382

331383
n = nrow(X)
332384
p = ncol(X)
333-
385+
386+
family = match.arg(family)
387+
334388
lasso_soln = randomizedLasso(X,
335389
y,
336390
lam,
391+
family=family,
337392
noise_scale=noise_scale,
338393
ridge_term=ridge_term,
339394
max_iter=max_iter,
@@ -345,37 +400,75 @@ randomizedLassoInf = function(X,
345400
parameter_stop=parameter_stop)
346401

347402
active_set = lasso_soln$active_set
348-
if (length(active_set)==0){
403+
nactive = length(active_set)
404+
405+
if (nactive==0){
349406
return (list(active_set=active_set, pvalues=c(), ci=c()))
350407
}
351408
inactive_set = lasso_soln$inactive_set
352-
nactive = length(active_set)
353-
409+
354410
noise_scale = lasso_soln$noise_scale # set to default value in randomizedLasso
355-
356-
if (condition_subgrad==TRUE){
357-
lasso_soln=conditional_density(noise_scale, lasso_soln)
358-
}
411+
412+
constraints = matrix(0,nactive,2)
413+
constraints[,2] = Inf
414+
if (condition_subgrad==TRUE){
415+
condl_lasso=conditional_density(noise_scale, lasso_soln)
416+
lasso_soln = condl_lasso$lasso_soln
417+
cur_opt_transform = condl_lasso$reduced_opt_transform
418+
} else{
419+
if (nactive<p){
420+
subgrad_constraints = matrix(-lam, p-nactive, 2)
421+
subgrad_constraints[,2]=lam
422+
constraints = rbind(constraints, subgrad_constraints)
423+
}
424+
cur_opt_transform = list(linear_term = lasso_soln$optimization_transform$linear_term,
425+
offset_term = lasso_soln$optimization_transform$offset_term+lasso_soln$observed_raw)
426+
}
359427

360428
ndim = length(lasso_soln$observed_opt_state)
361-
362-
S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), ndim), nsample=nsample)
363-
opt_samples = as.matrix(S$samples[(burnin+1):nsample,,drop=FALSE])
429+
430+
sampler = match.arg(sampler)
431+
432+
if (sampler == "adaptMCMC"){
433+
S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), ndim), nsample=nsample)
434+
opt_samples = as.matrix(S$samples[(burnin+1):nsample,,drop=FALSE])
435+
} else if (sampler == "norejection") {
436+
opt_samples = gaussian_sampler(noise_scale,
437+
lasso_soln$observed_opt_state,
438+
cur_opt_transform$linear_term,
439+
cur_opt_transform$offset_term,
440+
constraints,
441+
nsamples=nsample)
442+
opt_sample = opt_samples[(burnin+1):nsample,]
443+
}
364444

365445
X_E = X[, active_set]
366446
X_minusE = X[, inactive_set]
367447

368-
# if no sigma given, use OLS estimate
369-
448+
if (family == "gaussian") {
449+
lm_y = lm(y ~ X_E - 1)
450+
sigma_resid = sqrt(sum(resid(lm_y)^2) / lm_y$df.resid)
451+
observed_target = lm_y$coefficients
452+
W_E = diag(rep(1,n))
453+
observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target))
454+
} else if (family == "binomial") {
455+
glm_y = glm(y~X_E-1)
456+
sigma_resid = sqrt(sum(resid(glm_y)^2) / glm_y$df.resid)
457+
observed_target = as.matrix(glm_y$coefficients)
458+
temp = X_E%*%observed_target
459+
pi_vec = exp(temp)/(1+exp(temp))
460+
observed_internal = c(observed_target, t(X_minusE) %*% (y-pi_vec))
461+
W_E=diag(as.vector(pi_vec *(1-pi_vec)))
462+
}
463+
464+
# if no sigma given, use the estimate
465+
370466
if (is.null(sigma)) {
371-
lm_y = lm(y ~ X_E - 1)
372-
sigma = sqrt(sum(resid(lm_y)^2) / lm_y$df.resid)
467+
sigma = sigma_resid
373468
}
374-
375-
target_cov = solve(t(X_E) %*% X_E)*sigma^2
469+
470+
target_cov = solve(t(X_E) %*% W_E %*% X_E)*sigma^2
376471
cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive))
377-
observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y
378-
observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target))
379472
internal_transform = lasso_soln$internal_transform
380473
opt_transform = lasso_soln$optimization_transform
381474
observed_raw = lasso_soln$observed_raw
@@ -404,7 +497,6 @@ randomizedLassoInf = function(X,
404497
cur_linear = reduced_target_opt_linear[,2:ncol(reduced_target_opt_linear)]
405498
cur_offset = temp %*% opt_transform$offset_term
406499
cur_transform = list(linear_term = as.matrix(cur_linear), offset_term = cur_offset)
407-
408500
raw = target_transform$linear_term * observed_target[i] + target_transform$offset_term
409501
} else {
410502
cur_transform = opt_transform
@@ -450,3 +542,11 @@ randomizedLassoInf = function(X,
450542
}
451543
return(list(active_set=active_set, pvalues=pvalues, ci=ci))
452544
}
545+
546+
547+
548+
549+
550+
551+
552+

0 commit comments

Comments
 (0)