
Commit ce13171

Author: Jelena Markovic (committed)
Commit message: logistic
1 parent: d2438c9

File tree: 2 files changed (+74, -24 lines)


selectiveInference/R/funs.randomized.R

Lines changed: 53 additions & 14 deletions
@@ -6,6 +6,7 @@
 randomizedLasso = function(X,
                            y,
                            lam,
+                           family="gaussian",
                            noise_scale=NULL,
                            ridge_term=NULL,
                            noise_type=c('gaussian', 'laplace'),
@@ -17,7 +18,6 @@ randomizedLasso = function(X,
                            kkt_stop=TRUE,
                            parameter_stop=TRUE)
 {
-
    n = nrow(X); p = ncol(X)

    mean_diag = mean(apply(X^2, 2, sum))
@@ -86,7 +86,7 @@ randomizedLasso = function(X,
                        parameter_stop) # param_stop

    sign_soln = sign(result$soln)
-
+
    unpenalized = lam == 0
    active = (!unpenalized) & (sign_soln != 0)
    inactive = (!unpenalized) & (sign_soln == 0)
@@ -113,8 +113,21 @@ randomizedLasso = function(X,
    I = inactive_set
    X_E = X[,E]
    X_I = X[,I]
-   L_E = t(X) %*% X[,E]
-
+
+   if (family=="binomial"){
+       unpen_reg = glm(y~X_E-1, family="binomial")
+       unpen_est = unpen_reg$coefficients
+       pi_fn = function(beta){
+           temp = X_E %*% as.matrix(beta)
+           return(as.vector(exp(temp)/(1+exp(temp)))) # n-dimensional
+       }
+       pi_vec = pi_fn(unpen_est)
+       W_E = diag(pi_vec*(1-pi_vec))
+   } else if (family=="gaussian"){
+       W_E = diag(rep(1,n))
+   }
+   L_E = t(X) %*% W_E %*% X[,E]
+
    coef_term = L_E

    signs_ = c(rep(1, sum(unpenalized)), sign_soln[active])
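For reference, a minimal standalone sketch of the weighted Gram matrix introduced above, with simulated X, y and a hypothetical active set E (none of these are package objects): for family="binomial" the weights are the logistic variances pi*(1-pi) at an unpenalized fit, while for family="gaussian" W_E is the identity, so t(X) %*% W_E %*% X[,E] reduces to the previous t(X) %*% X[,E].

# Standalone sketch of the weighted Gram matrix used above.
# X, y and the active set E are simulated stand-ins, not package objects.
set.seed(1)
n = 50; p = 6
X = matrix(rnorm(n * p), n, p)
y = rbinom(n, 1, 0.5)
E = c(1, 4)                                             # hypothetical active set
X_E = X[, E]

unpen_est = coef(glm(y ~ X_E - 1, family="binomial"))   # unpenalized logistic fit
pi_vec = as.vector(1 / (1 + exp(-X_E %*% unpen_est)))   # fitted probabilities
W_E = diag(pi_vec * (1 - pi_vec))                       # logistic weights

L_E_binomial = t(X) %*% W_E %*% X_E                     # weighted Gram matrix, p x |E|
L_E_gaussian = t(X) %*% diag(rep(1, n)) %*% X_E         # identical to t(X) %*% X_E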
@@ -158,8 +171,12 @@ randomizedLasso = function(X,
                                  offset_term = offset_term)

    # density for sampling optimization variables
-
+
    observed_raw = -t(X) %*% y
+   if (family=="binomial"){
+       beta_E = result$soln[active_set]
+       observed_raw = observed_raw+t(X)%*%pi_fn(beta_E)-L_E %*% beta_E
+   }
    inactive_lam = lam[inactive_set]
    inactive_start = sum(unpenalized) + sum(active)
    active_start = sum(unpenalized)
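The binomial branch above can be read as swapping the Gaussian term -t(X) %*% y for the constant part of a linearization of the logistic score at beta_E. A minimal standalone sketch of the same computation follows; the data are simulated, and beta_E here comes from an unpenalized fit purely as a stand-in for result$soln[active_set].

# Standalone sketch of the binomial observed_raw computed above.
# beta_E stands in for result$soln[active_set]; it is taken from a plain fit here.
set.seed(2)
n = 50; p = 6
X = matrix(rnorm(n * p), n, p)
y = rbinom(n, 1, 0.5)
active_set = c(2, 5)
X_E = X[, active_set]

beta_E = coef(glm(y ~ X_E - 1, family="binomial"))
pi_E = as.vector(1 / (1 + exp(-X_E %*% beta_E)))
L_E = t(X) %*% diag(pi_E * (1 - pi_E)) %*% X_E

observed_raw = -t(X) %*% y                                    # gaussian case
observed_raw = observed_raw + t(X) %*% pi_E - L_E %*% beta_E  # binomial correction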
@@ -333,6 +350,7 @@ conditional_density = function(noise_scale, lasso_soln) {
 randomizedLassoInf = function(X,
                               y,
                               lam,
+                              family="gaussian",
                               sampler="A",
                               sigma=NULL,
                               noise_scale=NULL,
@@ -352,10 +370,11 @@ randomizedLassoInf = function(X,

    n = nrow(X)
    p = ncol(X)
-
+
    lasso_soln = randomizedLasso(X,
                                 y,
                                 lam,
+                                family=family,
                                 noise_scale=noise_scale,
                                 ridge_term=ridge_term,
                                 max_iter=max_iter,
@@ -410,17 +429,32 @@ randomizedLassoInf = function(X,
    X_E = X[, active_set]
    X_minusE = X[, inactive_set]

-   # if no sigma given, use OLS estimate
-
+
+
+   if (family=="gaussian"){
+       lm_y = lm(y ~ X_E - 1)
+       sigma_resid = sqrt(sum(resid(lm_y)^2) / lm_y$df.resid)
+       observed_target = lm_y$coefficients
+       W_E = diag(rep(1,n))
+       observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target))
+   } else if (family=="binomial"){
+       glm_y = glm(y~X_E-1)
+       sigma_resid = sqrt(sum(resid(glm_y)^2) / glm_y$df.resid)
+       observed_target = as.matrix(glm_y$coefficients)
+       temp = X_E%*%observed_target
+       pi_vec = exp(temp)/(1+exp(temp))
+       observed_internal = c(observed_target, t(X_minusE) %*% (y-pi_vec))
+       W_E=diag(as.vector(pi_vec *(1-pi_vec)))
+   }
+
+   # if no sigma given, use the estimate
+
    if (is.null(sigma)) {
-       lm_y = lm(y ~ X_E - 1)
-       sigma = sqrt(sum(resid(lm_y)^2) / lm_y$df.resid)
+       sigma = sigma_resid
    }
-
-   target_cov = solve(t(X_E) %*% X_E)*sigma^2
+
+   target_cov = solve(t(X_E) %*% W_E %*% X_E)*sigma^2
    cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive))
-   observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y
-   observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target))
    internal_transform = lasso_soln$internal_transform
    opt_transform = lasso_soln$optimization_transform
    observed_raw = lasso_soln$observed_raw
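As a quick, hedged consistency check on the weighted target covariance above (not part of the commit): for a logistic fit with sigma = 1, solve(t(X_E) %*% W_E %*% X_E) built from the fitted probabilities should agree with the unscaled covariance that glm itself reports.

# Compare the weighted covariance with glm's unscaled covariance.
# X_E and y are simulated stand-ins for the active design and response.
set.seed(3)
n = 80
X_E = cbind(rnorm(n), rnorm(n))
y = rbinom(n, 1, 0.5)

fit = glm(y ~ X_E - 1, family="binomial")
pi_vec = fit$fitted.values
W_E = diag(pi_vec * (1 - pi_vec))

target_cov = solve(t(X_E) %*% W_E %*% X_E)        # sigma = 1 for logistic
max(abs(target_cov - summary(fit)$cov.unscaled))  # should be near 0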
@@ -495,5 +529,10 @@ randomizedLassoInf = function(X,
    return(list(active_set=active_set, pvalues=pvalues, ci=ci))
 }

+
+
+
+
+

tests/randomized/test_instances.R

Lines changed: 21 additions & 10 deletions
@@ -1,7 +1,7 @@
 library(selectiveInference)

-gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA,
-                             random_signs=TRUE, scale=TRUE, center=TRUE, seed=NA){
+get_instance = function(n, p, s, sigma=1, rho=0, signal=6, family="gaussian",
+                        X=NA, random_signs=TRUE, scale=TRUE, center=TRUE, seed=NA){
   if (!is.na(seed)){
     set.seed(seed)
   }
@@ -19,11 +19,20 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA,
     signs = sample(c(-1,1), s, replace = TRUE)
     beta = beta * signs
   }
-  y = X %*% beta + rnorm(n)*sigma
+  mu = X %*% beta
+  if (family=="gaussian"){
+    y = mu + rnorm(n)*sigma
+  } else if (family=="binomial"){
+    prob = exp(mu)/(1+exp(mu))
+    y= rbinom(n,1, prob)
+  }
   result = list(X=X,y=y,beta=beta)
   return(result)
 }

+
+
+
 test_randomized_lasso = function(n=100,p=200,s=0){
   set.seed(1)
   data = gaussian_instance(n=n,p=p,s=s, rho=0.3, sigma=3)
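A short illustrative call to the renamed generator with its new binomial branch; the argument values below are arbitrary, and X is left at its default so the generator simulates the design as gaussian_instance did.

# Draw one logistic instance and inspect it; the settings are arbitrary choices.
data = get_instance(n=100, p=20, s=3, rho=0.3, sigma=1, signal=2,
                    family="binomial", seed=1)
table(data$y)        # counts of 0/1 responses
sum(data$beta != 0)  # s nonzero coefficients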
@@ -61,27 +70,29 @@ test_KKT=function(){



-collect_results = function(n,p,s, nsim=100, level=0.9, condition_subgrad=FALSE, lam=1.2){
+collect_results = function(n,p,s, nsim=100, level=0.9,
+                           family = "gaussian",
+                           condition_subgrad=FALSE, lam=1.2){

  rho=0.3
  sigma=1
  sample_pvalues = c()
  sample_coverage = c()
  for (i in 1:nsim){
-    data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma)
+    data = get_instance(n=n,p=p,s=s, rho=rho, sigma=sigma, family=family)
    X=data$X
    y=data$y
-    beta=data$beta
    result = selectiveInference:::randomizedLassoInf(X, y,
-                                                     lam=lam,
+                                                     lam,
+                                                     family = family,
+                                                     sampler = "A",
                                                      sigma=sigma,
                                                      level=level,
-                                                     sampler = "A",
                                                      burnin=1000,
                                                      nsample=5000,
                                                      condition_subgrad=condition_subgrad)
    if (length(result$active_set)>0){
-      true_beta = beta[result$active_set]
+      true_beta = data$beta[result$active_set]
      coverage = rep(0, nrow(result$ci))
      for (i in 1:nrow(result$ci)){
        if (result$ci[i,1]<true_beta[i] & result$ci[i,2]>true_beta[i]){
@@ -104,7 +115,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9, condition_subgrad=FALSE,
 }

 set.seed(1)
-collect_results(n=100, p=20, s=0, lam=1.2)
+collect_results(n=100, p=20, s=0, lam=0.5)
 #test_randomized_lasso()
 #test_KKT()

0 commit comments
