6
6
randomizedLasso = function (X ,
7
7
y ,
8
8
lam ,
9
+ family = c(" gaussian" ," binomial" ),
9
10
noise_scale = NULL ,
10
11
ridge_term = NULL ,
11
12
noise_type = c(' gaussian' , ' laplace' ),
@@ -17,6 +18,7 @@ randomizedLasso = function(X,
17
18
kkt_stop = TRUE ,
18
19
parameter_stop = TRUE )
19
20
{
21
+ family = match.arg(family )
20
22
21
23
n = nrow(X ); p = ncol(X )
22
24
@@ -65,8 +67,8 @@ randomizedLasso = function(X,
65
67
nactive = as.integer(0 )
66
68
67
69
result = solve_QP_wide(X , # design matrix
68
- lam / n , # vector of Lagrange multipliers
69
- ridge_term / n , # ridge_term
70
+ lam / n , # vector of Lagrange multipliers
71
+ ridge_term / n , # ridge_term
70
72
max_iter ,
71
73
soln ,
72
74
linear_func ,
@@ -83,7 +85,7 @@ randomizedLasso = function(X,
83
85
parameter_stop ) # param_stop
84
86
85
87
sign_soln = sign(result $ soln )
86
-
88
+
87
89
unpenalized = lam == 0
88
90
active = (! unpenalized ) & (sign_soln != 0 )
89
91
inactive = (! unpenalized ) & (sign_soln == 0 )
@@ -110,8 +112,26 @@ randomizedLasso = function(X,
110
112
I = inactive_set
111
113
X_E = X [,E ]
112
114
X_I = X [,I ]
113
- L_E = t(X ) %*% X [,E ]
114
-
115
+
116
+ if (length(E )== 0 ){
117
+ return (list (active_set = c()))
118
+ }
119
+
120
+ if (family == " binomial" ){
121
+ unpen_reg = glm(y ~ X_E - 1 , family = " binomial" )
122
+ unpen_est = unpen_reg $ coefficients
123
+ pi_fn = function (beta ){
124
+ temp = X_E %*% as.matrix(beta )
125
+ return (as.vector(exp(temp )/ (1 + exp(temp )))) # n-dimensional
126
+ }
127
+ pi_vec = pi_fn(unpen_est )
128
+ W_E = diag(pi_vec * (1 - pi_vec ))
129
+ } else if (family == " gaussian" ){
130
+ W_E = diag(rep(1 ,n ))
131
+ }
132
+
133
+ L_E = t(X ) %*% W_E %*% X [,E ]
134
+
115
135
coef_term = L_E
116
136
117
137
signs_ = c(rep(1 , sum(unpenalized )), sign_soln [active ])
@@ -155,8 +175,12 @@ randomizedLasso = function(X,
155
175
offset_term = offset_term )
156
176
157
177
# density for sampling optimization variables
158
-
178
+
159
179
observed_raw = - t(X ) %*% y
180
+ if (family == " binomial" ){
181
+ beta_E = result $ soln [active_set ]
182
+ observed_raw = observed_raw + t(X )%*% pi_fn(beta_E ) - L_E %*% beta_E
183
+ }
160
184
inactive_lam = lam [inactive_set ]
161
185
inactive_start = sum(unpenalized ) + sum(active )
162
186
active_start = sum(unpenalized )
@@ -191,11 +215,11 @@ randomizedLasso = function(X,
191
215
optimization_transform = opt_transform ,
192
216
internal_transform = internal_transform ,
193
217
log_optimization_density = log_optimization_density ,
194
- observed_opt_state = observed_opt_state ,
218
+ observed_opt_state = observed_opt_state ,
195
219
observed_raw = observed_raw ,
196
- noise_scale = noise_scale ,
197
- soln = result $ soln ,
198
- perturb = perturb_
220
+ noise_scale = noise_scale ,
221
+ soln = result $ soln ,
222
+ perturb = perturb_
199
223
))
200
224
201
225
}
@@ -330,6 +354,7 @@ conditional_density = function(noise_scale, lasso_soln) {
330
354
randomizedLassoInf = function (X ,
331
355
y ,
332
356
lam ,
357
+ family = c(" gaussian" , " binomial" ),
333
358
sigma = NULL ,
334
359
noise_scale = NULL ,
335
360
ridge_term = NULL ,
@@ -349,10 +374,13 @@ randomizedLassoInf = function(X,
349
374
350
375
n = nrow(X )
351
376
p = ncol(X )
352
-
377
+
378
+ family = match.arg(family )
379
+
353
380
lasso_soln = randomizedLasso(X ,
354
381
y ,
355
382
lam ,
383
+ family = family ,
356
384
noise_scale = noise_scale ,
357
385
ridge_term = ridge_term ,
358
386
max_iter = max_iter ,
@@ -409,17 +437,30 @@ randomizedLassoInf = function(X,
409
437
X_E = X [, active_set ]
410
438
X_minusE = X [, inactive_set ]
411
439
412
- # if no sigma given, use OLS estimate
413
-
440
+ if (family == " gaussian" ) {
441
+ lm_y = lm(y ~ X_E - 1 )
442
+ sigma_resid = sqrt(sum(resid(lm_y )^ 2 ) / lm_y $ df.resid )
443
+ observed_target = lm_y $ coefficients
444
+ W_E = diag(rep(1 ,n ))
445
+ observed_internal = c(observed_target , t(X_minusE ) %*% (y - X_E %*% observed_target ))
446
+ } else if (family == " binomial" ) {
447
+ glm_y = glm(y ~ X_E - 1 )
448
+ sigma_resid = sqrt(sum(resid(glm_y )^ 2 ) / glm_y $ df.resid )
449
+ observed_target = as.matrix(glm_y $ coefficients )
450
+ temp = X_E %*% observed_target
451
+ pi_vec = exp(temp )/ (1 + exp(temp ))
452
+ observed_internal = c(observed_target , t(X_minusE ) %*% (y - pi_vec ))
453
+ W_E = diag(as.vector(pi_vec * (1 - pi_vec )))
454
+ }
455
+
456
+ # if no sigma given, use the estimate
457
+
414
458
if (is.null(sigma )) {
415
- lm_y = lm(y ~ X_E - 1 )
416
- sigma = sqrt(sum(resid(lm_y )^ 2 ) / lm_y $ df.resid )
459
+ sigma = sigma_resid
417
460
}
418
-
419
- target_cov = solve(t(X_E ) %*% X_E )* sigma ^ 2
461
+
462
+ target_cov = solve(t(X_E ) %*% W_E %*% X_E )* sigma ^ 2
420
463
cov_target_internal = rbind(target_cov , matrix (0 , nrow = p - nactive , ncol = nactive ))
421
- observed_target = solve(t(X_E ) %*% X_E ) %*% t(X_E ) %*% y
422
- observed_internal = c(observed_target , t(X_minusE ) %*% (y - X_E %*% observed_target ))
423
464
internal_transform = lasso_soln $ internal_transform
424
465
opt_transform = lasso_soln $ optimization_transform
425
466
observed_raw = lasso_soln $ observed_raw
@@ -494,5 +535,10 @@ randomizedLassoInf = function(X,
494
535
return (list (active_set = active_set , pvalues = pvalues , ci = ci ))
495
536
}
496
537
538
+
539
+
540
+
541
+
542
+
497
543
498
544
0 commit comments