|
| 1 | +library(selectiveInference) |
| 2 | + |
| 3 | +gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA, |
| 4 | + random_signs=TRUE, scale=TRUE, center=TRUE, seed=NA){ |
| 5 | + if (!is.na(seed)){ |
| 6 | + set.seed(seed) |
| 7 | + } |
| 8 | + |
| 9 | + if (is.na(X)){ |
| 10 | + X = sqrt(1-rho)*matrix(rnorm(n*p),n) + sqrt(rho)*matrix(rep(rnorm(n), p), nrow = n) |
| 11 | + X = scale(X)/sqrt(n) |
| 12 | + } |
| 13 | + beta = rep(0, p) |
| 14 | + if (s>0){ |
| 15 | + beta[1:s] = seq(3, 6, length.out=s) |
| 16 | + } |
| 17 | + beta = sample(beta) |
| 18 | + if (random_signs==TRUE & s>0){ |
| 19 | + signs = sample(c(-1,1), s, replace = TRUE) |
| 20 | + beta = beta * signs |
| 21 | + } |
| 22 | + y = X %*% beta + rnorm(n)*sigma |
| 23 | + result = list(X=X,y=y,beta=beta) |
| 24 | + return(result) |
| 25 | +} |
| 26 | + |
| 27 | +conditional_density = function(noise_scale, lasso_soln){ |
| 28 | + |
| 29 | + active_set = lasso_soln$active_set |
| 30 | + observed_raw = lasso_soln$observed_raw |
| 31 | + opt_linear = lasso_soln$optimization_transform$linear_term |
| 32 | + opt_offset = lasso_soln$optimization_transform$offset_term |
| 33 | + observed_opt_state = lasso_soln$observed_opt_state |
| 34 | + |
| 35 | + nactive = length(active_set) |
| 36 | + B = opt_linear[,1:nactive] |
| 37 | + beta_offset = opt_offset |
| 38 | + p=length(observed_opt_state) |
| 39 | + if (nactive<p){ |
| 40 | + beta_offset = beta_offset+(opt_linear[,(nactive+1):p] %*% observed_opt_state[(nactive+1):p]) |
| 41 | + } |
| 42 | + opt_transform = list(linear_term=B, |
| 43 | + offset_term = beta_offset) |
| 44 | + reduced_B = chol(t(B) %*% B) |
| 45 | + beta_offset = beta_offset+observed_raw |
| 46 | + reduced_beta_offset = solve(t(reduced_B)) %*% (t(B) %*% beta_offset) |
| 47 | + |
| 48 | + log_condl_optimization_density = function(opt_state) { |
| 49 | + if (sum(opt_state < 0) > 0) { |
| 50 | + return(-Inf) |
| 51 | + } |
| 52 | + D = selectiveInference:::log_density_gaussian_conditional_(noise_scale, |
| 53 | + reduced_B, |
| 54 | + as.matrix(opt_state), |
| 55 | + reduced_beta_offset) |
| 56 | + return(D) |
| 57 | + } |
| 58 | + lasso_soln$log_optimization_density = log_condl_optimization_density |
| 59 | + lasso_soln$observed_opt_state = observed_opt_state[1:nactive] |
| 60 | + lasso_soln$optimization_transform = opt_transform |
| 61 | + return(lasso_soln) |
| 62 | +} |
| 63 | + |
| 64 | + |
| 65 | +randomized_inference = function(X,y,sigma, lam, noise_scale, ridge_term){ |
| 66 | + n=nrow(X) |
| 67 | + p=ncol(X) |
| 68 | + lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) |
| 69 | + active_set = lasso_soln$active_set |
| 70 | + inactive_set = lasso_soln$inactive_set |
| 71 | + nactive = length(active_set) |
| 72 | + print(paste("nactive", nactive)) |
| 73 | + |
| 74 | + #lasso_soln = conditional_density(noise_scale, lasso_soln) |
| 75 | + |
| 76 | + dim=length(lasso_soln$observed_opt_state) |
| 77 | + print(paste("chain dim", dim)) |
| 78 | + S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) |
| 79 | + opt_samples = S$samples[2001:10000,] |
| 80 | + print(paste("dim opt samples", toString(dim(opt_samples)))) |
| 81 | + |
| 82 | + X_E=X[, active_set] |
| 83 | + X_minusE=X[, inactive_set] |
| 84 | + target_cov = solve(t(X_E) %*% X_E)*sigma^2 |
| 85 | + cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) |
| 86 | + observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y |
| 87 | + observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target)) |
| 88 | + internal_transform = lasso_soln$internal_transform |
| 89 | + opt_transform = lasso_soln$optimization_transform |
| 90 | + observed_raw = lasso_soln$observed_raw |
| 91 | + |
| 92 | + pivots = rep(0, nactive) |
| 93 | + ci = matrix(0, nactive, 2) |
| 94 | + for (i in 1:nactive){ |
| 95 | + target_transform = selectiveInference:::linear_decomposition(observed_target[i], |
| 96 | + observed_internal, |
| 97 | + target_cov[i,i], |
| 98 | + cov_target_internal[,i], |
| 99 | + internal_transform) |
| 100 | + target_sample = rnorm(nrow(opt_samples)) * sqrt(target_cov[i,i]) |
| 101 | + |
| 102 | + pivot = function(candidate){ |
| 103 | + weights = selectiveInference:::importance_weight(noise_scale, |
| 104 | + t(as.matrix(target_sample))+candidate, |
| 105 | + t(opt_samples), |
| 106 | + opt_transform, |
| 107 | + target_transform, |
| 108 | + observed_raw) |
| 109 | + return(mean((target_sample<observed_target[i])*weights)/mean(weights)) |
| 110 | + } |
| 111 | + level = 0.9 |
| 112 | + rootU = function(candidate){ |
| 113 | + return (pivot(observed_target[i]+candidate)-(1-level)/2) |
| 114 | + } |
| 115 | + rootL = function(candidate){ |
| 116 | + return (pivot(observed_target[i]+candidate)-(1+level)/2) |
| 117 | + } |
| 118 | + pivots[i] = pivot(0) |
| 119 | + line_min = -10*sd(target_sample) |
| 120 | + line_max = 10*sd(target_sample) |
| 121 | + ci[i,1] = uniroot(rootU, c(line_min, line_max))$root+observed_target[i] |
| 122 | + ci[i,2] = uniroot(rootL,c(line_min, line_max))$root+observed_target[i] |
| 123 | + } |
| 124 | + print(paste("pivots", toString(pivots))) |
| 125 | + for (i in 1:nactive){ |
| 126 | + print(paste("CIs", toString(ci[i,]))) |
| 127 | + } |
| 128 | + return(list(pivots=pivots, ci=ci)) |
| 129 | +} |
| 130 | + |
| 131 | +collect_results = function(n,p,s, nsim=1){ |
| 132 | + rho=0.3 |
| 133 | + lam=1. |
| 134 | + sigma=1 |
| 135 | + sample_pivots = NULL |
| 136 | + for (i in 1:nsim){ |
| 137 | + data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma) |
| 138 | + X=data$X |
| 139 | + print(dim(X)) |
| 140 | + y=data$y |
| 141 | + ridge_term=sd(y)/sqrt(n) |
| 142 | + noise_scale = sd(y)/2 |
| 143 | + #X = matrix(rnorm(n * p), n, p) |
| 144 | + #y = rnorm(n) |
| 145 | + #lam = 20 / sqrt(n) |
| 146 | + #noise_scale = 0.01 * sqrt(n) |
| 147 | + #ridge_term = .1 / sqrt(n) |
| 148 | + result = randomized_inference(X,y,sigma,lam,noise_scale,ridge_term) |
| 149 | + sample_pivots = c(sample_pivots, result$pivots) |
| 150 | + } |
| 151 | + |
| 152 | + jpeg('pivots.jpg') |
| 153 | + plot(ecdf(sample_pivots), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf") |
| 154 | + abline(0, 1, lty=2) |
| 155 | + dev.off() |
| 156 | +} |
| 157 | + |
| 158 | +set.seed(1) |
| 159 | +collect_results(n=100, p=20, s=0) |
| 160 | + |
| 161 | + |
| 162 | + |
0 commit comments