Skip to content

Commit a8d50a6

Browse files
Merge branch 'jelena-markovic-randomized_jelena'
2 parents 1d60dfa + 9597497 commit a8d50a6

File tree

3 files changed

+172
-11
lines changed

3 files changed

+172
-11
lines changed

selectiveInference/R/funs.randomized.R

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ randomizedLASSO = function(X,
130130

131131
# density for sampling optimization variables
132132

133-
observed_raw = -t(X) %*% Y
133+
observed_raw = -t(X) %*% y
134134
inactive_lam = lam[inactive_set]
135135
inactive_start = sum(unpenalized) + sum(active)
136136
active_start = sum(unpenalized)
@@ -145,11 +145,10 @@ randomizedLASSO = function(X,
145145
(sum(opt_state[(active_start+1):inactive_start] < 0) > 0)) {
146146
return(-Inf)
147147
}
148-
149148
D = log_density_gaussian_conditional_(noise_scale,
150149
opt_transform$linear_term,
151150
as.matrix(opt_state),
152-
observed_raw)
151+
observed_raw+opt_transform$offset_term)
153152
return(D)
154153
}
155154

@@ -187,12 +186,12 @@ linear_decomposition = function(observed_target,
187186
var_target = as.matrix(var_target)
188187
if (nrow(var_target) == 1) {
189188
nuisance = observed_internal - cov_target_internal * observed_target / var_target
190-
target_linear = internal_transform$linear_part %*% cov_target_internal / var_target
189+
target_linear = internal_transform$linear_term %*% cov_target_internal / var_target[1,1]
191190
} else {
192191
nuisance = observed_internal - cov_target_internal %*% solve(var_target) %*% observed_target
193-
target_linear = internal_transform$linear_part %*% cov_target_internal %*% solve(var_target)
192+
target_linear = internal_transform$linear_term %*% cov_target_internal %*% solve(var_target)
194193
}
195-
target_offset = internal_transform$linear_part %*% nuisance + internal_transform$offset
194+
target_offset = internal_transform$linear_term %*% nuisance + internal_transform$offset_term
196195
return(list(linear_term=target_linear,
197196
offset_term=target_offset))
198197
}
@@ -209,14 +208,14 @@ importance_weight = function(noise_scale,
209208
log_num = log_density_gaussian_(noise_scale,
210209
target_transform$linear_term,
211210
as.matrix(target_sample),
212-
optimization_transform$linear_term,
213-
as.matrix(opt_state),
214-
target_transform$offset_term + optimization_transform$offset_term)
211+
opt_transform$linear_term,
212+
as.matrix(opt_sample),
213+
target_transform$offset_term + opt_transform$offset_term)
215214

216215
log_den = log_density_gaussian_conditional_(noise_scale,
217216
opt_transform$linear_term,
218217
as.matrix(opt_sample),
219-
observed_raw)
218+
observed_raw+opt_transform$offset_term)
220219
W = log_num - log_den
221220
W = W - max(W)
222221
return(exp(W))

tests/randomized/test_instances.R

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
library(selectiveInference)
2+
3+
gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA,
4+
random_signs=TRUE, scale=TRUE, center=TRUE, seed=NA){
5+
if (!is.na(seed)){
6+
set.seed(seed)
7+
}
8+
9+
if (is.na(X)){
10+
X = sqrt(1-rho)*matrix(rnorm(n*p),n) + sqrt(rho)*matrix(rep(rnorm(n), p), nrow = n)
11+
X = scale(X)/sqrt(n)
12+
}
13+
beta = rep(0, p)
14+
if (s>0){
15+
beta[1:s] = seq(3, 6, length.out=s)
16+
}
17+
beta = sample(beta)
18+
if (random_signs==TRUE & s>0){
19+
signs = sample(c(-1,1), s, replace = TRUE)
20+
beta = beta * signs
21+
}
22+
y = X %*% beta + rnorm(n)*sigma
23+
result = list(X=X,y=y,beta=beta)
24+
return(result)
25+
}
26+
27+
conditional_density = function(noise_scale, lasso_soln){
28+
29+
active_set = lasso_soln$active_set
30+
observed_raw = lasso_soln$observed_raw
31+
opt_linear = lasso_soln$optimization_transform$linear_term
32+
opt_offset = lasso_soln$optimization_transform$offset_term
33+
observed_opt_state = lasso_soln$observed_opt_state
34+
35+
nactive = length(active_set)
36+
B = opt_linear[,1:nactive]
37+
beta_offset = opt_offset
38+
p=length(observed_opt_state)
39+
if (nactive<p){
40+
beta_offset = beta_offset+(opt_linear[,(nactive+1):p] %*% observed_opt_state[(nactive+1):p])
41+
}
42+
opt_transform = list(linear_term=B,
43+
offset_term = beta_offset)
44+
reduced_B = chol(t(B) %*% B)
45+
beta_offset = beta_offset+observed_raw
46+
reduced_beta_offset = solve(t(reduced_B)) %*% (t(B) %*% beta_offset)
47+
48+
log_condl_optimization_density = function(opt_state) {
49+
if (sum(opt_state < 0) > 0) {
50+
return(-Inf)
51+
}
52+
D = selectiveInference:::log_density_gaussian_conditional_(noise_scale,
53+
reduced_B,
54+
as.matrix(opt_state),
55+
reduced_beta_offset)
56+
return(D)
57+
}
58+
lasso_soln$log_optimization_density = log_condl_optimization_density
59+
lasso_soln$observed_opt_state = observed_opt_state[1:nactive]
60+
lasso_soln$optimization_transform = opt_transform
61+
return(lasso_soln)
62+
}
63+
64+
65+
randomized_inference = function(X,y,sigma, lam, noise_scale, ridge_term){
66+
n=nrow(X)
67+
p=ncol(X)
68+
lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term)
69+
active_set = lasso_soln$active_set
70+
inactive_set = lasso_soln$inactive_set
71+
nactive = length(active_set)
72+
print(paste("nactive", nactive))
73+
74+
#lasso_soln = conditional_density(noise_scale, lasso_soln)
75+
76+
dim=length(lasso_soln$observed_opt_state)
77+
print(paste("chain dim", dim))
78+
S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000)
79+
opt_samples = S$samples[2001:10000,]
80+
print(paste("dim opt samples", toString(dim(opt_samples))))
81+
82+
X_E=X[, active_set]
83+
X_minusE=X[, inactive_set]
84+
target_cov = solve(t(X_E) %*% X_E)*sigma^2
85+
cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive))
86+
observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y
87+
observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target))
88+
internal_transform = lasso_soln$internal_transform
89+
opt_transform = lasso_soln$optimization_transform
90+
observed_raw = lasso_soln$observed_raw
91+
92+
pivots = rep(0, nactive)
93+
ci = matrix(0, nactive, 2)
94+
for (i in 1:nactive){
95+
target_transform = selectiveInference:::linear_decomposition(observed_target[i],
96+
observed_internal,
97+
target_cov[i,i],
98+
cov_target_internal[,i],
99+
internal_transform)
100+
target_sample = rnorm(nrow(opt_samples)) * sqrt(target_cov[i,i])
101+
102+
pivot = function(candidate){
103+
weights = selectiveInference:::importance_weight(noise_scale,
104+
t(as.matrix(target_sample))+candidate,
105+
t(opt_samples),
106+
opt_transform,
107+
target_transform,
108+
observed_raw)
109+
return(mean((target_sample<observed_target[i])*weights)/mean(weights))
110+
}
111+
level = 0.9
112+
rootU = function(candidate){
113+
return (pivot(observed_target[i]+candidate)-(1-level)/2)
114+
}
115+
rootL = function(candidate){
116+
return (pivot(observed_target[i]+candidate)-(1+level)/2)
117+
}
118+
pivots[i] = pivot(0)
119+
line_min = -10*sd(target_sample)
120+
line_max = 10*sd(target_sample)
121+
ci[i,1] = uniroot(rootU, c(line_min, line_max))$root+observed_target[i]
122+
ci[i,2] = uniroot(rootL,c(line_min, line_max))$root+observed_target[i]
123+
}
124+
print(paste("pivots", toString(pivots)))
125+
for (i in 1:nactive){
126+
print(paste("CIs", toString(ci[i,])))
127+
}
128+
return(list(pivots=pivots, ci=ci))
129+
}
130+
131+
collect_results = function(n,p,s, nsim=1){
132+
rho=0.3
133+
lam=1.
134+
sigma=1
135+
sample_pivots = NULL
136+
for (i in 1:nsim){
137+
data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma)
138+
X=data$X
139+
print(dim(X))
140+
y=data$y
141+
ridge_term=sd(y)/sqrt(n)
142+
noise_scale = sd(y)/2
143+
#X = matrix(rnorm(n * p), n, p)
144+
#y = rnorm(n)
145+
#lam = 20 / sqrt(n)
146+
#noise_scale = 0.01 * sqrt(n)
147+
#ridge_term = .1 / sqrt(n)
148+
result = randomized_inference(X,y,sigma,lam,noise_scale,ridge_term)
149+
sample_pivots = c(sample_pivots, result$pivots)
150+
}
151+
152+
jpeg('pivots.jpg')
153+
plot(ecdf(sample_pivots), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf")
154+
abline(0, 1, lty=2)
155+
dev.off()
156+
}
157+
158+
set.seed(1)
159+
collect_results(n=100, p=20, s=0)
160+
161+
162+

tests/randomized/test_randomized.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
library(selectiveInference)
22

33
smoke_test = function() {
4-
54
n = 100; p = 50
65
X = matrix(rnorm(n * p), n, p)
76
y = rnorm(n)
@@ -10,6 +9,7 @@ smoke_test = function() {
109
ridge_term = .1 / sqrt(n)
1110
selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term)
1211
}
12+
1313
A = smoke_test()
1414

1515
sampler_test = function() {

0 commit comments

Comments
 (0)