Skip to content

Commit 855459e

Browse files
authored
Merge pull request #10 from StatFunGen/dev
Freeze ColocBoost output
2 parents ccb4e26 + def42f4 commit 855459e

12 files changed

+409
-645
lines changed

.DS_Store

-6 KB
Binary file not shown.

R/colocboost.R

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
#' @param dict_sumstatLD A L by 2 matrix of dictionary for \code{sumstat} and \code{LD} if there exist subsets of traits corresponding to the same sumstat.
3131
#' The first column should be 1:L for L sumstat The second column should be the index of \code{LD} corresponding to the sumstat.
3232
#' The innovation: do not provide the same matrix in \code{LD} to reduce the computational burden.
33-
#' @param traits_names The names of traits, which has the same order for Y.
33+
#' @param outcome_names The names of traits, which has the same order for Y.
3434
#' @param target_idx The index of the target trait if perform targeted ColocBoost
3535
#' @param effect_est Matrix of snp regression coefficients (i.e. regression beta values) in the genomic region
3636
#' @param effect_se Matrix of standard errors associated with the beta values
@@ -108,7 +108,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
108108
###### - index dict for X match multiple Y / LD match multiple sumstat
109109
dict_YX = NULL, # Y index for 1st column, X index for 2nd column
110110
dict_sumstatLD = NULL, # sumstat index for 1st column, LD index for 2nd column
111-
traits_names = NULL, # the names of traits
111+
outcome_names = NULL, # the names of outcomes
112112
###### - HyPrColoc input
113113
effect_est = NULL, # same as HyPrColoc, beta hat matrix: with rowname of snp names
114114
effect_se = NULL, # same as HyPrColoc, sebeta hat matrix with rowname of snp names
@@ -140,7 +140,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
140140
median_abs_corr = NULL,
141141
between_purity = 0.8, # minimum LD between two csets
142142
tol = 1e-9, # tol for LD
143-
merging = TRUE, # if merge two sets for one trait
143+
merging = TRUE, # if merge two sets for one outcome
144144
coverage_singlew = 0.8,
145145
lambda = 0.5, # the ratio for z^2 and z in weight penalty
146146
lambda_target = 1,
@@ -244,10 +244,10 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
244244
for (i in 1:length(Y)){
245245
tmp <- unique(dict_YX[dict_YX[,1]==i,2])
246246
if (length(tmp) == 0){
247-
warning(paste("Error: You don't provide matched X for trait", i))
247+
warning(paste("Error: You don't provide matched X for outcome", i))
248248
return(NULL)
249249
} else if (length(tmp) != 1){
250-
warning(paste("Error: You provide different matched X for trait", i))
250+
warning(paste("Error: You provide different matched X for outcome", i))
251251
return(NULL)
252252
} else {
253253
yx_dict[i] <- tmp
@@ -338,7 +338,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
338338
if ( !is.null(sumstat) ){
339339
if (is.data.frame(sumstat)){ sumstat <- list(sumstat) }
340340
if (!is.list(sumstat)){
341-
warning("Error: Input sumstat must be the list containing summary level data for all traits!")
341+
warning("Error: Input sumstat must be the list containing summary level data for all outcomes!")
342342
return(NULL)
343343
}
344344
# --- check if variants names in summary data
@@ -359,7 +359,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
359359

360360
# if no LD input, set diagonal matrix to LD
361361
warning("Providing the LD for summary statistics data is highly recommended. ",
362-
"Without LD, only a single iteration will be performed under the assumption of one causal variant per trait. ",
362+
"Without LD, only a single iteration will be performed under the assumption of one causal variant per outcome. ",
363363
"Additionally, the purity of CoS cannot be evaluated!")
364364

365365
p.sumstat <- sapply(keep.snp.sumstat, length)
@@ -431,7 +431,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
431431
warning("Providing the sample size (n), or even a rough estimate of n, ",
432432
"is highly recommended. Without n, the implicit assumption is ",
433433
"n is large (Inf) and the effect sizes are small (close to zero).",
434-
"Trait ", paste(p_no, collapse = ","), " in sumstat don't contain 'n'!")
434+
"outcome ", paste(p_no, collapse = ","), " in sumstat don't contain 'n'!")
435435
}
436436

437437
Z <- N_sumstat <- Var_y <- SeBhat <- vector(mode='list', length=length(sumstat))
@@ -500,20 +500,20 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
500500
min_variants <- min(sapply(keep.snps, length))
501501
if (min_variants < 100){
502502
warning("Warning message about the number of variants.\n",
503-
"The smallest number of variants across traits is ", min_variants, " <100. ",
503+
"The smallest number of variants across outcomes is ", min_variants, " <100. ",
504504
"If this is what you expected, this is not a problem.",
505505
"If this is not you expected, please check input data.")
506506
}
507507
if (length(overlapp_snps)<=1){
508-
warning("Error: No or only 1 overlapping variants were found across all traits, colocalization cannot be performed. ",
509-
"Please verify the variant names across different traits.")
508+
warning("Error: No or only 1 overlapping variants were found across all outcomes, colocalization cannot be performed. ",
509+
"Please verify the variant names across different outcomes.")
510510
return(NULL)
511511
} else if ( (length(overlapp_snps)/mean_variants)<0.1 ){
512512
warning("Warning message about the overlapped variants.\n",
513-
"The average number of variants across traits is ", mean_variants,
513+
"The average number of variants across outcomes is ", mean_variants,
514514
". But only ", length(overlapp_snps), " number of variants overlapped (<10%).\n",
515515
"If this is what you expected, this is not a problem.\n",
516-
"If this is not you expected, please check if the variant name matched across traits.")
516+
"If this is not you expected, please check if the variant name matched across outcomes.")
517517
}
518518
cb_data <- colocboost_init_data(X = X, Y = Y, dict_YX = yx_dict,
519519
Z = Z, LD = LD, N_sumstat = N_sumstat, dict_sumstatLD = sumstatLD_dict,
@@ -550,7 +550,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
550550
coloc_thres = coloc_thres,
551551
LD_obj = LD_obj,
552552
target_idx = target_idx,
553-
traits_names = traits_names)
553+
outcome_names = outcome_names)
554554

555555
# --- post-processing of the colocboost updates
556556
message("Starting post-hoc analyses and results summary.")

R/colocboost_init.R

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ colocboost_init_data <- function(X, Y, dict_YX,
5151
}
5252
if (target_variants & !is.null(target_idx)){
5353
if (target_idx > length(keep.snps)){
54-
stop("Target trait index is over the total number of traits! please check!")
54+
stop("Target outcome index is over the total number of outcomes! please check!")
5555
}
5656
keep.snp.names <- keep.snps[[dict[target_idx]]]
5757
if (overlap_varaints){
@@ -122,7 +122,7 @@ colocboost_init_data <- function(X, Y, dict_YX,
122122
tmp$X <- x_stand
123123
}
124124
cb_data$data[[flag]] <- tmp
125-
names(cb_data$data)[flag] <- paste0("ind_trait_",i)
125+
names(cb_data$data)[flag] <- paste0("ind_outcome_",i)
126126
flag = flag + 1
127127
}
128128
cb_data$dict = c(dict_YX_final)
@@ -223,7 +223,7 @@ colocboost_init_data <- function(X, Y, dict_YX,
223223

224224
}
225225
cb_data$data[[flag]] <- tmp
226-
names(cb_data$data)[flag] <- paste0("sumstat_trait_",i)
226+
names(cb_data$data)[flag] <- paste0("sumstat_outcome_",i)
227227
flag = flag + 1
228228

229229
}
@@ -337,7 +337,7 @@ colocboost_init_para <- function(cb_data, cb_model,tau=0.01,
337337
multicorrection_cut=1,
338338
func_multicorrection = "lfdr",
339339
LD_obj = FALSE,
340-
traits_names = NULL,
340+
outcome_names = NULL,
341341
target_idx = NULL){
342342

343343

@@ -346,16 +346,16 @@ colocboost_init_para <- function(cb_data, cb_model,tau=0.01,
346346
N <- sapply(cb_data$data, function(dt) dt$N)
347347
# - number of SNPs
348348
P <- if (!is.null(cb_data$data[[1]]$X)) ncol(cb_data$data[[1]]$X) else length(cb_data$data[[1]]$XtY)
349-
# - number of traits
349+
# - number of outcomes
350350
L <- length(cb_data$data)
351351
# - initial profile loglikelihood
352352
profile_loglike <- sum(sapply(1:length(cb_model), function(i) tail(cb_model[[i]]$profile_loglike_each, n=1)))
353-
# - check initial update trait
353+
# - check initial update outcome
354354
stop_null <- sapply(cb_model, function(tmp) min(tmp$multi_correction_univariate))
355355
pos_stop <- which(stop_null >= multicorrection_cut)
356356
update_y = rep(1, L)
357357
if (length(pos_stop) != 0){ update_y[pos_stop] <- 0 } else {pos_stop = NULL}
358-
if (!is.null(traits_names)){ traits_names = traits_names } else {traits_names = paste0("Y", 1:L)}
358+
if (!is.null(outcome_names)){ outcome_names = outcome_names } else {outcome_names = paste0("Y", 1:L)}
359359

360360
cb_model_para = list("L" = L,
361361
"P" = P,
@@ -371,8 +371,8 @@ colocboost_init_para <- function(cb_data, cb_model,tau=0.01,
371371
"true_stop" = pos_stop,
372372
"LD_obj" = LD_obj,
373373
"real_update_jk" = c(),
374-
"traits_names" = traits_names,
375-
"snp_names" = cb_data$snp.names,
374+
"outcome_names" = outcome_names,
375+
"variables" = cb_data$snp.names,
376376
"target_idx" = target_idx)
377377
class(cb_model_para) = "colocboost"
378378

Lines changed: 54 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
11

22

3-
colocboost_onecausal <- function(cb_model, cb_model_para, cb_data,
4-
jk_equiv_cor = 0.8,
5-
jk_equiv_loglik = 1,
6-
tau = 0.01,
7-
decayrate = 1,
8-
func_prior = "z2z",
9-
lambda = 0.5,
10-
lambda_target = 1,
11-
LD_obj = FALSE){
3+
colocboost_one_causal <- function(cb_model, cb_model_para, cb_data,
4+
jk_equiv_cor = 0.8,
5+
jk_equiv_loglik = 1,
6+
tau = 0.01,
7+
decayrate = 1,
8+
func_prior = "z2z",
9+
lambda = 0.5,
10+
lambda_target = 1,
11+
LD_obj = FALSE){
1212

1313
if (jk_equiv_cor != 0){
14-
cb_obj <- colocboost_oneiteration(cb_model, cb_model_para, cb_data,
15-
jk_equiv_cor = jk_equiv_cor,
16-
jk_equiv_loglik = jk_equiv_loglik,
17-
tau = tau,
18-
decayrate = decayrate,
19-
func_prior = func_prior,
20-
lambda = lambda,
21-
lambda_target = lambda_target,
22-
LD_obj = LD_obj)
14+
cb_obj <- colocboost_one_iteration(cb_model, cb_model_para, cb_data,
15+
jk_equiv_cor = jk_equiv_cor,
16+
jk_equiv_loglik = jk_equiv_loglik,
17+
tau = tau,
18+
decayrate = decayrate,
19+
func_prior = func_prior,
20+
lambda = lambda,
21+
lambda_target = lambda_target,
22+
LD_obj = LD_obj)
2323
} else {
2424
cb_obj <- colocboost_diagLD(cb_model, cb_model_para, cb_data,
2525
jk_equiv_cor = jk_equiv_cor,
@@ -39,24 +39,24 @@ colocboost_onecausal <- function(cb_model, cb_model_para, cb_data,
3939

4040

4141
# under one causal per trait assumption with one iteration
42-
colocboost_oneiteration <- function(cb_model, cb_model_para, cb_data,
43-
jk_equiv_cor = 0.8,
44-
jk_equiv_loglik = 1,
45-
tau = 0.01,
46-
decayrate = 1,
47-
func_prior = "z2z",
48-
lambda = 0.5,
49-
lambda_target = 1,
50-
LD_obj = FALSE){
42+
colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data,
43+
jk_equiv_cor = 0.8,
44+
jk_equiv_loglik = 1,
45+
tau = 0.01,
46+
decayrate = 1,
47+
func_prior = "z2z",
48+
lambda = 0.5,
49+
lambda_target = 1,
50+
LD_obj = FALSE){
5151

5252

5353
if (sum(cb_model_para$update_y == 1) != 0){
5454

5555
######## - some traits updated
5656
# - step 1: check update clusters
57-
real_update <- boost_check_update_jk_onecausal(cb_model, cb_model_para, cb_data,
58-
jk_equiv_cor = jk_equiv_cor,
59-
jk_equiv_loglik = jk_equiv_loglik)
57+
real_update <- boost_check_update_jk_one_causal(cb_model, cb_model_para, cb_data,
58+
jk_equiv_cor = jk_equiv_cor,
59+
jk_equiv_loglik = jk_equiv_loglik)
6060

6161
# - step 2: boost update
6262
for (i_update in 1:length(real_update)){
@@ -66,13 +66,13 @@ colocboost_oneiteration <- function(cb_model, cb_model_para, cb_data,
6666
cb_model_para$update_status <- cbind(cb_model_para$update_status, as.matrix(real_update[[i_update]]$update_status))
6767
cb_model_para$real_update_jk <- rbind(cb_model_para$real_update_jk, real_update[[i_update]]$real_update_jk)
6868
# - update cb_model
69-
cb_model <- boost_joint(cb_model, cb_model_para, cb_data,
70-
tau = tau,
71-
decayrate = decayrate,
72-
func_prior = func_prior,
73-
lambda = lambda,
74-
lambda_target = lambda_target,
75-
LD_obj = LD_obj)
69+
cb_model <- colocboost_update(cb_model, cb_model_para, cb_data,
70+
tau = tau,
71+
decayrate = decayrate,
72+
func_prior = func_prior,
73+
lambda = lambda,
74+
lambda_target = lambda_target,
75+
LD_obj = LD_obj)
7676
}
7777
}
7878
# -- remove redundant parameters
@@ -92,10 +92,10 @@ colocboost_oneiteration <- function(cb_model, cb_model_para, cb_data,
9292

9393

9494

95-
boost_check_update_jk_onecausal <- function(cb_model, cb_model_para, cb_data,
96-
prioritize_jkstar = TRUE,
97-
jk_equiv_cor = 0.8,
98-
jk_equiv_loglik = 1){
95+
boost_check_update_jk_one_causal <- function(cb_model, cb_model_para, cb_data,
96+
prioritize_jkstar = TRUE,
97+
jk_equiv_cor = 0.8,
98+
jk_equiv_loglik = 1){
9999

100100
pos.update <- which(cb_model_para$update_y == 1)
101101
update_jk <- rep(NA, cb_model_para$L+1)
@@ -195,9 +195,9 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data,
195195
cb_model_para$jk <- rbind(cb_model_para$jk, update_jk)
196196
cb_model_para$update_status <- cbind(cb_model_para$update_status, as.matrix(update_status))
197197
cb_model_para$real_update_jk <- rbind(cb_model_para$real_update_jk, real_update_jk)
198-
cb_model <- boost_joint(cb_model, cb_model_para, cb_data,
199-
tau = tau, decayrate = decayrate, func_prior = func_prior,
200-
lambda = lambda, lambda_target = lambda_target, LD_obj = LD_obj)
198+
cb_model <- colocboost_update(cb_model, cb_model_para, cb_data,
199+
tau = tau, decayrate = decayrate, func_prior = func_prior,
200+
lambda = lambda, lambda_target = lambda_target, LD_obj = LD_obj)
201201

202202
}
203203

@@ -225,9 +225,9 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data,
225225
cb_model_para$update_temp <- list("update_status" = update_status,
226226
"real_update_jk" = real_update_jk)
227227
# - update cb_model
228-
cb_model_tmp <- boost_joint(cb_model_tmp, cb_model_para, cb_data,
229-
tau = tau, decayrate = decayrate, func_prior = func_prior,
230-
lambda = lambda, lambda_target = lambda_target, LD_obj = LD_obj)
228+
cb_model_tmp <- colocboost_update(cb_model_tmp, cb_model_para, cb_data,
229+
tau = tau, decayrate = decayrate, func_prior = func_prior,
230+
lambda = lambda, lambda_target = lambda_target, LD_obj = LD_obj)
231231
weights <- rbind(weights, cb_model_tmp[[iy]]$weights_path)
232232
}
233233
###### overlap weights
@@ -275,13 +275,13 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data,
275275
cb_model_para$update_status <- cbind(cb_model_para$update_status, as.matrix(real_update[[i_update]]$update_status))
276276
cb_model_para$real_update_jk <- rbind(cb_model_para$real_update_jk, real_update[[i_update]]$real_update_jk)
277277
# - update cb_model
278-
cb_model <- boost_joint(cb_model, cb_model_para, cb_data,
279-
tau = tau,
280-
decayrate = decayrate,
281-
func_prior = func_prior,
282-
lambda = lambda,
283-
lambda_target = lambda_target,
284-
LD_obj = LD_obj)
278+
cb_model <- colocboost_update(cb_model, cb_model_para, cb_data,
279+
tau = tau,
280+
decayrate = decayrate,
281+
func_prior = func_prior,
282+
lambda = lambda,
283+
lambda_target = lambda_target,
284+
LD_obj = LD_obj)
285285
}
286286
}
287287
# -- remove redundant parameters

0 commit comments

Comments
 (0)