|
26 | 26 | #' @importFrom S4Vectors queryHits subjectHits |
27 | 27 | #' @importFrom IRanges IRanges findOverlaps start end reduce |
28 | 28 | #' @export |
29 | | -harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file) { |
| 29 | +harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file, column_file_path = NULL, comment_string = "#") { |
30 | 30 | # Function to group contexts based on start and end positions |
31 | 31 | group_contexts_by_region <- function(twas_weights_data, molecular_id, chrom, tolerance = 5000) { |
32 | 32 | region_info_df <- do.call(rbind, lapply(names(twas_weights_data$weights), function(context) { |
@@ -150,7 +150,9 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file) |
150 | 150 | # Step 3: load GWAS data for clustered context groups |
151 | 151 | for (study in names(gwas_files)) { |
152 | 152 | gwas_file <- gwas_files[study] |
153 | | - gwas_data_sumstats <- harmonize_gwas(gwas_file, query_region=query_region, LD_list$combined_LD_variants, c("beta", "z"), match_min_prop = 0) |
| 153 | + gwas_data_sumstats <- harmonize_gwas(gwas_file, query_region=query_region, |
| 154 | + LD_list$combined_LD_variants, c("beta", "z"), |
| 155 | + match_min_prop = 0, column_file_path = column_file_path, comment_string = comment_string) |
154 | 156 | if(is.null(gwas_data_sumstats)) next |
155 | 157 | # loop through context within the context group: |
156 | 158 | for (context in contexts) { |
@@ -247,9 +249,19 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file) |
247 | 249 | #' @param query_region A string for region of query for tabix-indexed gwas summary statistics file in the format of chr:start-end |
248 | 250 | #' @noRd |
249 | 251 | #' @export |
250 | | -harmonize_gwas <- function(gwas_file, query_region, ld_variants, col_to_flip=NULL, match_min_prop=0){ |
| 252 | +harmonize_gwas <- function(gwas_file, query_region, ld_variants, col_to_flip=NULL, match_min_prop=0, column_file_path=NULL, comment_string="#"){ |
251 | 253 | if(is.null(gwas_file)| is.na(gwas_file)) stop("No GWAS file path provided. ") |
252 | | - gwas_data_sumstats <- as.data.frame(tabix_region(gwas_file, query_region)) # extension for yml file for column name mapping |
| 254 | + if (!is.null(column_file_path)) { |
| 255 | + rss_result <- load_rss_data( |
| 256 | + sumstat_path = gwas_file, |
| 257 | + column_file_path = column_file_path, |
| 258 | + region = query_region, |
| 259 | + comment_string = comment_string |
| 260 | + ) |
| 261 | + gwas_data_sumstats <- rss_result$sumstats |
| 262 | + } else { |
| 263 | + gwas_data_sumstats <- as.data.frame(tabix_region(gwas_file, query_region)) |
| 264 | + } |
253 | 265 | if (nrow(gwas_data_sumstats) == 0) { |
254 | 266 | if (length(names(gwas_file))==0) names(gwas_file) <- gwas_file |
255 | 267 | warning(paste0("No GWAS summary statistics found for the region of ", query_region, " in ", names(gwas_file), ". ")) |
@@ -287,7 +299,9 @@ twas_pipeline <- function(twas_weights_data, |
287 | 299 | mr_coverage_column = "cs_coverage_0.95", |
288 | 300 | quantile_twas = FALSE, |
289 | 301 | output_twas_data = FALSE, |
290 | | - event_filters=NULL) { |
| 302 | + event_filters=NULL, |
| 303 | + column_file_path = NULL, |
| 304 | + comment_string="#") { |
291 | 305 | # internal function to format TWAS output |
292 | 306 | format_twas_data <- function(post_qc_twas_data, twas_table) { |
293 | 307 | weights_list <- do.call(c, lapply(names(post_qc_twas_data), function(molecular_id) { |
@@ -421,7 +435,7 @@ twas_pipeline <- function(twas_weights_data, |
421 | 435 | } |
422 | 436 |
|
423 | 437 | # harmonize twas weights and gwas sumstats against LD |
424 | | - twas_data_qced_result <- harmonize_twas(twas_weights_data, ld_meta_file_path, gwas_meta_file) |
| 438 | + twas_data_qced_result <- harmonize_twas(twas_weights_data, ld_meta_file_path, gwas_meta_file, column_file_path = column_file_path, comment_string = comment_string) |
425 | 439 | twas_results_db <- lapply(names(twas_weights_data), function(weight_db) { |
426 | 440 | twas_weights_data[[weight_db]][["molecular_id"]] <- weight_db |
427 | 441 | twas_data_qced <- twas_data_qced_result$twas_data_qced |
|
0 commit comments