@@ -61,74 +61,77 @@ finemap_pval_LD <- function(df, bfile, pval_threshold = 5E-8, clump_r2 = 0.001)
6161# ' \item \code{X_column_scale_factors} A vector of SNP identifiers (e.g., SNP names).
6262# ' \item \code{pip} A named vector of Posterior Inclusion Probability (PIP) values, indexed by SNP names.
6363# ' }
64+ # ' @param df A data frame containing SNP information, with the following columns:
65+ # ' \itemize{
66+ # ' \item \code{SNP} The SNP identifiers.
67+ # ' \item \code{POS} The position of each SNP.
68+ # ' \item \code{P} The p-value associated with each SNP.
69+ # ' }
6470# ' @return A tibble (data frame) containing the following columns:
6571# ' \itemize{
66- # ' \item \code{SNP} The SNP identifiers
72+ # ' \item \code{SNP} The SNP identifiers.
73+ # ' \item \code{POS} The position of each SNP.
74+ # ' \item \code{P} The p-value for each SNP.
6775# ' \item \code{PIP} The Posterior Inclusion Probability for each SNP.
6876# ' \item \code{cs_snps} A string listing the other SNPs in the same credible set (NA if only one SNP).
6977# ' \item \code{cs} The credible set identifier, represented as a factor.
70- # ' \item \code{test} The label "susie", indicating the test used for generating the table.
78+ # ' \item \code{test} A string indicating the test used for generating the table (always "susie") .
7179# ' \item \code{label} The SNP identifier for the SNP with the highest PIP value within each credible set (NA for other SNPs).
7280# ' }
7381# ' @examples
7482# ' \dontrun{
75- # ' result <- susieR_cs_table(susie_model)
83+ # ' result <- susieR_cs_table(susie_model, df )
7684# ' print(result)
7785# ' }
7886# ' @export
79- susieR_cs_table <- function (susieR_model ) {
87+ susieR_cs_table <- function (susieR_model , df ) {
8088 # Extract the credible sets
8189 cs_list <- susieR_model $ sets $ cs
90+ pip <- data.frame (SNP = names(susieR_model $ pip ), pip_value = susieR_model $ pip )
8291
83- # Process each credible set
84- credible_sets <- purrr :: map(cs_list , ~ {
85- snp_indices <- .x
86- snp_ids <- names(susieR_model $ X_column_scale_factors )[snp_indices ]
87- pip_values <- susieR_model $ pip [snp_ids ]
92+ table <- data.frame (
93+ SNP = df $ SNP ,
94+ POS = df $ POS ,
95+ P = df $ P
96+ ) %> %
97+ left_join(pip , by = " SNP" )
8898
89- # Get other SNPs (exclude the current SNP from the list)
90- other_snps <- if (length(snp_ids ) > 1 ) {
91- purrr :: map(snp_ids , ~ paste(setdiff(snp_ids , .x ), collapse = " , " ))
92- } else {
93- rep(NA , length(snp_ids ))
94- }
99+ # Initialize the `cs` column to store credible set labels
100+ table $ cs <- NA
95101
96- # Return a tibble with the SNPs and corresponding PIP values
97- tibble :: tibble(SNP = snp_ids , PIP = pip_values , cs_snps = unlist(other_snps ), test = " susie" )
98- })
102+ # Assign credible set labels (e.g., "L1", "L2") to the `cs` column
103+ for (i in seq_along(cs_list )) {
104+ row_indices <- cs_list [[i ]] # Extract row numbers for this list item
105+ table $ cs [row_indices ] <- paste0(" L" , i ) # Assign the label to these rows
106+ }
99107
100- # Combine all credible sets into one dataframe and add the `cs` label
101- df <- purrr :: imap_dfr(credible_sets , ~ {
102- .x %> %
103- # Create the `cs` column
104- dplyr :: mutate(cs = as.factor(.y )) %> %
105- # Group by `cs` to find the max PIP for each group
106- dplyr :: group_by(cs ) %> %
107- # Create the label only for the row with the highest PIP value
108- dplyr :: mutate(label = if_else(PIP == max(PIP ), SNP , NA_character_ )) %> %
109- # Ungroup to avoid issues with further operations
110- dplyr :: ungroup()
111- })
108+ # Initialize the `cs_snps` column to store SNPs in the same credible set
109+ table $ cs_snps <- NA
112110
113- # Prepare the table for `table_susie`
114- snp_ids <- names(susieR_model $ X_column_scale_factors )
115- pip_values <- susieR_model $ pip [snp_ids ]
116- other_snps <- if (length(snp_ids ) > 1 ) {
117- purrr :: map(snp_ids , ~ paste(setdiff(snp_ids , .x ), collapse = " , " ))
118- } else {
119- rep(NA , length(snp_ids ))
120- }
111+ # Populate the `cs_snps` column with comma-separated SNPs in the same credible set
112+ for (i in seq_along(cs_list )) {
113+ row_indices <- cs_list [[i ]] # Row indices for the current credible set
114+ snps <- table $ SNP [row_indices ] # Extract SNP values for these rows
121115
122- table_susie <- tibble :: tibble(SNP = snp_ids , PIP = pip_values , test = " susie" )
116+ # Assign concatenated SNPs excluding the current SNP for each row
117+ for (row in row_indices ) {
118+ other_snps <- setdiff(snps , table $ SNP [row ])
119+ if (length(other_snps ) > 0 ) {
120+ table $ cs_snps [row ] <- paste(other_snps , collapse = " , " )
121+ }
122+ }
123+ }
123124
124- # Combine `df` with `table_susie`
125- finemap_susie_table <- table_susie %> %
126- left_join(df , by = " SNP" ) %> %
127- select(SNP , PIP.x , cs_snps , cs , test.x , label ) %> %
128- rename(PIP = PIP.x ,
129- test = test.x )
125+ # Add a label for the SNP with the smallest p-value in each credible set
126+ table <- table %> %
127+ dplyr :: filter(! is.na(cs )) %> % # Exclude rows where cs is NA
128+ dplyr :: group_by(cs ) %> %
129+ dplyr :: mutate(label = if_else(P == min(P ), SNP , NA_character_ )) %> % # Identify SNP with smallest p-value
130+ dplyr :: ungroup() %> %
131+ dplyr :: bind_rows(table %> % dplyr :: filter(is.na(cs ))) # Add back rows with NA cs
130132
131- return (finemap_susie_table )
133+ # Return the final table
134+ return (table )
132135}
133136
134137# ' Create a Table of Credible Sets from Finimom Model
@@ -163,43 +166,45 @@ finimom_cs_table <- function(finimom_model, df) {
163166 cs_list <- finimom_model $ sets
164167
165168 # Create the initial table with SNP details and PIP values
166- finemap_finimom_table <- data.frame (
169+ table <- data.frame (
167170 SNP = df $ SNP , # SNP identifiers
168171 POS = df $ POS , # SNP positions
169172 P = df $ P , # SNP p-values
170173 PIP = finimom_model $ pip # Posterior inclusion probabilities
171174 )
172175
173176 # Initialize the `cs` column to store credible set labels
174- finemap_finimom_table $ cs <- NA
177+ table $ cs <- NA
175178
176179 # Assign credible set labels (e.g., "L1", "L2") to the `cs` column
177180 for (i in seq_along(cs_list )) {
178181 row_indices <- cs_list [[i ]] # Extract row numbers for this list item
179- finemap_finimom_table $ cs [row_indices ] <- paste0(" L" , i ) # Assign the label to these rows
182+ table $ cs [row_indices ] <- paste0(" L" , i ) # Assign the label to these rows
180183 }
181184
182185 # Initialize the `cs_snps` column to store SNPs in the same credible set
183- finemap_finimom_table $ cs_snps <- NA
186+ table $ cs_snps <- NA
184187
185188 # Populate the `cs_snps` column with comma-separated SNPs in the same credible set
186189 for (i in seq_along(cs_list )) {
187190 row_indices <- cs_list [[i ]] # Row indices for the current credible set
188- snps <- finemap_finimom_table $ SNP [row_indices ] # Extract SNP values for these rows
191+ snps <- table $ SNP [row_indices ] # Extract SNP values for these rows
189192
190193 # Assign concatenated SNPs excluding the current SNP for each row
191194 for (row in row_indices ) {
192- finemap_finimom_table $ cs_snps [row ] <- paste(setdiff(snps , finemap_finimom_table $ SNP [row ]), collapse = " , " )
195+ table $ cs_snps [row ] <- paste(setdiff(snps , table $ SNP [row ]), collapse = " , " )
193196 }
194197 }
195198
196199 # Add a label for the SNP with the smallest p-value in each credible set
197- finemap_finimom_table <- finemap_finimom_table %> %
200+ table <- table %> %
201+ dplyr :: filter(! is.na(cs )) %> % # Exclude rows where cs is NA
198202 dplyr :: group_by(cs ) %> %
199203 dplyr :: mutate(label = if_else(P == min(P ), SNP , NA_character_ )) %> % # Identify SNP with smallest p-value
200- dplyr :: ungroup()
204+ dplyr :: ungroup() %> %
205+ dplyr :: bind_rows(table %> % dplyr :: filter(is.na(cs ))) # Add back rows with NA cs
201206
202207 # Return the final table
203- return (finemap_finimom_table )
208+ return (table )
204209}
205210
0 commit comments