@@ -121,7 +121,7 @@ standardize_celltype_colnames <- function(mat) {
121121
122122 # # CD4 and subtypes
123123 lower <- stringr :: str_to_lower(colnames(mat ))
124- is_cd4 <- grepl(" \\ bcd4\\ b" , lower )
124+ is_cd4 <- grepl(" \\ bcd4\\ b|reg|regulatory " , lower )
125125 is_tcell_variant <- grepl(" (^|[^a-z0-9])(t|tcell|t\\ .cells|t_cells|t cells)([^a-z0-9]|$)" , lower , perl = TRUE )
126126 is_memory <- grepl(" memory" , lower )
127127 cd4_idx <- which(is_cd4 | (is_tcell_variant & is_memory ))
@@ -170,15 +170,15 @@ standardize_celltype_colnames <- function(mat) {
170170 colnames(blocks $ CD4.non.regulatory ) <- stringr :: str_replace(
171171 colnames(blocks $ CD4.non.regulatory ),
172172 " (?i)^(.*?)(?:_)?(T[_\\ .-]?cell[_\\ .-]?CD4[_\\ .-]?.*non[._-]?regulatory.*|T[_\\ .-]?cells?[_\\ .-]?.*non[._-]?regulatory.*|CD4[_\\ .-]?non[._-]?regulatory.*|T\\ .cells\\ .non\\ .regulatory|T_cells_non_regulatory|nonregulatory|non[\\ W_]?reg)$" ,
173- " \\ 1_CD4 .non.regulatory"
173+ " CD4 .non.regulatory"
174174 )
175175 }
176176 if (ncol(blocks $ CD4.regulatory )){
177177 mat <- mat [, ! colnames(mat ) %in% colnames(blocks $ CD4.regulatory ), drop = FALSE ]
178178 colnames(blocks $ CD4.regulatory ) <- stringr :: str_replace(
179179 colnames(blocks $ CD4.regulatory ),
180180 " (?i)^(.*?)(?:_)?(T[_\\ .-]?cell[_\\ .-]?regulatory.*|T[_\\ .-]?cells?[_\\ .-]?regulatory.*|T\\ .cells\\ .regulatory.*|tregs?\\ .?$|tregulatory.*|regulatory.*)$" ,
181- " \\ 1_CD4 .regulatory"
181+ " CD4 .regulatory"
182182 )
183183 }
184184
@@ -2987,6 +2987,21 @@ create_gsea_signature <- function(gene_scores,
29872987 pos <- fg %> %
29882988 dplyr :: filter(! is.na(NES ) & NES > 0 ) %> % # Keep positive NES
29892989 dplyr :: arrange(dplyr :: desc(NES )) # Arrange by descending NES
2990+
2991+ # fg_top <- fg %>%
2992+ # dplyr::filter(!is.na(NES)) %>%
2993+ # dplyr::arrange(dplyr::desc(dplyr::abs(NES))) %>%
2994+ # dplyr::slice_head(n = 20) %>%
2995+ # dplyr::mutate(pathway = factor(pathway, levels = rev(pathway)),
2996+ # sig = -log10(padj + 1e-300))
2997+
2998+ # ggplot(fg_top, aes(x = NES, y = pathway, size = size, color = sig)) +
2999+ # geom_point() +
3000+ # scale_color_viridis_c(name = "-log10(padj)") +
3001+ # scale_size_continuous(name = "pathway size") +
3002+ # labs(title = "FGSEA — top 20 pathways", x = "NES", y = NULL) +
3003+ # theme_minimal(base_size = 12)
3004+
29903005 if (nrow(pos ) == 0 ) stop(" No pathways with NES > 0 found" )
29913006 first_pathway <- as.character(pos $ pathway [1 ])
29923007 suffix <- stringr :: str_replace_all(first_pathway , " [^A-Za-z0-9]+" , " _" ) # Replace non-alphanumeric characters with underscores
@@ -3009,7 +3024,7 @@ expand_subgroup_members <- function(subgroup, subgroup_map) {
30093024}
30103025
30113026
3012- compute_deconvolution_dictionary <- function (subgroups , expr ) {
3027+ compute_deconvolution_dictionary <- function (subgroups , expr , pathways = NULL ) {
30133028
30143029 subgroup_map <- subgroups [[" Deconvolution subgroups composition" ]]
30153030 deconv_mat = subgroups [[" Deconvolution matrix" ]]
@@ -3029,7 +3044,7 @@ compute_deconvolution_dictionary <- function(subgroups, expr) {
30293044 deconv = deconv_mat ,
30303045 subgroup = sub_name ) # Compute correlation rankings for the subgroup
30313046
3032- sig_out <- create_gsea_signature(ranked , sub_name ) # create pathwyas signature
3047+ sig_out <- create_gsea_signature(ranked , sub_name , pathways ) # create pathwyas signature
30333048 if (is.null(sig_out )) next # No enrichment found, skip to next subgroup
30343049 new_label <- sig_out [[1 ]]
30353050 idx <- which(colnames(deconv_mat ) == sub_name ) # replace column name in deconv_mat if present
@@ -3047,21 +3062,3 @@ compute_deconvolution_dictionary <- function(subgroups, expr) {
30473062
30483063 return (subgroups )
30493064}
3050-
3051- library(ggplot2 )
3052- library(dplyr )
3053- library(viridis )
3054- # FGSEA dotplot (top 20 by |NES|)
3055- fg_top <- fgseaRes | >
3056- filter(! is.na(NES )) | >
3057- arrange(desc(abs(NES ))) | >
3058- slice_head(n = 20 ) | >
3059- mutate(pathway = factor (pathway , levels = rev(pathway )),
3060- sig = - log10(padj + 1e-300 ))
3061-
3062- ggplot(fg_top , aes(x = NES , y = pathway , size = size , color = sig )) +
3063- geom_point() +
3064- scale_color_viridis_c(name = " -log10(padj)" ) +
3065- scale_size_continuous(name = " pathway size" ) +
3066- labs(title = " FGSEA — top 20 pathways" , x = " NES" , y = NULL ) +
3067- theme_minimal(base_size = 12 )
0 commit comments