From 1ddb5f05ba86b0d082ba0d6d4eae09a5371bef20 Mon Sep 17 00:00:00 2001
From: bednarsky <72978444+bednarsky@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:05:58 +0200
Subject: [PATCH] Add summary plot for specific terms. Remove summary heatmaps.
 Update docs #34 #35 #32

---
 README.md                                     |   9 +-
 workflow/Snakefile                            |  18 +-
 .../report/summary_plot_specificTerms.rst     |   1 +
 ...ary_plot.rst => summary_plot_topTerms.rst} |   0
 workflow/rules/aggregate.smk                  |  18 +-
 workflow/scripts/aggregate.py                 |  16 +-
 workflow/scripts/overview_plot.R              | 344 +++++++++---------
 7 files changed, 207 insertions(+), 199 deletions(-)
 create mode 100644 workflow/report/summary_plot_specificTerms.rst
 rename workflow/report/{summary_plot.rst => summary_plot_topTerms.rst} (100%)

diff --git a/README.md b/README.md
index 60122eb..3b99e9d 100644
--- a/README.md
+++ b/README.md
@@ -120,8 +120,9 @@ The five tools LOLA, GREAT, pycisTarget, RcisTarget and GSEApy (over-representat
         - effect-size is presented by the x-axis position
         - overlap is presented by the dot size
     - group summary/overview
-        - the union of the top `{top_terms_n}` most significant terms per query, method, and database within a group is determined. 
-        - their effect-size (effect) and statistical significance (adjp) are visualized as hierarchically clustered heatmaps, with statistical significance denoted by `\*` (PDF).
+        - two plots: 
+            - top terms: the union of the top `{top_terms_n}` most significant terms per query, method, and database within a group is determined. 
+            - specific terms: the union of statistically significant terms with the lowest average significance across all other groups is determined. This plot is empty if no statistically significant terms are found.
         - a hierarchically clustered bubble plot encoding both effect-size (color) and significance (size) is provided, with statistical significance denoted by `\*` (PNG).
         - all summary visualizations are configured to cap the values (`{adjp_cap}`/`{or_cap}`/`{nes_cap}`) to avoid shifts in the coloring scheme caused by outliers.
 - **results** (`{result_path}/enrichment_analysis`)
@@ -131,9 +132,7 @@ The five tools LOLA, GREAT, pycisTarget, RcisTarget and GSEApy (over-representat
         - enrichment dot plot (PNG): `{query}\_{database}.{png}`
     - `{group}/{method}/{database}/` containing
         - aggregated result table (CSV): `{group}\_{database}\_all.csv`
-        - filtered aggregated result table (CSV): `{group}\_{database}\_sig.csv`
-        - hierarchically clustered heatmaps visualizing statistical significance and effect-sizes of the top `{top_terms_n}` terms (PDF): `{group}\_{database}\_{adjp|effect}\_heatmap.pdf`
-        - hierarchically clustered bubble plot visualizing statistical significance and effect-sizes simultaneously (PNG):  `{group}\_{database}\_summary.{png}`
+        - hierarchically clustered bubble plot visualizing statistical significance and effect-sizes simultaneously (PNG):  `{group}\_{database}\_summary_{topTerms|specificTerms}.{png}`. In case of only one query gene/region set, this plot is empty.
 
 Note:
 - Despite usage of the correct parameter, **rGREAT** was not using the provided cores during testing. Nevertheless, it is still provided as parameter.
diff --git a/workflow/Snakefile b/workflow/Snakefile
index e902ea8..5a970e0 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -92,12 +92,18 @@ rule all:
         expand(os.path.join(result_path, '{gene_set}', 'preranked_GSEApy','{db}','{gene_set}_{db}.csv'), gene_set=rnk_dict.keys(), db=database_dict.keys()),
         expand(os.path.join(result_path, '{gene_set}', 'preranked_GSEApy','{db}','{gene_set}_{db}.png'), gene_set=rnk_dict.keys(), db=database_dict.keys()),
         # summaries
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='ORA_GSEApy', db=database_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=rnk["group"].unique(), tool='preranked_GSEApy', db=database_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=regions["group"].unique(), tool='GREAT', db=database_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=regions["group"].unique(), tool='LOLA', db=lola_db_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=regions["group"].unique(), tool='pycisTarget', db=pycistarget_db_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='RcisTarget', db=rcistarget_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='ORA_GSEApy', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='ORA_GSEApy', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=rnk["group"].unique(), tool='preranked_GSEApy', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=rnk["group"].unique(), tool='preranked_GSEApy', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=regions["group"].unique(), tool='GREAT', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=regions["group"].unique(), tool='GREAT', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=regions["group"].unique(), tool='LOLA', db=lola_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=regions["group"].unique(), tool='LOLA', db=lola_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=regions["group"].unique(), tool='pycisTarget', db=pycistarget_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=regions["group"].unique(), tool='pycisTarget', db=pycistarget_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='RcisTarget', db=rcistarget_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='RcisTarget', db=rcistarget_db_dict.keys()),
         # config
         envs = expand(os.path.join(result_path,'envs','{env}.yaml'),env=['region_enrichment_analysis','gene_enrichment_analysis','visualization','pycisTarget','RcisTarget']),
         configs = os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])),
diff --git a/workflow/report/summary_plot_specificTerms.rst b/workflow/report/summary_plot_specificTerms.rst
new file mode 100644
index 0000000..2bca841
--- /dev/null
+++ b/workflow/report/summary_plot_specificTerms.rst
@@ -0,0 +1 @@
+Summary of the most specific enrichment analysis results of group {{snakemake.wildcards["group"]}} in database {{snakemake.wildcards["db"]}} using {{snakemake.wildcards["tool"]}}.
\ No newline at end of file
diff --git a/workflow/report/summary_plot.rst b/workflow/report/summary_plot_topTerms.rst
similarity index 100%
rename from workflow/report/summary_plot.rst
rename to workflow/report/summary_plot_topTerms.rst
diff --git a/workflow/rules/aggregate.smk b/workflow/rules/aggregate.smk
index 338f0e8..11d84b3 100644
--- a/workflow/rules/aggregate.smk
+++ b/workflow/rules/aggregate.smk
@@ -5,7 +5,6 @@ rule aggregate:
         enrichment_results = get_group_paths,
     output:
         results_all = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_all.csv'),
-        results_sig = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_sig.csv'),
     threads: config.get("threads", 1)
     resources:
         mem_mb=config.get("mem", "16000"),
@@ -21,8 +20,19 @@ rule visualize:
     input:
         results_all = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_all.csv'),
     output:
-        summary_plot = report(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),
-                             caption="../report/summary_plot.rst", 
+        summary_plot_topTerms = report(
+                             os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),
+                             caption="../report/summary_plot_topTerms.rst", 
+                             category="{}_{}".format(config["project_name"], module_name),
+                             subcategory="{group}",
+                               labels={
+                                  "name": "{tool}",
+                                  "type": "summary plot",
+                                  "misc": "{db}",
+                              }),
+        summary_plot_specificTerms = report(
+                             os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),
+                             caption="../report/summary_plot_specificTerms.rst", 
                              category="{}_{}".format(config["project_name"], module_name),
                              subcategory="{group}",
                                labels={
@@ -30,8 +40,6 @@ rule visualize:
                                   "type": "summary plot",
                                   "misc": "{db}",
                               }),
-        adjp_hm = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_adjp_heatmap.pdf'),
-        effect_hm = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_effect_heatmap.pdf'),
     params:
         utils_path = workflow.source_path("../scripts/utils.R")
     threads: config.get("threads", 1)
diff --git a/workflow/scripts/aggregate.py b/workflow/scripts/aggregate.py
index f995965..c9ab4f5 100644
--- a/workflow/scripts/aggregate.py
+++ b/workflow/scripts/aggregate.py
@@ -12,7 +12,6 @@
 
 # output
 results_all_path = snakemake.output['results_all']
-results_sig_path = snakemake.output['results_sig']
 
 # parameters
 group = snakemake.wildcards["group"]
@@ -41,23 +40,10 @@
 # move on if results are empty
 if len(results_list)==0:
     open(results_all_path, mode='a').close()
-    open(results_sig_path, mode='a').close()
     sys.exit(0)
         
 # concatenate all results into one results dataframe
 result_df = pd.concat(results_list, axis=0)
 
 # save all enirchment results
-result_df.to_csv(results_all_path)
-
-# find union of statistically significant terms
-if tool=="pycisTarget" or tool=="RcisTarget":
-    sig_terms = result_df.loc[result_df[adjp_col] >= adjp_th, term_col].unique()
-else:
-    sig_terms = result_df.loc[result_df[adjp_col] <= adjp_th, term_col].unique()
-
-# filter by significant terms
-result_sig_df = result_df.loc[result_df[term_col].isin(sig_terms), :]
-
-# save filtered enirchment results by significance
-result_sig_df.to_csv(results_sig_path)
+result_df.to_csv(results_all_path)
\ No newline at end of file
diff --git a/workflow/scripts/overview_plot.R b/workflow/scripts/overview_plot.R
index b7ee44a..39ff79d 100644
--- a/workflow/scripts/overview_plot.R
+++ b/workflow/scripts/overview_plot.R
@@ -17,9 +17,8 @@ source(snakemake@params[["utils_path"]])
 results_all_path <- snakemake@input[["results_all"]]
 
 # output
-plot_path <- snakemake@output[["summary_plot"]]
-adjp_hm_path <- snakemake@output[["adjp_hm"]]
-effect_hm_path <- snakemake@output[["effect_hm"]]
+plot_path_topTerms <- snakemake@output[["summary_plot_topTerms"]]
+plot_path_specificTerms <- snakemake@output[["summary_plot_specificTerms"]]
 
 # parameters
 tool <- snakemake@wildcards[["tool"]]
@@ -38,9 +37,8 @@ cluster_flag <- as.logical(as.numeric(snakemake@config[["cluster_summary"]]))
 
 # stop early if results are empty
 if(file.size(results_all_path) == 0L){
-    file.create(plot_path)
-    file.create(adjp_hm_path)
-    file.create(effect_hm_path)
+    file.create(plot_path_topTerms)
+    file.create(plot_path_specificTerms)
     quit(save = "no", status = 0)
 }
 
@@ -49,187 +47,197 @@ results_all <- data.frame(fread(file.path(results_all_path), header=TRUE))
 
 # stop early if results consist of only one query
 if(length(unique(results_all$name))==1){
-    file.create(plot_path)
-    file.create(adjp_hm_path)
-    file.create(effect_hm_path)
+    file.create(plot_path_topTerms)
+    file.create(plot_path_specificTerms)
     quit(save = "no", status = 0)
 }
 
 # remove empty terms
 results_all <- results_all[results_all[[term_col]]!="",]
 
-# determine top_n most significant terms (not necessarily statistically significant!)
-top_terms <- c()
-for (query in unique(results_all$name)){
-    tmp_result <- results_all[results_all$name==query,]
-    top_n_tmp <- min(top_n, nrow(tmp_result))
-    
+make_summary_plot <- function(type = "topTerms"){
+    # determine terms to plot
+    top_terms <- c()
+    if (type == "topTerms"){
+        # pick top_n most significant terms per query (not necessarily statistically significant!)
+        for (query in unique(results_all$name)){
+            tmp_result <- results_all[results_all$name==query,]
+            top_n_tmp <- min(top_n, nrow(tmp_result))
+            if(tool=="pycisTarget" | tool=="RcisTarget"){
+                tmp_terms <- tmp_result[order(-tmp_result[[adjp_col]]), term_col][1:top_n_tmp]
+            }else{
+                tmp_terms <- tmp_result[order(tmp_result[[adjp_col]]), term_col][1:top_n_tmp]
+            }
+            top_terms <- unique(c(top_terms,tmp_terms))
+        }
+    } else if (type == "specificTerms"){
+        # for each query, take significant terms that have the lowest average -log10(adj-p) across all other queries
+        all_queries <- unique(results_all$name)
+        for (query in all_queries){
+            tmp_result <- results_all[results_all$name==query,]
+            if(tool=="pycisTarget" | tool=="RcisTarget"){
+                sig_mask <- tmp_result[[adjp_col]] >= adjp_th
+                # invert, to get the ranking right
+                tmp_result[[adjp_col]] <- -tmp_result[[adjp_col]]
+            } else {
+                # significant in current query
+                sig_mask <- tmp_result[[adjp_col]] <= adjp_th
+                # log10 transform p-values to avoid influence of small p-values
+                tmp_result[[adjp_col]] <- -log10(tmp_result[[adjp_col]])
+            }
+            sig_terms <- tmp_result[sig_mask, term_col]
+            if (length(sig_terms) == 0){
+                next
+            }
+            other_queries <- setdiff(all_queries, query)
+            # compute average across other queries
+            avg_other <- sapply(sig_terms, function(term){
+                vals <- sapply(other_queries, function(q){
+                    v <- results_all[results_all$name==q & results_all[[term_col]]==term, adjp_col]
+                    if (length(v) == 0) return(NA) else return(v[1])
+                })
+                if (tool=="pycisTarget" | tool=="RcisTarget"){
+                    vals[is.na(vals)] <- 0
+                    mean(vals)
+                } else {
+                    vals[is.na(vals)] <- 1
+                    mean(-log10(vals))
+                }
+            })
+            ordered_terms <- names(sort(avg_other, decreasing = FALSE))
+            top_n_tmp <- min(top_n, length(ordered_terms))
+            tmp_terms <- ordered_terms[1:top_n_tmp]
+            top_terms <- unique(c(top_terms, tmp_terms))
+        }
+        if (length(top_terms) == 0){
+            file.create(plot_path_specificTerms)
+            return()
+        }
+    } else {
+        stop("Unknown type: ", type)
+    }
+
+    # make adjusted p-vale and effect-size (odds-ratio or normalized enrichment scores) dataframes
+    adjp_df <- dcast(results_all, as.formula(paste(term_col, "~ name")), value.var = adjp_col)
+    rownames(adjp_df) <- adjp_df[[term_col]]
+    adjp_df[[term_col]] <- NULL
+    adjp_df <- adjp_df[,unique(results_all$name)]
+
+    effect_df <- dcast(results_all, as.formula(paste(term_col, "~ name")), value.var = effect_col)
+    rownames(effect_df) <- effect_df[[term_col]]
+    effect_df[[term_col]] <- NULL
+    effect_df <- effect_df[,unique(results_all$name)]
+
+    # filter by selected terms
+    adjp_df <- adjp_df[top_terms,]
+    effect_df <- effect_df[top_terms,]
+
+    # fill NA for effect_df with 1 or 0 (i.e., neutral enrichment) and for adjp_df with 1 (i.e., no significance)
+    effect_df[is.na(effect_df)] <- if (tool=="preranked_GSEApy" | tool=="pycisTarget" | tool=="RcisTarget") 0 else 1
+    adjp_df[is.na(adjp_df)] <- if (tool=="pycisTarget" | tool=="RcisTarget") 0 else 1
+
+    # make stat. sign. annotation for effect-size plot later
+    adjp_annot <- adjp_df
     if(tool=="pycisTarget" | tool=="RcisTarget"){
-        tmp_terms <- tmp_result[order(-tmp_result[[adjp_col]]), term_col][1:top_n_tmp]
+        adjp_annot[adjp_df >= adjp_th] <- "*"
+        adjp_annot[adjp_df < adjp_th] <- ""
     }else{
-        tmp_terms <- tmp_result[order(tmp_result[[adjp_col]]), term_col][1:top_n_tmp]
+        adjp_annot[adjp_df <= adjp_th] <- "*"
+        adjp_annot[adjp_df > adjp_th] <- ""
     }
-    
-    top_terms <- unique(c(top_terms,tmp_terms))
-}
 
-# make adjusted p-vale and effect-size (odds-ratio or normalized enrichment scores) dataframes
-adjp_df <- dcast(results_all, as.formula(paste(term_col, "~ name")), value.var = adjp_col)
-rownames(adjp_df) <- adjp_df[[term_col]]
-adjp_df[[term_col]] <- NULL
-adjp_df <- adjp_df[,unique(results_all$name)]
-
-effect_df <- dcast(results_all, as.formula(paste(term_col, "~ name")), value.var = effect_col)
-rownames(effect_df) <- effect_df[[term_col]]
-effect_df[[term_col]] <- NULL
-effect_df <- effect_df[,unique(results_all$name)]
-
-# filter by top terms
-adjp_df <- adjp_df[top_terms,]
-effect_df <- effect_df[top_terms,]
-                                    
-# fill NA for effect_df with 1 or 0 (i.e., neutral enrichment) and for adjp_df with 1 (i.e., no significance)
-effect_df[is.na(effect_df)] <- if (tool=="preranked_GSEApy" | tool=="pycisTarget" | tool=="RcisTarget") 0 else 1
-adjp_df[is.na(adjp_df)] <- if (tool=="pycisTarget" | tool=="RcisTarget") 0 else 1
-                                     
-# make stat. sign. annotation for effect-size plot later
-adjp_annot <- adjp_df
-if(tool=="pycisTarget" | tool=="RcisTarget"){
-    adjp_annot[adjp_df >= adjp_th] <- "*"
-    adjp_annot[adjp_df < adjp_th] <- ""
-}else{
-    adjp_annot[adjp_df <= adjp_th] <- "*"
-    adjp_annot[adjp_df > adjp_th] <- ""
-}
+    # log2 transform odds ratios
+    if (tool!="preranked_GSEApy" & tool!="pycisTarget" & tool!="RcisTarget"){
+        effect_df <- log2(effect_df)
+    }
 
-# log2 transform odds ratios
-if (tool!="preranked_GSEApy" & tool!="pycisTarget" & tool!="RcisTarget"){
-    effect_df <- log2(effect_df)
-}
+    # transform and cap adjp and effect size
+    if(tool!="pycisTarget" & tool!="RcisTarget"){
+        # cap effect_df for plotting depending on tool  abs(log2(or)) < or_cap OR abs(NES) < nes_cap
+        effect_df[effect_df > effect_cap] <- effect_cap
+        effect_df[effect_df < -effect_cap] <- -effect_cap
 
-# transform and cap adjp and effect size
-if(tool!="pycisTarget" & tool!="RcisTarget"){
-    # cap effect_df for plotting depending on tool  abs(log2(or)) < or_cap OR abs(NES) < nes_cap
-    effect_df[effect_df > effect_cap] <- effect_cap
-    effect_df[effect_df < -effect_cap] <- -effect_cap
+        # log10 transform adjp & cap -log10(adjpvalue) < adjp_cap
+        adjp_df <- -log10(adjp_df)
+        adjp_df[adjp_df > adjp_cap] <- adjp_cap
+    }
 
-    # log10 transform adjp & cap -log10(adjpvalue) < adjp_cap
-    adjp_df <- -log10(adjp_df)
-    adjp_df[adjp_df > adjp_cap] <- adjp_cap
-}
-                                     
-# plot hierarchically clustered heatmap for adjp and effect
-width_hm <- 0.2 * dim(adjp_df)[2] + 5
-height_hm <- 0.2 * dim(adjp_df)[1] + 3
-
-pheatmap(adjp_df,
-         display_numbers=adjp_annot,
-         main= if (tool=="pycisTarget" | tool=="RcisTarget") adjp_col else"-log10(adj. p-values)",
-         treeheight_row = 10,
-         treeheight_col = 10,
-         fontsize = 6,
-         fontsize_number = 10,
-         cluster_rows = ifelse(nrow(adjp_df)>1, TRUE, FALSE),
-         cluster_cols = ifelse(ncol(adjp_df)>1, cluster_flag, FALSE),
-         silent=TRUE,
-         width=width_hm,
-         height=height_hm,
-         angle_col=45, 
-         cellwidth=10,
-         cellheight=10,
-         filename=adjp_hm_path,
-         breaks= if (max(adjp_df)==0) seq(0, 1, length.out=200) else seq(0, max(adjp_df), length.out=200),
-         color=colorRampPalette(c("white", "red"))(200)
-        )
-
-pheatmap(effect_df,
-         display_numbers=adjp_annot,
-         main = if (tool=="preranked_GSEApy" | tool=="pycisTarget" | tool=="RcisTarget") effect_col else paste0("log2(",effect_col,")"),
-         treeheight_row = 10,
-         treeheight_col = 10,
-         fontsize = 6,
-         fontsize_number = 10,
-         cluster_rows = ifelse(nrow(effect_df)>1, TRUE, FALSE),
-         cluster_cols = ifelse(ncol(effect_df)>1, cluster_flag, FALSE),
-         silent=TRUE,
-         width=width_hm,
-         height=height_hm,
-         angle_col=45, 
-         cellwidth=10,
-         cellheight=10,
-         filename=effect_hm_path,
-         breaks=seq(-max(abs(effect_df)), max(abs(effect_df)), length.out=200),
-         color=colorRampPalette(c("blue", "white", "red"))(200)
-        )
-
-# perform hierarchical clustering on the effect-sizes (NES or log2 odds ratios) of the terms and reorder dataframe
-if (nrow(effect_df)>1){
-    hc_rows <- hclust(dist(effect_df))
-    hc_row_names <- rownames(effect_df)[hc_rows$order]
-    effect_df <- effect_df[hc_rows$order,]
-}else{
-    hc_row_names <- rownames(effect_df)
-}
+    # perform hierarchical clustering on the effect-sizes (NES or log2 odds ratios) of the terms and reorder dataframe
+    if (nrow(effect_df)>1){
+        hc_rows <- hclust(dist(effect_df))
+        hc_row_names <- rownames(effect_df)[hc_rows$order]
+        effect_df <- effect_df[hc_rows$order,]
+    }else{
+        hc_row_names <- rownames(effect_df)
+    }
 
-if (ncol(effect_df)>1 & cluster_flag){
-    hc_cols <- hclust(dist(t(effect_df)))
-    hc_col_names <- colnames(effect_df)[hc_cols$order]
-    effect_df <- effect_df[, hc_cols$order]
-} else{
-    hc_col_names <- colnames(effect_df)
-}
+    if (ncol(effect_df)>1 & cluster_flag){
+        hc_cols <- hclust(dist(t(effect_df)))
+        hc_col_names <- colnames(effect_df)[hc_cols$order]
+        effect_df <- effect_df[, hc_cols$order]
+    } else{
+        hc_col_names <- colnames(effect_df)
+    }
 
-# add a column for the terms
-effect_df$terms <- rownames(effect_df)
-# melt data frame for plotting
-plot_df <- melt(data=effect_df,
-                id.vars="terms",
-                measure.vars=colnames(adjp_df),
-                variable.name = "feature_set",
-               value.name = "effect")
-
-# add adjusted p-values to plot dataframe
-plot_df$adjp <- apply(plot_df, 1, function(x) adjp_df[x['terms'], x['feature_set']])
-
-# set effect-size and adjusted p-value conditional to NA (odds ratios == 0 and NES == 0) for plotting
-plot_df$effect[plot_df$effect==0] <- NA
-plot_df$adjp[plot_df$adjp==0] <- NA
-
-# ensure that the order of terms and feature sets is kept
-plot_df$terms <- factor(plot_df$terms,levels=hc_row_names)
-if (cluster_flag){
-    plot_df$feature_set <- factor(plot_df$feature_set, levels=hc_col_names)
-}
+    # add a column for the terms
+    effect_df$terms <- rownames(effect_df)
+    # melt data frame for plotting
+    plot_df <- melt(data=effect_df,
+                    id.vars="terms",
+                    measure.vars=colnames(adjp_df),
+                    variable.name = "feature_set",
+                    value.name = "effect")
+
+    # add adjusted p-values to plot dataframe
+    plot_df$adjp <- apply(plot_df, 1, function(x) adjp_df[x['terms'], x['feature_set']])
+
+    # set effect-size and adjusted p-value conditional to NA (odds ratios == 0 and NES == 0) for plotting
+    plot_df$effect[plot_df$effect==0] <- NA
+    plot_df$adjp[plot_df$adjp==0] <- NA
+
+    # ensure that the order of terms and feature sets is kept
+    plot_df$terms <- factor(plot_df$terms,levels=hc_row_names)
+    if (cluster_flag){
+        plot_df$feature_set <- factor(plot_df$feature_set, levels=hc_col_names)
+    }
 
-# stat. significance star df
-if(tool=="pycisTarget" | tool=="RcisTarget"){
-    adjp_star_df <- plot_df[(!is.na(plot_df$adjp)) & (plot_df$adjp >= adjp_th),]
-}else{
-    adjp_star_df <- plot_df[(!is.na(plot_df$adjp)) & (plot_df$adjp >= -log10(adjp_th)),]
-}
+    # stat. significance star df
+    if(tool=="pycisTarget" | tool=="RcisTarget"){
+        adjp_star_df <- plot_df[(!is.na(plot_df$adjp)) & (plot_df$adjp >= adjp_th),]
+    }else{
+        adjp_star_df <- plot_df[(!is.na(plot_df$adjp)) & (plot_df$adjp >= -log10(adjp_th)),]
+    }
 
-# plot
-enr_plot <- ggplot(plot_df, aes(x=feature_set, y=terms, fill=effect, size=adjp))+ 
-        geom_point(shape=21, stroke=0.25) +
-        geom_point(data = adjp_star_df, aes(x=feature_set, y=terms), shape=8, size=0.5, color = "black", alpha = 0.5) + # stars for statistical significance
-        scale_fill_gradient2(midpoint=0, low="royalblue4", mid="white", high="firebrick2", space ="Lab", name = if (tool=="preranked_GSEApy" | tool=="pycisTarget" | tool=="RcisTarget") effect_col else paste0("log2(",effect_col,")")) +
-        scale_y_discrete(label=addline_format) + 
-        scale_size_continuous(range = c(1,5), name = if (tool=="pycisTarget" | tool=="RcisTarget") adjp_col else paste("-log10(",adjp_col,")")) +
-        ggtitle(paste(tool, database, group, sep='\n')) +
-        clean_theme() +
-        theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust=1),
-              axis.title.x=element_blank(),
-              axis.title.y=element_blank()) + guides(size = guide_legend(reverse=TRUE))
-
-width <- 0.15 * dim(adjp_df)[2] + 3
-height <- 0.2 * dim(adjp_df)[1] + 2
-# options(repr.plot.width=width, repr.plot.height=height)
-# enr_plot
-                      
-# save plot
-ggsave_new(filename = paste0(group,'_',database,'_summary'),
+    # plot
+    enr_plot <- ggplot(plot_df, aes(x=feature_set, y=terms, fill=effect, size=adjp))+ 
+            geom_point(shape=21, stroke=0.25) +
+            geom_point(data = adjp_star_df, aes(x=feature_set, y=terms), shape=8, size=0.5, color = "black", alpha = 0.5) +
+            scale_fill_gradient2(midpoint=0, low="royalblue4", mid="white", high="firebrick2", space ="Lab", name = if (tool=="preranked_GSEApy" | tool=="pycisTarget" | tool=="RcisTarget") effect_col else paste0("log2(",effect_col,")")) +
+            scale_y_discrete(label=addline_format) + 
+            scale_size_continuous(range = c(1,5), name = if (tool=="pycisTarget" | tool=="RcisTarget") adjp_col else paste("-log10(",adjp_col,")")) +
+            ggtitle(paste(tool, database, group, sep='\n')) +
+            clean_theme() +
+            theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust=1),
+                  axis.title.x=element_blank(),
+                  axis.title.y=element_blank()) + guides(size = guide_legend(reverse=TRUE))
+
+    width <- 0.15 * dim(adjp_df)[2] + 3
+    height <- 0.2 * dim(adjp_df)[1] + 2
+
+    # save plot
+    if (type == "topTerms"){
+        plot_path <- plot_path_topTerms
+    } else {
+        plot_path <- plot_path_specificTerms
+    }
+    ggsave_new(filename = paste0(group,'_',database,'_summary_', type),
                results_path = dirname(plot_path),
                plot = enr_plot,
                width = width,
                height = height
               )
+}
+
+# create both plots
+make_summary_plot(type = "topTerms")
+make_summary_plot(type = "specificTerms")