epigen · bednarsky · Oct 2, 2025
diff --git a/README.md b/README.md
@@ -120,8 +120,9 @@ The five tools LOLA, GREAT, pycisTarget, RcisTarget and GSEApy (over-representat
         - effect-size is presented by the x-axis position
         - overlap is presented by the dot size
     - group summary/overview
-        - the union of the top `{top_terms_n}` most significant terms per query, method, and database within a group is determined. 
-        - their effect-size (effect) and statistical significance (adjp) are visualized as hierarchically clustered heatmaps, with statistical significance denoted by `\*` (PDF).
+        - two plots: 
+            - top terms: the union of the top `{top_terms_n}` most significant terms per query, method, and database within a group is determined. 
+            - specific terms: the union of statistically significant terms with the lowest average significance across all other groups is determined. This plot is empty if no statistically significant terms are found.
         - a hierarchically clustered bubble plot encoding both effect-size (color) and significance (size) is provided, with statistical significance denoted by `\*` (PNG).
         - all summary visualizations are configured to cap the values (`{adjp_cap}`/`{or_cap}`/`{nes_cap}`) to avoid shifts in the coloring scheme caused by outliers.
 - **results** (`{result_path}/enrichment_analysis`)
@@ -131,9 +132,7 @@ The five tools LOLA, GREAT, pycisTarget, RcisTarget and GSEApy (over-representat
         - enrichment dot plot (PNG): `{query}\_{database}.{png}`
     - `{group}/{method}/{database}/` containing
         - aggregated result table (CSV): `{group}\_{database}\_all.csv`
-        - filtered aggregated result table (CSV): `{group}\_{database}\_sig.csv`
-        - hierarchically clustered heatmaps visualizing statistical significance and effect-sizes of the top `{top_terms_n}` terms (PDF): `{group}\_{database}\_{adjp|effect}\_heatmap.pdf`
-        - hierarchically clustered bubble plot visualizing statistical significance and effect-sizes simultaneously (PNG):  `{group}\_{database}\_summary.{png}`
+        - hierarchically clustered bubble plot visualizing statistical significance and effect-sizes simultaneously (PNG):  `{group}\_{database}\_summary_{topTerms|specificTerms}.{png}`. If there is only one query gene/region set, this plot is empty.
 
 Note:
 - Despite usage of the correct parameter, **rGREAT** was not using the provided cores during testing. Nevertheless, it is still provided as parameter.

diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -92,12 +92,18 @@ rule all:
         expand(os.path.join(result_path, '{gene_set}', 'preranked_GSEApy','{db}','{gene_set}_{db}.csv'), gene_set=rnk_dict.keys(), db=database_dict.keys()),
         expand(os.path.join(result_path, '{gene_set}', 'preranked_GSEApy','{db}','{gene_set}_{db}.png'), gene_set=rnk_dict.keys(), db=database_dict.keys()),
         # summaries
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='ORA_GSEApy', db=database_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=rnk["group"].unique(), tool='preranked_GSEApy', db=database_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=regions["group"].unique(), tool='GREAT', db=database_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=regions["group"].unique(), tool='LOLA', db=lola_db_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=regions["group"].unique(), tool='pycisTarget', db=pycistarget_db_dict.keys()),
-        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='RcisTarget', db=rcistarget_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='ORA_GSEApy', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='ORA_GSEApy', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=rnk["group"].unique(), tool='preranked_GSEApy', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=rnk["group"].unique(), tool='preranked_GSEApy', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=regions["group"].unique(), tool='GREAT', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=regions["group"].unique(), tool='GREAT', db=database_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=regions["group"].unique(), tool='LOLA', db=lola_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=regions["group"].unique(), tool='LOLA', db=lola_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=regions["group"].unique(), tool='pycisTarget', db=pycistarget_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=regions["group"].unique(), tool='pycisTarget', db=pycistarget_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='RcisTarget', db=rcistarget_db_dict.keys()),
+        expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='RcisTarget', db=rcistarget_db_dict.keys()),
         # config
         envs = expand(os.path.join(result_path,'envs','{env}.yaml'),env=['region_enrichment_analysis','gene_enrichment_analysis','visualization','pycisTarget','RcisTarget']),
         configs = os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])),

diff --git a/workflow/report/summary_plot_specificTerms.rst b/workflow/report/summary_plot_specificTerms.rst
@@ -0,0 +1 @@
+Summary of the most specific enrichment analysis results of group {{snakemake.wildcards["group"]}} in database {{snakemake.wildcards["db"]}} using {{snakemake.wildcards["tool"]}}.
diff --git a/workflow/report/summary_plot.rst → workflow/report/summary_plot_topTerms.rst b/workflow/report/summary_plot.rst → workflow/report/summary_plot_topTerms.rst
diff --git a/workflow/rules/aggregate.smk b/workflow/rules/aggregate.smk
@@ -5,7 +5,6 @@ rule aggregate:
         enrichment_results = get_group_paths,
     output:
         results_all = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_all.csv'),
-        results_sig = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_sig.csv'),
     threads: config.get("threads", 1)
     resources:
         mem_mb=config.get("mem", "16000"),
@@ -21,17 +20,26 @@ rule visualize:
     input:
         results_all = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_all.csv'),
     output:
-        summary_plot = report(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),
-                             caption="../report/summary_plot.rst", 
+        summary_plot_topTerms = report(
+                             os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),
+                             caption="../report/summary_plot_topTerms.rst", 
+                             category="{}_{}".format(config["project_name"], module_name),
+                             subcategory="{group}",
+                               labels={
+                                  "name": "{tool}",
+                                  "type": "summary plot",
+                                  "misc": "{db}",
+                              }),
+        summary_plot_specificTerms = report(
+                             os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),
+                             caption="../report/summary_plot_specificTerms.rst", 
                              category="{}_{}".format(config["project_name"], module_name),
                              subcategory="{group}",
                                labels={
                                   "name": "{tool}",
                                   "type": "summary plot",
                                   "misc": "{db}",
                               }),
-        adjp_hm = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_adjp_heatmap.pdf'),
-        effect_hm = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_effect_heatmap.pdf'),
     params:
         utils_path = workflow.source_path("../scripts/utils.R")
     threads: config.get("threads", 1)

diff --git a/workflow/scripts/aggregate.py b/workflow/scripts/aggregate.py
@@ -12,7 +12,6 @@
 
 # output
 results_all_path = snakemake.output['results_all']
-results_sig_path = snakemake.output['results_sig']
 
 # parameters
 group = snakemake.wildcards["group"]
@@ -41,23 +40,10 @@
 # move on if results are empty
 if len(results_list)==0:
     open(results_all_path, mode='a').close()
-    open(results_sig_path, mode='a').close()
     sys.exit(0)
 
 # concatenate all results into one results dataframe
 result_df = pd.concat(results_list, axis=0)
 
 # save all enrichment results
-result_df.to_csv(results_all_path)
-
-# find union of statistically significant terms
-if tool=="pycisTarget" or tool=="RcisTarget":
-    sig_terms = result_df.loc[result_df[adjp_col] >= adjp_th, term_col].unique()
-else:
-    sig_terms = result_df.loc[result_df[adjp_col] <= adjp_th, term_col].unique()
-
-# filter by significant terms
-result_sig_df = result_df.loc[result_df[term_col].isin(sig_terms), :]
-
-# save filtered enirchment results by significance
-result_sig_df.to_csv(results_sig_path)
+result_df.to_csv(results_all_path)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Summary of the most specific enrichment analysis results of group {{snakemake.wildcards["group"]}} in database {{snakemake.wildcards["db"]}} using {{snakemake.wildcards["tool"]}}.