Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,9 @@ The five tools LOLA, GREAT, pycisTarget, RcisTarget and GSEApy (over-representat
- effect-size is presented by the x-axis position
- overlap is presented by the dot size
- group summary/overview
- the union of the top `{top_terms_n}` most significant terms per query, method, and database within a group is determined.
- their effect-size (effect) and statistical significance (adjp) are visualized as hierarchically clustered heatmaps, with statistical significance denoted by `\*` (PDF).
- two plots:
- top terms: the union of the top `{top_terms_n}` most significant terms per query, method, and database within a group is determined.
- specific terms: the union of statistically significant terms with the lowest average significance across all other groups is determined. This plot is empty if no statistically significant terms are found.
- a hierarchically clustered bubble plot encoding both effect-size (color) and significance (size) is provided, with statistical significance denoted by `\*` (PNG).
- all summary visualizations are configured to cap the values (`{adjp_cap}`/`{or_cap}`/`{nes_cap}`) to avoid shifts in the coloring scheme caused by outliers.
- **results** (`{result_path}/enrichment_analysis`)
Expand All @@ -131,9 +132,7 @@ The five tools LOLA, GREAT, pycisTarget, RcisTarget and GSEApy (over-representat
- enrichment dot plot (PNG): `{query}\_{database}.{png}`
- `{group}/{method}/{database}/` containing
- aggregated result table (CSV): `{group}\_{database}\_all.csv`
- filtered aggregated result table (CSV): `{group}\_{database}\_sig.csv`
- hierarchically clustered heatmaps visualizing statistical significance and effect-sizes of the top `{top_terms_n}` terms (PDF): `{group}\_{database}\_{adjp|effect}\_heatmap.pdf`
- hierarchically clustered bubble plot visualizing statistical significance and effect-sizes simultaneously (PNG): `{group}\_{database}\_summary.{png}`
- hierarchically clustered bubble plot visualizing statistical significance and effect-sizes simultaneously (PNG): `{group}\_{database}\_summary_{topTerms|specificTerms}.{png}`. In case of only one query gene/region set, this plot is empty.

Note:
- Despite usage of the correct parameter, **rGREAT** did not use the provided cores during testing. Nevertheless, it is still provided as a parameter.
Expand Down
18 changes: 12 additions & 6 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,18 @@ rule all:
expand(os.path.join(result_path, '{gene_set}', 'preranked_GSEApy','{db}','{gene_set}_{db}.csv'), gene_set=rnk_dict.keys(), db=database_dict.keys()),
expand(os.path.join(result_path, '{gene_set}', 'preranked_GSEApy','{db}','{gene_set}_{db}.png'), gene_set=rnk_dict.keys(), db=database_dict.keys()),
# summaries
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='ORA_GSEApy', db=database_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=rnk["group"].unique(), tool='preranked_GSEApy', db=database_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=regions["group"].unique(), tool='GREAT', db=database_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=regions["group"].unique(), tool='LOLA', db=lola_db_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=regions["group"].unique(), tool='pycisTarget', db=pycistarget_db_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='RcisTarget', db=rcistarget_db_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='ORA_GSEApy', db=database_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='ORA_GSEApy', db=database_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=rnk["group"].unique(), tool='preranked_GSEApy', db=database_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=rnk["group"].unique(), tool='preranked_GSEApy', db=database_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=regions["group"].unique(), tool='GREAT', db=database_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=regions["group"].unique(), tool='GREAT', db=database_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=regions["group"].unique(), tool='LOLA', db=lola_db_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=regions["group"].unique(), tool='LOLA', db=lola_db_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=regions["group"].unique(), tool='pycisTarget', db=pycistarget_db_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=regions["group"].unique(), tool='pycisTarget', db=pycistarget_db_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='RcisTarget', db=rcistarget_db_dict.keys()),
expand(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),group=list(set(genes["group"].tolist()+regions["group"].tolist())), tool='RcisTarget', db=rcistarget_db_dict.keys()),
# config
envs = expand(os.path.join(result_path,'envs','{env}.yaml'),env=['region_enrichment_analysis','gene_enrichment_analysis','visualization','pycisTarget','RcisTarget']),
configs = os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])),
Expand Down
1 change: 1 addition & 0 deletions workflow/report/summary_plot_specificTerms.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Summary of the most specific enrichment analysis results of group {{snakemake.wildcards["group"]}} in database {{snakemake.wildcards["db"]}} using {{snakemake.wildcards["tool"]}}.
18 changes: 13 additions & 5 deletions workflow/rules/aggregate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ rule aggregate:
enrichment_results = get_group_paths,
output:
results_all = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_all.csv'),
results_sig = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_sig.csv'),
threads: config.get("threads", 1)
resources:
mem_mb=config.get("mem", "16000"),
Expand All @@ -21,17 +20,26 @@ rule visualize:
input:
results_all = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_all.csv'),
output:
summary_plot = report(os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary.png'),
caption="../report/summary_plot.rst",
summary_plot_topTerms = report(
os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_topTerms.png'),
caption="../report/summary_plot_topTerms.rst",
category="{}_{}".format(config["project_name"], module_name),
subcategory="{group}",
labels={
"name": "{tool}",
"type": "summary plot",
"misc": "{db}",
}),
summary_plot_specificTerms = report(
os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_summary_specificTerms.png'),
caption="../report/summary_plot_specificTerms.rst",
category="{}_{}".format(config["project_name"], module_name),
subcategory="{group}",
labels={
"name": "{tool}",
"type": "summary plot",
"misc": "{db}",
}),
adjp_hm = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_adjp_heatmap.pdf'),
effect_hm = os.path.join(result_path,'{group}','{tool}','{db}','{group}_{db}_effect_heatmap.pdf'),
params:
utils_path = workflow.source_path("../scripts/utils.R")
threads: config.get("threads", 1)
Expand Down
16 changes: 1 addition & 15 deletions workflow/scripts/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

# output
results_all_path = snakemake.output['results_all']
results_sig_path = snakemake.output['results_sig']

# parameters
group = snakemake.wildcards["group"]
Expand Down Expand Up @@ -41,23 +40,10 @@
# move on if results are empty
if len(results_list)==0:
open(results_all_path, mode='a').close()
open(results_sig_path, mode='a').close()
sys.exit(0)

# concatenate all results into one results dataframe
result_df = pd.concat(results_list, axis=0)

# save all enrichment results
result_df.to_csv(results_all_path)

# find union of statistically significant terms
if tool=="pycisTarget" or tool=="RcisTarget":
sig_terms = result_df.loc[result_df[adjp_col] >= adjp_th, term_col].unique()
else:
sig_terms = result_df.loc[result_df[adjp_col] <= adjp_th, term_col].unique()

# filter by significant terms
result_sig_df = result_df.loc[result_df[term_col].isin(sig_terms), :]

# save filtered enrichment results by significance
result_sig_df.to_csv(results_sig_path)
result_df.to_csv(results_all_path)
Loading