Add Snakemake report() annotations and a CI step that builds the workflow report.

* .github/workflows/test-spras.yml: after the main Snakemake run, add a step that
  runs `snakemake --configfile config/config.yaml --report report.zip`.
* .gitignore: ignore the generated report.html and report.zip.
* Snakefile: wrap rule outputs in report(...), grouped by category
  ("dataset-{dataset}" or "dgs-{dataset_gold_standard_pair}") and subcategory
  ("Reconstructed Output", "Visualization", "Summary", "ML", "Evaluation").

NOTE(review): the line breaks in this patch were restored so that every hunk matches
its @@ line counts; leading whitespace was reconstructed from the YAML/Snakemake
structure. If context fails to match, apply with `git apply --ignore-whitespace`
or regenerate the patch from the branch.

diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml
index daa9b10cf..0f7f49129 100644
--- a/.github/workflows/test-spras.yml
+++ b/.github/workflows/test-spras.yml
@@ -94,6 +94,9 @@ jobs:
         # race conditions from #268 and #279
         # We also enforce strict DAG evaluation to catch DAG problems before they appear as user errors. (#359)
         run: snakemake --cores 4 --configfile config/config.yaml --show-failed-logs --strict-dag-evaluation cyclic-graph --strict-dag-evaluation functions --strict-dag-evaluation periodic-wildcards
+      - name: Collect Snakemake workflow report
+        shell: bash --login {0}
+        run: snakemake --configfile config/config.yaml --report report.zip
 
   # Run pre-commit checks on source files
   pre-commit:
diff --git a/.gitignore b/.gitignore
index 3629c49c6..49cefe540 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,6 +134,8 @@ dmypy.json
 
 # Snakemake
 .snakemake/
+report.html
+report.zip
 
 # Output files
 output/
diff --git a/Snakefile b/Snakefile
index 02f019e8d..c093791ef 100644
--- a/Snakefile
+++ b/Snakefile
@@ -290,7 +290,12 @@ rule parse_output:
     input:
         raw_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']),
         dataset_file = SEP.join([out_dir, 'dataset-{dataset}-merged.pickle'])
-    output: standardized_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'pathway.txt'])
+    output:
+        standardized_file = report(
+            SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'pathway.txt']),
+            category="dataset-{dataset}",
+            subcategory="Reconstructed Output"
+        )
     run:
         params = reconstruction_params(wildcards.algorithm, wildcards.params).copy()
         params['dataset'] = input.dataset_file
@@ -311,7 +316,11 @@ rule parse_output:
 rule viz_cytoscape:
     input: pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)
     output:
-        session = SEP.join([out_dir, '{dataset}-cytoscape.cys'])
+        session = report(
+            SEP.join([out_dir, '{dataset}-cytoscape.cys']),
+            category="dataset-{dataset}",
+            subcategory="Visualization"
+        )
     run:
         cytoscape.run_cytoscape(input.pathways, output.session, FRAMEWORK)
 
@@ -322,7 +331,8 @@ rule summary_table:
         # Collect all pathways generated for the dataset
         pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params),
         dataset_file = SEP.join([out_dir, 'dataset-{dataset}-merged.pickle'])
-    output: summary_table = SEP.join([out_dir, '{dataset}-pathway-summary.txt'])
+    output:
+        summary_table = report(SEP.join([out_dir, '{dataset}-pathway-summary.txt']), category="dataset-{dataset}", subcategory="Summary")
     run:
         # Load the node table from the pickled dataset file
         node_table = Dataset.from_file(input.dataset_file).node_table
@@ -334,13 +344,13 @@ rule ml_analysis:
     input:
         pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)
     output:
-        pca_image = SEP.join([out_dir, '{dataset}-ml', 'pca.png']),
-        pca_variance= SEP.join([out_dir, '{dataset}-ml', 'pca-variance.txt']),
-        pca_coordinates = SEP.join([out_dir, '{dataset}-ml', 'pca-coordinates.txt']),
-        hac_image_vertical = SEP.join([out_dir, '{dataset}-ml', 'hac-vertical.png']),
-        hac_clusters_vertical = SEP.join([out_dir, '{dataset}-ml', 'hac-clusters-vertical.txt']),
-        hac_image_horizontal = SEP.join([out_dir, '{dataset}-ml', 'hac-horizontal.png']),
-        hac_clusters_horizontal = SEP.join([out_dir, '{dataset}-ml', 'hac-clusters-horizontal.txt']),
+        pca_image = report(SEP.join([out_dir, '{dataset}-ml', 'pca.png']), category="dataset-{dataset}", subcategory="ML"),
+        pca_variance = report(SEP.join([out_dir, '{dataset}-ml', 'pca-variance.txt']), category="dataset-{dataset}", subcategory="ML"),
+        pca_coordinates = report(SEP.join([out_dir, '{dataset}-ml', 'pca-coordinates.txt']), category="dataset-{dataset}", subcategory="ML"),
+        hac_image_vertical = report(SEP.join([out_dir, '{dataset}-ml', 'hac-vertical.png']), category="dataset-{dataset}", subcategory="ML"),
+        hac_clusters_vertical = report(SEP.join([out_dir, '{dataset}-ml', 'hac-clusters-vertical.txt']), category="dataset-{dataset}", subcategory="ML"),
+        hac_image_horizontal = report(SEP.join([out_dir, '{dataset}-ml', 'hac-horizontal.png']), category="dataset-{dataset}", subcategory="ML"),
+        hac_clusters_horizontal = report(SEP.join([out_dir, '{dataset}-ml', 'hac-clusters-horizontal.txt']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.hac_vertical(summary_df, output.hac_image_vertical, output.hac_clusters_vertical, **hac_params)
@@ -353,8 +363,8 @@ rule jaccard_similarity:
         pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt',
                           out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)
     output:
-        jaccard_similarity_matrix = SEP.join([out_dir, '{dataset}-ml', 'jaccard-matrix.txt']),
-        jaccard_similarity_heatmap = SEP.join([out_dir, '{dataset}-ml', 'jaccard-heatmap.png'])
+        jaccard_similarity_matrix = report(SEP.join([out_dir, '{dataset}-ml', 'jaccard-matrix.txt']), category="dataset-{dataset}", subcategory="ML"),
+        jaccard_similarity_heatmap = report(SEP.join([out_dir, '{dataset}-ml', 'jaccard-heatmap.png']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.jaccard_similarity_eval(summary_df, output.jaccard_similarity_matrix, output.jaccard_similarity_heatmap)
@@ -365,7 +375,7 @@ rule ensemble:
     input:
         pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)
     output:
-        ensemble_network_file = SEP.join([out_dir,'{dataset}-ml', 'ensemble-pathway.txt'])
+        ensemble_network_file = report(SEP.join([out_dir,'{dataset}-ml', 'ensemble-pathway.txt']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.ensemble_network(summary_df, output.ensemble_network_file)
@@ -381,13 +391,13 @@ rule ml_analysis_aggregate_algo:
     input:
         pathways = collect_pathways_per_algo
     output:
-        pca_image = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca.png']),
-        pca_variance= SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca-variance.txt']),
-        pca_coordinates = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca-coordinates.txt']),
-        hac_image_vertical = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-vertical.png']),
-        hac_clusters_vertical = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-clusters-vertical.txt']),
-        hac_image_horizontal = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-horizontal.png']),
-        hac_clusters_horizontal = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-clusters-horizontal.txt']),
+        pca_image = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca.png']), category="dataset-{dataset}", subcategory="ML"),
+        pca_variance = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca-variance.txt']), category="dataset-{dataset}", subcategory="ML"),
+        pca_coordinates = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca-coordinates.txt']), category="dataset-{dataset}", subcategory="ML"),
+        hac_image_vertical = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-vertical.png']), category="dataset-{dataset}", subcategory="ML"),
+        hac_clusters_vertical = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-clusters-vertical.txt']), category="dataset-{dataset}", subcategory="ML"),
+        hac_image_horizontal = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-horizontal.png']), category="dataset-{dataset}", subcategory="ML"),
+        hac_clusters_horizontal = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-clusters-horizontal.txt']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.hac_vertical(summary_df, output.hac_image_vertical, output.hac_clusters_vertical, **hac_params)
@@ -399,7 +409,7 @@ rule ensemble_per_algo:
     input:
         pathways = collect_pathways_per_algo
     output:
-        ensemble_network_file = SEP.join([out_dir,'{dataset}-ml', '{algorithm}-ensemble-pathway.txt'])
+        ensemble_network_file = report(SEP.join([out_dir,'{dataset}-ml', '{algorithm}-ensemble-pathway.txt']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.ensemble_network(summary_df, output.ensemble_network_file)
@@ -409,8 +419,8 @@ rule jaccard_similarity_per_algo:
     input:
         pathways = collect_pathways_per_algo
     output:
-        jaccard_similarity_matrix = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-matrix.txt']),
-        jaccard_similarity_heatmap = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-heatmap.png'])
+        jaccard_similarity_matrix = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-matrix.txt']), category="dataset-{dataset}", subcategory="ML"),
+        jaccard_similarity_heatmap = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-heatmap.png']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.jaccard_similarity_eval(summary_df, output.jaccard_similarity_matrix, output.jaccard_similarity_heatmap)
@@ -439,8 +449,8 @@ rule evaluation_pr_per_pathways:
         node_gold_standard_file = get_gold_standard_pickle_file,
         pathways = collect_pathways_per_dataset
     output:
-        node_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', "pr-per-pathway-nodes.txt"]),
-        node_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-nodes.png']),
+        node_pr_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', "pr-per-pathway-nodes.txt"]), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pr_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pr_df = Evaluation.node_precision_and_recall(input.pathways, node_table)
@@ -458,8 +468,8 @@ rule evaluation_per_algo_pr_per_pathways:
         node_gold_standard_file = get_gold_standard_pickle_file,
         pathways = collect_pathways_per_algo_per_dataset,
     output:
-        node_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', "pr-per-pathway-for-{algorithm}-nodes.txt"]),
-        node_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-for-{algorithm}-nodes.png']),
+        node_pr_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', "pr-per-pathway-for-{algorithm}-nodes.txt"]), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pr_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-for-{algorithm}-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pr_df = Evaluation.node_precision_and_recall(input.pathways, node_table)
@@ -484,8 +494,8 @@ rule evaluation_pca_chosen:
         pca_coordinates_file = collect_pca_coordinates_per_dataset,
         pathway_summary_file = collect_summary_statistics_per_dataset
     output:
-        node_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-nodes.txt']),
-        node_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-nodes.png']),
+        node_pca_chosen_pr_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-nodes.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pca_chosen_pr_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pca_chosen_pathway = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir)
@@ -505,8 +515,8 @@ rule evaluation_per_algo_pca_chosen:
         pca_coordinates_file = collect_pca_coordinates_per_algo_per_dataset,
         pathway_summary_file = collect_summary_statistics_per_dataset
     output:
-        node_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.txt']),
-        node_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.png']),
+        node_pca_chosen_pr_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pca_chosen_pr_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir)
@@ -530,8 +540,8 @@ rule evaluation_ensemble_pr_curve:
         dataset_file = get_dataset_pickle_file,
         ensemble_file = collect_ensemble_per_dataset
     output:
-        node_pr_curve_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes.png']),
-        node_pr_curve_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes.txt']),
+        node_pr_curve_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pr_curve_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(node_table, input.ensemble_file, input.dataset_file)
@@ -549,8 +559,8 @@ rule evaluation_per_algo_ensemble_pr_curve:
         dataset_file = get_dataset_pickle_file,
         ensemble_files = collect_ensemble_per_algo_per_dataset
     output:
-        node_pr_curve_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes-per-algorithm-nodes.png']),
-        node_pr_curve_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes-per-algorithm-nodes.txt']),
+        node_pr_curve_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes-per-algorithm-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pr_curve_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes-per-algorithm-nodes.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         node_ensembles_dict = Evaluation.edge_frequency_node_ensemble(node_table, input.ensemble_files, input.dataset_file)
@@ -560,7 +570,7 @@ rule evaluation_edge_dummy:
     input:
         edge_gold_standard_file = get_gold_standard_pickle_file,
     output:
-        dummy_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'dummy-edge.txt']),
+        dummy_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'dummy-edge.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         mixed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).mixed_edge_table
         undirected_edge_table = Evaluation.from_file(input.edge_gold_standard_file).undirected_edge_table