
Commit fc12b4e: Merge branch 'main' into synthetic
Parents: 20b1580 + 8d76766

File tree

8 files changed: +162 −12 lines

.github/workflows/publish.yml

Lines changed: 4 additions & 6 deletions

```diff
@@ -12,11 +12,6 @@ permissions:
   pages: write
   id-token: write
 
-# Allow one concurrent deployment
-concurrency:
-  group: 'pages'
-  cancel-in-progress: true
-
 jobs:
   pre-commit:
     name: Run pre-commit checks
@@ -61,7 +56,7 @@ jobs:
         run: sh run_snakemake.sh
       - name: Run Snakemake workflow for DMMMs
         shell: bash --login {0}
-        run: snakemake --cores 1 --configfile configs/dmmm.yaml --show-failed-logs -s spras/Snakefile
+        run: snakemake --cores 4 --configfile configs/dmmm.yaml --show-failed-logs -s spras/Snakefile
       # TODO: re-enable PRAs once RN/synthetic data PRs are merged.
       # - name: Run Snakemake workflow for PRAs
       #   shell: bash --login {0}
@@ -88,6 +83,9 @@ jobs:
     environment:
       name: github-pages
       url: ${{ steps.deployment.outputs.page_url }}
+    concurrency:
+      group: 'pages'
+      cancel-in-progress: true
     steps:
       - name: Download Artifacts
         uses: actions/download-artifact@v4
```

CONTRIBUTING.md

Lines changed: 4 additions & 3 deletions

```diff
@@ -11,10 +11,11 @@ To add a dataset (see `datasets/yeast-osmotic-stress` as an example of a dataset
 1. Check that your dataset provider isn't already added (some of these datasets act as providers for multiple datasets)
 1. Create a new folder under `datasets/<your-dataset>`
 1. Add a `raw` folder containing your data
-1. Add an attached Snakefile that converts your `raw` data to `processed` data
-1. Add your snakefile to the top-level `run_snakemake.sh` file.
-1. If your dataset is a paper reproduction, add a `reproduction/raw` and `reproduction/processed` folder
+1. Add an attached Snakefile that converts your `raw` data to `processed` data.
+   - Make sure to use `uv` here. See `yeast-osmotic-stress`'s Snakefile for an example.
+1. Add your Snakefile to the top-level `run_snakemake.sh` file.
 1. Add your datasets to the appropriate `configs`
+   - If your dataset has gold standards, make sure to include them here.
 
 ## Adding an algorithm
```
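The raw-to-processed step in the contributing checklist above can be sketched as a small conversion script that a dataset's Snakefile would invoke. Everything below is illustrative: the column layout (a node identifier plus a numeric prize, tab-separated) and the `NODEID`/`prize` header are assumptions for the sketch, not the actual SPRAS schema; check an existing dataset such as `yeast-osmotic-stress` for the real format.

```python
import csv
import io

def convert_raw_to_processed(raw_tsv: str) -> str:
    """Hypothetical raw -> processed conversion: keep node/score pairs,
    skip malformed rows, and emit a two-column TSV. A sketch only; the
    real output format is whatever SPRAS expects for node files."""
    out = io.StringIO()
    writer = csv.writer(out, delimiter="\t", lineterminator="\n")
    writer.writerow(["NODEID", "prize"])  # assumed header, not SPRAS-verified
    for row in csv.reader(io.StringIO(raw_tsv), delimiter="\t"):
        if len(row) < 2:
            continue  # drop rows without a score column
        node, score = row[0], row[1]
        try:
            writer.writerow([node, float(score)])
        except ValueError:
            continue  # drop the header row and non-numeric scores
    return out.getvalue()

# Example raw input with a header and a malformed row:
raw = "gene\tscore\nHOG1\t2.5\nPBS2\t1.0\nbad-row\n"
print(convert_raw_to_processed(raw))
```

In a real dataset the Snakefile's rule would read `raw/` files and write the result into `processed/`, rather than working on in-memory strings.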

README.md

Lines changed: 28 additions & 1 deletion

````diff
@@ -1,8 +1,9 @@
-# SPRAS benchmarking
+# [SPRAS benchmarking](https://reed-compbio.github.io/spras-benchmarking/)
 
 ![example workflow](https://github.com/Reed-CompBio/spras-benchmarking/actions/workflows/publish.yml/badge.svg)
 
 Benchmarking datasets for the [SPRAS](https://github.com/Reed-CompBio/spras) project. This repository contains gold standard datasets to evaluate on as well as paper reproductions & improvements incorporating new methodologies.
+The results of every benchmarking run are deployed on GitHub Pages. [(See the current web output)](https://reed-compbio.github.io/spras-benchmarking/).
 
 ## Setup
 
@@ -28,3 +29,29 @@ snakemake --cores 1 --configfile configs/dmmm.yaml --show-failed-logs -s spras/S
 > [!NOTE]
 > Each one of the dataset categories (at the time of writing, DMMM and PRA) is split into a different configuration file.
 > Run each one as you would want.
+
+## Organization
+
+There are four primary folders in this repository:
+
+```
+.
+├── configs
+├── datasets
+├── spras
+└── web
+```
+
+`spras` is the cloned submodule of [SPRAS](https://github.com/reed-compbio/spras), `web` is an
+[astro](https://astro.build/) app which generates the `spras-benchmarking` [output](https://reed-compbio.github.io/spras-benchmarking/),
+`configs` holds the YAML files used to talk to SPRAS, and `datasets` contains the raw data.
+
+The workflow runs as follows:
+
+1. For every dataset, run its inner `Snakefile` with [Snakemake](https://snakemake.readthedocs.io/en/stable/). This is orchestrated
+   through the top-level [`run_snakemake.sh`](./run_snakemake.sh) shell script.
+1. Run each config YAML file in `configs/` with SPRAS.
+1. Build the website in `web` with the generated `output` from all of the SPRAS runs, and deploy it on [GitHub Pages](https://pages.github.com/).
+   To see how to build the website, go to its [README](./web/README.md).
+
+For more information on how to add a dataset, see [CONTRIBUTING.md](./CONTRIBUTING.md).
````
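Step 1 of the README's workflow (one Snakemake invocation per dataset) can be sketched as a command builder. This is a guess at what `run_snakemake.sh` effectively does, not its actual contents: the dataset names, `--cores` value, and directory layout here are illustrative assumptions.

```python
from pathlib import PurePosixPath

def snakemake_commands(dataset_dirs, cores=1):
    """Build one `snakemake` argv per dataset Snakefile.
    Sketch of the orchestration run_snakemake.sh performs; the real
    script's flags and dataset list may differ."""
    cmds = []
    for d in dataset_dirs:
        snakefile = PurePosixPath("datasets") / d / "Snakefile"
        cmds.append(["snakemake", "--cores", str(cores), "-s", str(snakefile)])
    return cmds

# Hypothetical dataset list; in practice each command would be run
# via subprocess or directly from the shell script.
for cmd in snakemake_commands(["yeast-osmotic-stress", "hiv"]):
    print(" ".join(cmd))
```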

configs/dmmm.yaml

Lines changed: 8 additions & 1 deletion

```diff
@@ -43,14 +43,21 @@ algorithms:
       g: [0]
 
 datasets:
+  # TODO: use old parameters for datasets
+  # HIV: https://github.com/Reed-CompBio/spras-benchmarking/blob/0293ae4dc0be59502fac06b42cfd9796a4b4413e/hiv-benchmarking/spras-config/config.yaml
   - label: dmmmhiv060
     node_files: ["processed_prize_060.txt"]
     edge_files: ["phosphosite-irefindex13.0-uniprot.txt"]
-    # Placeholder
     other_files: []
     data_dir: "datasets/hiv/processed"
   - label: dmmmhiv05
     node_files: ["processed_prize_05.txt"]
     edge_files: ["phosphosite-irefindex13.0-uniprot.txt"]
     other_files: []
     data_dir: "datasets/hiv/processed"
+  # Yeast: https://github.com/tristan-f-r/spras-benchmarking/blob/9477d85871024a5e3a4b0b8b9be7e78c0d0ee961/yeast-osmotic-stress/config.yaml
+  - label: dmmmyeast
+    node_files: ["prizes1_dummies.txt"]
+    edge_files: ["network1.txt"]
+    other_files: []
+    data_dir: "datasets/yeast-osmotic-stress/processed"
```
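Each dataset entry in the config points SPRAS at files relative to its `data_dir`. As a sketch of how those references resolve (illustrative only; SPRAS does its own resolution internally), the new `dmmmyeast` entry expands like this:

```python
from pathlib import PurePosixPath

def dataset_files(entry):
    """Resolve every file a dataset entry references against its data_dir.
    Mirrors the structure of the config above; not SPRAS's actual code."""
    base = PurePosixPath(entry["data_dir"])
    names = entry["node_files"] + entry["edge_files"] + entry["other_files"]
    return [str(base / n) for n in names]

# The dmmmyeast entry added in this commit:
yeast = {
    "label": "dmmmyeast",
    "node_files": ["prizes1_dummies.txt"],
    "edge_files": ["network1.txt"],
    "other_files": [],
    "data_dir": "datasets/yeast-osmotic-stress/processed",
}
print(dataset_files(yeast))
```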

datasets/README.md

Lines changed: 4 additions & 0 deletions

```diff
@@ -0,0 +1,4 @@
+# datasets
+
+This folder contains both the raw data (straight from the study/database), as well as Python scripts and an associated Snakemake file
+which take all of the raw data and produce SPRAS-compatible data.
```

egfr/egfr-param-tuning.yaml

Lines changed: 87 additions & 0 deletions

```diff
@@ -0,0 +1,87 @@
+hash_length: 7
+container_framework: docker
+unpack_singularity: false
+container_registry:
+  base_url: docker.io
+  owner: reedcompbio
+algorithms:
+  - name: omicsintegrator2
+    params:
+      include: true
+      run1:
+        b: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        g: [2, 3, 4, 5, 6, 7]
+        w: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+  - name: domino
+    params:
+      include: true
+      run1:
+        module_threshold: [0.001, 0.01, 0.02]
+        slice_threshold: [0.001, 0.1, 0.3, 0.9, 1]
+  - name: mincostflow
+    params:
+      include: true
+      run1:
+        capacity: [1, 5, 10, 15]
+        flow: [6, 8, 20, 50, 60, 70, 80, 90, 150]
+  - name: pathlinker
+    params:
+      include: true
+      run1:
+        k: [10, 20, 30, 40, 50, 60, 100, 200, 500]
+  - name: allpairs
+    params:
+      include: true
+  - name: meo
+    params:
+      include: true
+      run1:
+        local_search: ['No']
+        max_path_length: [2]
+        rand_restarts: [10]
+  - name: omicsintegrator1
+    params:
+      include: true
+      run1:
+        b: [0.01, 0.55, 2, 5, 10]
+        d: [10, 20, 30, 40]
+        g: [0.0001, 0.001]
+        mu: [0.001, 0.005, 0.008, 0.02, 0.03]
+        r: [0.01, 0.1, 1]
+        w: [0.001, 0.1, 0.5, 2, 8]
+datasets:
+  - label: tps_egfr
+    node_files:
+      - tps-egfr-prizes.txt
+    edge_files:
+      - phosphosite-irefindex13.0-uniprot.txt
+    other_files: []
+    data_dir: input
+gold_standards:
+  - label: gs_egfr
+    node_files:
+      - gs-egfr.txt
+    data_dir: input
+    dataset_labels:
+      - tps_egfr
+reconstruction_settings:
+  locations:
+    reconstruction_dir: output/tps_egfr
+  run: true
+analysis:
+  summary:
+    include: true
+  graphspace:
+    include: false
+  cytoscape:
+    include: false
+  ml:
+    include: true
+    aggregate_per_algorithm: true
+    components: 4
+    labels: false
+    linkage: ward
+    metric: euclidean
+  evaluation:
+    include: false
+    aggregate_per_algorithm: false
```
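A note on the parameter-tuning config above: each `run1` block lists per-parameter value sweeps. Assuming SPRAS expands those lists as a Cartesian product (a hedge; consult the SPRAS documentation for its exact parameter-combination semantics), the `omicsintegrator2` block alone yields 600 parameter combinations, which is why this file drives a large tuning run:

```python
from itertools import product

# The omicsintegrator2 run1 sweep from the config above:
run1 = {
    "b": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    "g": [2, 3, 4, 5, 6, 7],
    "w": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
}

# One dict per combination, assuming a full Cartesian product:
combos = [dict(zip(run1, values)) for values in product(*run1.values())]
print(len(combos))  # 10 * 6 * 10 = 600
```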

spras

Submodule spras updated 173 files

web/README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# web
2+
3+
This module is an [Astro](https://astro.build/) project which wraps the output from SPRAS
4+
into a presentable webpage. See the output: https://reed-compbio.github.io/spras-benchmarking/
5+
6+
## Building
7+
8+
To build this, you need [`pnpm`](https://pnpm.io/). It is recommended to use a node version manager
9+
([nvm](https://github.com/nvm-sh/nvm) for mac/linux, [nvm-windows](https://github.com/coreybutler/nvm-windows) for windows),
10+
to install `nodejs` and `npm` (at the time of writing, this would be node `v22`), and use `npm` to install `pnpm`:
11+
12+
```sh
13+
npm install --global pnpm
14+
```
15+
16+
After this, you can install the dependencies (make sure your current working directory is `web`):
17+
18+
```sh
19+
pnpm install
20+
```
21+
22+
Then, assuming your data is in `public/data`, build the website:
23+
24+
```sh
25+
pnpm run build
26+
```
