23 changes: 5 additions & 18 deletions README.md
@@ -26,14 +26,7 @@ conda activate multi_dwpc
pip install -e ".[dev]"
```

## Run Pipeline

This stage covers:
- loading and harmonizing 2016/2024 GO-gene data,
- percent-change and IQR filtering,
- optional GO hierarchy analysis for publication mode,
- Jaccard-based redundancy filtering,
- permutation and random null dataset generation.
## Prepare the Data

```bash
# Production pipeline (skips GO hierarchy analysis)
@@ -45,9 +38,8 @@ poe pipeline-publication
# Run individual steps
poe load-data
poe filter-change
poe go-hierarchy-analysis
poe filter-jaccard
poe gen-permutation
poe gen-random
```
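The `filter-change` step applies percent-change and IQR filtering to the GO-gene data. A minimal sketch of IQR-based outlier filtering, assuming a hypothetical array of per-term percent changes (the `k = 1.5` multiplier is illustrative, not the project's actual setting):

```python
# Sketch of IQR-based outlier filtering; the data and the k multiplier
# are illustrative, not the project's actual parameters.
import numpy as np

def iqr_filter(values: np.ndarray, k: float = 1.5) -> np.ndarray:
    """Return a boolean mask keeping values within k*IQR of the quartiles."""
    q1, q3 = np.percentile(values, [25, 75])
    iqr = q3 - q1
    lower, upper = q1 - k * iqr, q3 + k * iqr
    return (values >= lower) & (values <= upper)

growth = np.array([-5.0, 2.0, 3.0, 4.0, 150.0])  # hypothetical percent changes
mask = iqr_filter(growth)
print(growth[mask])  # the extreme values -5.0 and 150.0 are dropped
```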

### Available tasks
@@ -61,13 +53,11 @@ Run `poe --help` to see all available tasks:
| `filter-jaccard` | Jaccard filtering (all_GO, plus parents_GO when available) |
| `gen-permutation` | Generate permutation null datasets |
| `gen-random` | Generate random null datasets |
| `compute-dwpc-direct` | Compute DWPC via direct matrix multiplication |
| `pipeline-production` | Run full production pipeline |
| `pipeline-publication` | Run full publication pipeline |
| `pipeline-null` | Run null dataset generation |

Note: `filter-jaccard` includes parents_GO_positive_growth only when run with
`python scripts/jaccard_similarity_and_filtering.py --include-parents` or via
`poe pipeline-publication`.
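The `compute-dwpc-direct` task computes DWPC via direct matrix multiplication. A minimal sketch of the idea, degree-weight each edge's adjacency matrix and multiply along the metapath, with hypothetical matrices and damping exponent (a full implementation also excludes paths that revisit a node):

```python
# Simplified DWPC sketch: walk counts from degree-weighted adjacency
# matrices. All matrices, node counts, and the damping exponent w are
# illustrative; real DWPC additionally removes duplicate-node paths.
import numpy as np

def degree_weight(adj: np.ndarray, w: float = 0.4) -> np.ndarray:
    """Scale each edge (u, v) by (deg_u * deg_v) ** -w."""
    out_deg = adj.sum(axis=1, keepdims=True)  # row degrees
    in_deg = adj.sum(axis=0, keepdims=True)   # column degrees
    with np.errstate(divide="ignore"):
        weights = np.power(out_deg, -w) * np.power(in_deg, -w)
    return np.where(adj > 0, adj * weights, 0.0)

# Hypothetical GO-Gene and Gene-Gene adjacency matrices
gene_go = np.array([[1, 0], [1, 1], [0, 1]], dtype=float)    # 3 genes x 2 GO terms
gene_gene = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]], dtype=float)

# DWPC-style matrix for the metapath GO-Gene-Gene: product of weighted matrices
dwpc = degree_weight(gene_go.T) @ degree_weight(gene_gene)
print(dwpc.shape)  # (2, 3): one score per (GO term, gene) pair
```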


### Pipeline scripts

@@ -79,13 +69,10 @@ Located in `scripts/`:
4. **jaccard_similarity_and_filtering.py** - Jaccard filtering for all_GO (and parents_GO when available)
5. **permutation_null_datasets.py** - Generates permutation-based null datasets
6. **random_null_datasets.py** - Generates random null datasets
7. **pipeline_publication.py** - Full publication pipeline runner
7. **compute_dwpc_direct.py** - Direct DWPC computation
8. **pipeline_production.py** - Full production pipeline runner
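The permutation null step randomizes gene-term links while preserving term sizes. A simplified sketch of one such scheme, shuffling gene labels across annotations (the project's script may preserve additional structure, such as gene degree; the GO IDs and gene symbols here are illustrative):

```python
# Simplified permutation null: shuffle gene labels across GO annotations
# so every term keeps its size. Illustrative only; the project's script
# may preserve more structure (e.g. per-gene annotation counts).
import random

def permute_annotations(term_to_genes: dict[str, list[str]], seed: int = 0) -> dict[str, list[str]]:
    rng = random.Random(seed)
    genes = [g for gs in term_to_genes.values() for g in gs]
    rng.shuffle(genes)  # break gene-term association
    permuted, i = {}, 0
    for term, gs in term_to_genes.items():
        permuted[term] = genes[i:i + len(gs)]  # same size, random members
        i += len(gs)
    return permuted

annotations = {"GO:0008150": ["TP53", "BRCA1"], "GO:0003674": ["EGFR"]}
null = permute_annotations(annotations)
print({t: len(g) for t, g in null.items()})  # term sizes preserved
```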

### Dataset naming

- `all_GO_positive_growth`: all GO terms with positive growth after IQR filtering
- `parents_GO_positive_growth`: parents of leaf terms within the same filtered set
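The Jaccard filtering step drops GO terms whose gene sets are largely redundant with terms already kept. A minimal sketch of greedy Jaccard-based redundancy filtering; the 0.5 threshold, the largest-first order, and the term/gene names are illustrative, not the project's actual settings:

```python
# Sketch of Jaccard-based redundancy filtering: keep a term only if its
# gene set overlaps every already-kept term below a threshold.
# Threshold, ordering, and example data are illustrative.
def jaccard(a: set[str], b: set[str]) -> float:
    return len(a & b) / len(a | b) if a | b else 0.0

def filter_redundant(term_genes: dict[str, set[str]], threshold: float = 0.5) -> list[str]:
    kept: list[str] = []
    for term in sorted(term_genes, key=lambda t: -len(term_genes[t])):  # largest first
        if all(jaccard(term_genes[term], term_genes[k]) < threshold for k in kept):
            kept.append(term)
    return kept

terms = {
    "GO:A": {"TP53", "BRCA1", "EGFR"},
    "GO:B": {"TP53", "BRCA1"},  # redundant with GO:A (Jaccard = 2/3)
    "GO:C": {"KRAS"},
}
print(filter_redundant(terms))  # ['GO:A', 'GO:C']
```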

# AI Assistance
This project used Claude, an AI assistant developed by Anthropic, during development. Its assistance included generating initial code snippets and improving documentation. All AI-generated content was reviewed, tested, and validated by human developers.
26 changes: 7 additions & 19 deletions pyproject.toml
@@ -23,6 +23,11 @@ dependencies = [
"hetnet-analysis @ git+https://github.com/CU-DBMI/hetionet-analysis.git@f371064da1498ac984aa6ec6ed0ba2739c7b0259",
]

[project.optional-dependencies]
dev = [
"jupytext",
"poethepoet",
]

[build-system]
requires = ["setuptools>=61.0"]
@@ -34,8 +39,8 @@ include = ["src*"]

[tool.poe.tasks]
# Individual pipeline steps
where = ["src"]
include = ["multi_dwpc*"]
load-data = "python scripts/load_data.py"
filter-change = "python scripts/percent_change_and_filtering.py"
go-hierarchy-analysis = "python scripts/go_hierarchy_analysis.py"
filter-jaccard = "python scripts/jaccard_similarity_and_filtering.py"
gen-permutation = "python scripts/permutation_null_datasets.py"
@@ -44,28 +49,11 @@ gen-random = "python scripts/random_null_datasets.py"
# DWPC computation
# Option A: Direct matrix computation (no Docker required)
compute-dwpc-direct = "python scripts/compute_dwpc_direct.py"
# Option B: Via Docker API (requires connectivity-search-backend running)
lookup-dwpc-api = "python scripts/lookup_dwpc_api.py"
lookup-dwpc-api-with-docker = { shell = "COMPOSE_PROGRESS=plain open -a Docker && until docker info >/dev/null 2>&1; do echo 'Waiting for Docker daemon...'; sleep 5; done && cd connectivity-search-backend && COMPOSE_PROGRESS=plain ./run_stack.sh up --build -d && cd .. && python scripts/wait_for_api.py && poe lookup-dwpc-api && cd connectivity-search-backend && COMPOSE_PROGRESS=plain docker compose down" }

# Post-DWPC analysis
metapath-signature-analysis = "python scripts/metapath_signature_analysis.py"
divergence-score-analysis = "python scripts/divergence_score_analysis.py"
lookup-dwpc-api-with-docker = { shell = "COMPOSE_PROGRESS=plain until docker info >/dev/null 2>&1; do echo 'Waiting for Docker daemon...'; sleep 5; done && cd connectivity-search-backend && COMPOSE_PROGRESS=plain ./run_stack.sh up --build -d && cd .. && python scripts/wait_for_api.py && poe lookup-dwpc-api && cd connectivity-search-backend && COMPOSE_PROGRESS=plain docker compose down" }
# Testing
test-dwpc-accuracy = "python scripts/test_dwpc_accuracy.py"
benchmark-dwpc = "python scripts/benchmark_dwpc_methods.py"

# Grouped tasks
pipeline-production = "python scripts/pipeline_production.py"
pipeline-publication = "python scripts/pipeline_publication.py"
pipeline-null = ["gen-permutation", "gen-random"]
direct-dwpc-analyses = ["compute-dwpc-direct", "metapath-signature-analysis", "divergence-score-analysis"]

# Utility tasks
convert-notebooks = { shell = "for nb in notebooks/1.*.ipynb; do jupytext --to py:percent -o scripts/$(basename \"$nb\" .ipynb | tr '.' '_').py \"$nb\"; done" }
clean = { shell = "rm -rf data/ output/ gene_ontology/" }
top-bp-paths = "python scripts/top_bps_by_metapath.py"
plot-top-paths-network = "python scripts/plot_top_paths_networks.py"
extract-top-paths-local = "python scripts/extract_top_paths_local.py"
plot-path-instances-network = "python scripts/plot_path_instances_networks.py"