From 13824a679841c05338b77817b0f516c75a7a89f4 Mon Sep 17 00:00:00 2001
From: "Tristan F.-R."
Date: Mon, 23 Jun 2025 20:31:59 +0000
Subject: [PATCH 01/60] chore: re-refactor PRM properties

---
 CONTRIBUTING.md           |  2 --
 spras/allpairs.py         |  1 +
 spras/domino.py           |  1 +
 spras/meo.py              |  1 +
 spras/mincostflow.py      |  1 +
 spras/omicsintegrator1.py |  1 +
 spras/pathlinker.py       |  1 +
 spras/prm.py              | 22 +++++++++++++++-------
 8 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 02f5e5a45..41eb155de 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -142,8 +142,6 @@ and your editor's interpreter is set to using the SPRAS environment over the bas
 Note the behaviors of the `request_node_columns` function when there are missing values in that column of the node table and when multiple columns are requested.
 `request_node_columns` always returns the `NODEID` column in addition to the requested columns.
 
-Note: If you encounter a `'property' object is not iterable` error arising from inside the Snakefile, this means that `required_inputs` is not set. This is because when `required_inputs` is not set inside an algorithm wrapper, it falls back to the underlying unimplemented function inside the PRM base class, which, while it is marked as a property function, is non-static; therefore, when the runner utility class tries to dynamically fetch `required_inputs` with reflection, it ends up grabbing the `property` function instead of the underlying error, and tries to iterate over it (since `required_inputs` is usually a list.)
-
 Now implement the `generate_inputs` function. Start by inspecting the `omicsintegrator1.py` example, but note the differences in the expected file formats generated for the two algorithms with respect to the header rows and node prize column.
 The selected nodes should be any node in the dataset that has a prize set, any node that is active, any node that is a source, or any node that is a target.
diff --git a/spras/allpairs.py b/spras/allpairs.py
index adae77dbf..ea0ca5821 100644
--- a/spras/allpairs.py
+++ b/spras/allpairs.py
@@ -12,6 +12,7 @@
 
 class AllPairs(PRM):
     required_inputs = ['nodetypes', 'network']
+    doi = []
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/domino.py b/spras/domino.py
index 2364300aa..19f4b591e 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -28,6 +28,7 @@
 """
 class DOMINO(PRM):
     required_inputs = ['network', 'active_genes']
+    doi = ["10.15252/msb.20209593"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/meo.py b/spras/meo.py
index 172aa0363..ae97b6dec 100644
--- a/spras/meo.py
+++ b/spras/meo.py
@@ -84,6 +84,7 @@ def write_properties(filename=Path('properties.txt'), edges=None, sources=None,
 
 class MEO(PRM):
     required_inputs = ['sources', 'targets', 'edges']
+    doi = ["10.1093/nar/gkq1207"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/mincostflow.py b/spras/mincostflow.py
index 84105bdaf..b7f33bf3b 100644
--- a/spras/mincostflow.py
+++ b/spras/mincostflow.py
@@ -24,6 +24,7 @@
 """
 class MinCostFlow (PRM):
     required_inputs = ['sources', 'targets', 'edges']
+    doi = ["10.1038/s41540-020-00167-1"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
index f858d46fc..42dfde9aa 100644
--- a/spras/omicsintegrator1.py
+++ b/spras/omicsintegrator1.py
@@ -50,6 +50,7 @@ class OmicsIntegrator1(PRM):
     """
 
     required_inputs = ['prizes', 'edges', 'dummy_nodes']
+    doi = ["10.1371/journal.pcbi.1004879"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/pathlinker.py b/spras/pathlinker.py
index dde8b9c5c..03e771d06 100644
--- a/spras/pathlinker.py
+++ b/spras/pathlinker.py
@@ -24,6 +24,7 @@
 """
 class PathLinker(PRM):
     required_inputs = ['nodetypes', 'network']
+    doi = ["10.1038/npjsba.2016.2"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/prm.py b/spras/prm.py
index f1dc37231..bb0a17c87 100644
--- a/spras/prm.py
+++ b/spras/prm.py
@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+import typing
 
 from spras.dataset import Dataset
 
@@ -10,13 +11,20 @@ class PRM(ABC):
     algorithms.
     """
 
-    @property
-    @staticmethod
-    @abstractmethod
-    def required_inputs(self):
-        # Note: This NotImplementedError will never trigger.
-        # See CONTRIBUTING.md for more information.
-        raise NotImplementedError
+    required_inputs: list[str] = []
+    # DOIs aren't strictly required (e.g. local neighborhood),
+    # but it should be explicitly declared that there are no DOIs.
+    doi: list[str] = typing.cast(list[str], None)
+
+    def __init_subclass__(cls):
+        # modified from https://stackoverflow.com/a/58206480/7589775
+        props = ["required_inputs", "dois"]
+        for prop in props:
+            if getattr(PRM, prop) is getattr(cls, prop):
+                raise NotImplementedError(
+                    "Attribute '{}' has not been overridden in class '{}'" \
+                        .format(prop, cls.__name__)
+                )
 
     @staticmethod
     @abstractmethod
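
A minimal, self-contained sketch of the enforcement pattern the patch above introduces (names simplified; the real base class is PRM and also declares abstract static methods). The point is that a wrapper that forgets to override a declared attribute now fails loudly at class-definition time, instead of surfacing later as the confusing `'property' object is not iterable` error that the deleted CONTRIBUTING.md note had to explain:

    class Base:
        required_inputs: list[str] = []

        def __init_subclass__(cls):
            # If the attribute resolved on the subclass is the very same object
            # as the one on the base class, the subclass never overrode it.
            if getattr(Base, "required_inputs") is getattr(cls, "required_inputs"):
                raise NotImplementedError(
                    f"Attribute 'required_inputs' has not been overridden in class '{cls.__name__}'"
                )

    class Good(Base):
        required_inputs = ["network"]  # accepted: the class body supplies its own value

    # class Bad(Base):
    #     pass
    # -> raises NotImplementedError as soon as the class statement executes

Note that `__init_subclass__` is implicitly a classmethod, so the check runs once per subclass definition with no decorator needed.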
From 543915f6c554a33bd190db31e3482f05b09c3f22 Mon Sep 17 00:00:00 2001
From: "Tristan F.-R."
Date: Mon, 23 Jun 2025 20:50:30 +0000
Subject: [PATCH 02/60] fix: correct prop names

---
 spras/allpairs.py         | 2 +-
 spras/domino.py           | 2 +-
 spras/meo.py              | 2 +-
 spras/mincostflow.py      | 2 +-
 spras/omicsintegrator1.py | 2 +-
 spras/omicsintegrator2.py | 1 +
 spras/pathlinker.py       | 2 +-
 spras/prm.py              | 2 +-
 8 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/spras/allpairs.py b/spras/allpairs.py
index ea0ca5821..3b79f5ef2 100644
--- a/spras/allpairs.py
+++ b/spras/allpairs.py
@@ -12,7 +12,7 @@
 
 class AllPairs(PRM):
     required_inputs = ['nodetypes', 'network']
-    doi = []
+    dois = []
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/domino.py b/spras/domino.py
index 19f4b591e..f9890a7d4 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -28,7 +28,7 @@
 """
 class DOMINO(PRM):
     required_inputs = ['network', 'active_genes']
-    doi = ["10.15252/msb.20209593"]
+    dois = ["10.15252/msb.20209593"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/meo.py b/spras/meo.py
index ae97b6dec..c94d57b80 100644
--- a/spras/meo.py
+++ b/spras/meo.py
@@ -84,7 +84,7 @@ def write_properties(filename=Path('properties.txt'), edges=None, sources=None,
 
 class MEO(PRM):
     required_inputs = ['sources', 'targets', 'edges']
-    doi = ["10.1093/nar/gkq1207"]
+    dois = ["10.1093/nar/gkq1207"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/mincostflow.py b/spras/mincostflow.py
index b7f33bf3b..1c64d2aa1 100644
--- a/spras/mincostflow.py
+++ b/spras/mincostflow.py
@@ -24,7 +24,7 @@
 """
 class MinCostFlow (PRM):
     required_inputs = ['sources', 'targets', 'edges']
-    doi = ["10.1038/s41540-020-00167-1"]
+    dois = ["10.1038/s41540-020-00167-1"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
index 42dfde9aa..976664d82 100644
--- a/spras/omicsintegrator1.py
+++ b/spras/omicsintegrator1.py
@@ -50,7 +50,7 @@ class OmicsIntegrator1(PRM):
     """
 
     required_inputs = ['prizes', 'edges', 'dummy_nodes']
-    doi = ["10.1371/journal.pcbi.1004879"]
+    dois = ["10.1371/journal.pcbi.1004879"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index b631da90f..26357f4fd 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -22,6 +22,7 @@
 """
 class OmicsIntegrator2(PRM):
     required_inputs = ['prizes', 'edges']
+    dois = []
 
     def generate_inputs(data: Dataset, filename_map):
         """
diff --git a/spras/pathlinker.py b/spras/pathlinker.py
index 03e771d06..dce31d9fe 100644
--- a/spras/pathlinker.py
+++ b/spras/pathlinker.py
@@ -24,7 +24,7 @@
 """
 class PathLinker(PRM):
     required_inputs = ['nodetypes', 'network']
-    doi = ["10.1038/npjsba.2016.2"]
+    dois = ["10.1038/npjsba.2016.2"]
 
     @staticmethod
     def generate_inputs(data, filename_map):
diff --git a/spras/prm.py b/spras/prm.py
index bb0a17c87..ca004f5b6 100644
--- a/spras/prm.py
+++ b/spras/prm.py
@@ -14,7 +14,7 @@ class PRM(ABC):
     required_inputs: list[str] = []
     # DOIs aren't strictly required (e.g. local neighborhood),
     # but it should be explicitly declared that there are no DOIs.
-    doi: list[str] = typing.cast(list[str], None)
+    dois: list[str] = typing.cast(list[str], None)
 
     def __init_subclass__(cls):
         # modified from https://stackoverflow.com/a/58206480/7589775
From 352ba56229fbbd3979583d0c36581b53663f140d Mon Sep 17 00:00:00 2001
From: "Tristan F.-R."
Date: Mon, 23 Jun 2025 20:53:58 +0000
Subject: [PATCH 03/60] style: fmt

---
 spras/prm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spras/prm.py b/spras/prm.py
index ca004f5b6..c3a16277d 100644
--- a/spras/prm.py
+++ b/spras/prm.py
@@ -1,5 +1,5 @@
-from abc import ABC, abstractmethod
 import typing
+from abc import ABC, abstractmethod
 
 from spras.dataset import Dataset
 

From 2fdb13cd2e801efe8a95f8832de9aeb6b27e8977 Mon Sep 17 00:00:00 2001
From: "Tristan F.-R."
Date: Mon, 23 Jun 2025 15:31:55 -0700
Subject: [PATCH 04/60] chore: add second doi in pl

---
 spras/pathlinker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spras/pathlinker.py b/spras/pathlinker.py
index dce31d9fe..fc94d9818 100644
--- a/spras/pathlinker.py
+++ b/spras/pathlinker.py
@@ -24,7 +24,7 @@
 """
 class PathLinker(PRM):
     required_inputs = ['nodetypes', 'network']
-    dois = ["10.1038/npjsba.2016.2"]
+    dois = ["10.1038/npjsba.2016.2", "10.1089/cmb.2012.0274"]
 
     @staticmethod
     def generate_inputs(data, filename_map):

From 49fd4beea68563570132afaff06e2aaa6e17e95d Mon Sep 17 00:00:00 2001
From: "Tristan F.-R."
Date: Tue, 24 Jun 2025 17:13:22 +0000
Subject: [PATCH 05/60] refactor: don't use globals in runner

makes 'unused variable' warnings in runner.py meaningful - this bit
@AMINOexe when running into an 'algorithm is not supported' error
despite having the class prepared.
---
 spras/runner.py | 50 ++++++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/spras/runner.py b/spras/runner.py
index 8490644c1..632cf2531 100644
--- a/spras/runner.py
+++ b/spras/runner.py
@@ -1,22 +1,35 @@
 # supported algorithm imports
-from spras.allpairs import AllPairs as allpairs
+from spras.allpairs import AllPairs
 from spras.dataset import Dataset
-from spras.domino import DOMINO as domino
-from spras.meo import MEO as meo
-from spras.mincostflow import MinCostFlow as mincostflow
-from spras.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1
-from spras.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2
-from spras.pathlinker import PathLinker as pathlinker
+from spras.domino import DOMINO
+from spras.meo import MEO
+from spras.mincostflow import MinCostFlow
+from spras.omicsintegrator1 import OmicsIntegrator1
+from spras.omicsintegrator2 import OmicsIntegrator2
+from spras.pathlinker import PathLinker
+from spras.prm import PRM
 
+algorithms: dict[str, type[PRM]] = {
+    "allpairs": AllPairs,
+    "domino": DOMINO,
+    "meo": MEO,
+    "mincostflow": MinCostFlow,
+    "omicsintegrator1": OmicsIntegrator1,
+    "omicsintegrator2": OmicsIntegrator2,
+    "pathlinker": PathLinker,
+}
+
+def get_algorithm(algorithm: str) -> type[PRM]:
+    try:
+        return algorithms[algorithm.lower()]
+    except KeyError as exc:
+        raise NotImplementedError(f'{algorithm} is not currently supported.') from exc
 
 def run(algorithm: str, params):
     """
     A generic interface to the algorithm-specific run functions
     """
-    try:
-        algorithm_runner = globals()[algorithm.lower()]
-    except KeyError as exc:
-        raise NotImplementedError(f'{algorithm} is not currently supported') from exc
+    algorithm_runner = get_algorithm(algorithm)
     algorithm_runner.run(**params)
 
 
@@ -26,10 +39,7 @@ def get_required_inputs(algorithm: str):
     @param algorithm: algorithm name
     @return: A list of strings of input files types
     """
-    try:
-        algorithm_runner = globals()[algorithm.lower()]
-    except KeyError as exc:
-        raise NotImplementedError(f'{algorithm} is not currently supported') from exc
+    algorithm_runner = get_algorithm(algorithm)
     return algorithm_runner.required_inputs
 
 
@@ -52,10 +62,7 @@ def prepare_inputs(algorithm: str, data_file: str, filename_map: dict[str, str])
     @return:
     """
     dataset = Dataset.from_file(data_file)
-    try:
-        algorithm_runner = globals()[algorithm.lower()]
-    except KeyError as exc:
-        raise NotImplementedError(f'{algorithm} is not currently supported') from exc
+    algorithm_runner = get_algorithm(algorithm)
     return algorithm_runner.generate_inputs(dataset, filename_map)
 
 
@@ -66,8 +73,5 @@ def parse_output(algorithm: str, raw_pathway_file: str, standardized_pathway_fil
     @param raw_pathway_file: pathway file produced by an algorithm's run function
     @param standardized_pathway_file: the same pathway written in the universal format
     """
-    try:
-        algorithm_runner = globals()[algorithm.lower()]
-    except KeyError as exc:
-        raise NotImplementedError(f'{algorithm} is not currently supported') from exc
+    algorithm_runner = get_algorithm(algorithm)
    return algorithm_runner.parse_output(raw_pathway_file, standardized_pathway_file)
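
The same lookup, reduced to a runnable sketch with stand-in classes instead of the real wrappers: an explicit registry dict keeps every import visibly used, so linters can flag genuinely unused names, and unknown algorithms still fail with the same NotImplementedError:

    class AllPairs: pass      # stand-ins for the real PRM subclasses
    class PathLinker: pass

    algorithms = {
        "allpairs": AllPairs,
        "pathlinker": PathLinker,
    }

    def get_algorithm(algorithm: str) -> type:
        try:
            # Case-insensitive, mirroring the algorithm.lower() call above
            return algorithms[algorithm.lower()]
        except KeyError as exc:
            raise NotImplementedError(f'{algorithm} is not currently supported.') from exc

    assert get_algorithm("PathLinker") is PathLinker
    # get_algorithm("mystery") raises NotImplementedError

By contrast, the old globals() lookup only worked because each class was imported under a lowercase alias, which made every one of those aliases look like dead code to static analysis.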
From c2e64f7eb0c6fbe0ab15b1e8f0707a2c63bbec47 Mon Sep 17 00:00:00 2001
From: "Tristan F."
Date: Wed, 25 Jun 2025 12:47:04 -0700
Subject: [PATCH 06/60] feat: begin config refactor

---
 config/config.yaml                        |   6 +-
 config/egfr-param-tuning.yaml             |   1 -
 config/egfr.yaml                          |   1 -
 docker-wrappers/SPRAS/example_config.yaml |   2 -
 environment.yml                           |   1 +
 pyproject.toml                            |   1 +
 spras/{ => config}/config.py              | 169 +++++++++++-----------
 spras/config/raw_config.py                |  64 ++++++++
 spras/containers.py                       |   2 +-
 test/AllPairs/test_ap.py                  |   2 +-
 test/DOMINO/test_domino.py                |   2 +-
 test/LocalNeighborhood/test_ln.py         |   2 +-
 test/MEO/test_meo.py                      |   2 +-
 test/MinCostFlow/test_mcf.py              |   2 +-
 test/OmicsIntegrator1/test_oi1.py         |   2 +-
 test/OmicsIntegrator2/test_oi2.py         |   2 +-
 test/PathLinker/test_pathlinker.py        |   2 +-
 test/analysis/input/config.yaml           |   1 -
 test/analysis/input/egfr.yaml             |   1 -
 test/analysis/test_cytoscape.py           |   2 +-
 test/analysis/test_summary.py             |   2 +-
 test/test_config.py                       |   2 +-
 test/test_util.py                         |   2 +-
 23 files changed, 169 insertions(+), 104 deletions(-)
 rename spras/{ => config}/config.py (80%)
 create mode 100644 spras/config/raw_config.py

diff --git a/config/config.yaml b/config/config.yaml
index 1f246dd15..3179dfedc 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -63,6 +63,11 @@ algorithms:
   - name: "omicsintegrator2"
     params:
       include: true
+      runs:
+        - b: 4
+          g: 0
+        - b: 2
+          g: 3
       run1:
         b: 4
         g: 0
@@ -144,7 +149,6 @@ reconstruction_settings:
     # TODO move to global
     reconstruction_dir: "output"
 
-  run: true
 
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
diff --git a/config/egfr-param-tuning.yaml b/config/egfr-param-tuning.yaml
index a0a965b70..50a4788a2 100644
--- a/config/egfr-param-tuning.yaml
+++ b/config/egfr-param-tuning.yaml
@@ -3440,7 +3440,6 @@ gold_standards:
 reconstruction_settings:
   locations:
     reconstruction_dir: output/tps_egfr
-  run: true
 analysis:
   summary:
     include: true
diff --git a/config/egfr.yaml b/config/egfr.yaml
index b8c5138b8..cea3ad54b 100644
--- a/config/egfr.yaml
+++ b/config/egfr.yaml
@@ -74,7 +74,6 @@ datasets:
 reconstruction_settings:
   locations:
     reconstruction_dir: output/egfr
-  run: true
 analysis:
   graphspace:
     include: false
diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml
index f7fd74e98..9791c2f23 100644
--- a/docker-wrappers/SPRAS/example_config.yaml
+++ b/docker-wrappers/SPRAS/example_config.yaml
@@ -123,8 +123,6 @@ reconstruction_settings:
     # TODO move to global
     reconstruction_dir: "output"
 
-  run: true
-
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
   summary:
diff --git a/environment.yml b/environment.yml
index 6694b9812..7d14e3ea4 100644
--- a/environment.yml
+++ b/environment.yml
@@ -8,6 +8,7 @@ dependencies:
   - matplotlib=3.6
   - networkx=2.8
   - pandas=1.5
+  - pydantic=2.11.7
   - numpy=1.26.4
   - pre-commit=2.20 # Only required for development
   - pytest=8.0 # Only required for development
diff --git a/pyproject.toml b/pyproject.toml
index 3e90f7b1e..27dee2693 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "matplotlib==3.6",
     "networkx==2.8",
     "pandas==1.5",
+    "pydantic==2.11.7",
     "numpy==1.26.4",
     "pip==22.1",
     "requests==2.28",
diff --git a/spras/config.py b/spras/config/config.py
similarity index 80%
rename from spras/config.py
rename to spras/config/config.py
index 7bbf9cd1b..f6c5ada8b 100644
--- a/spras/config.py
+++ b/spras/config/config.py
@@ -16,19 +16,19 @@
 import itertools as it
 import os
 import re
+import warnings
 from collections.abc import Iterable
 
 import numpy as np
 import yaml
 
 from spras.util import NpHashEncoder, hash_params_sha1_base32
-
-# The default length of the truncated hash used to identify parameter combinations
-DEFAULT_HASH_LENGTH = 7
-DEFAULT_CONTAINER_PREFIX = "docker.io/reedcompbio"
+from spras.config.raw_config import ContainerFramework, RawConfig, DEFAULT_HASH_LENGTH
 
 config = None
 
+DEFAULT_CONTAINER_PREFIX = "docker.io/reedcompbio"
+
 # This will get called in the Snakefile, instantiating the singleton with the raw config
 def init_global(config_dict):
     global config
@@ -43,11 +43,9 @@ def init_from_file(filepath):
         with open(filepath, 'r') as yaml_file:
             config_dict = yaml.safe_load(yaml_file)
     except FileNotFoundError:
-        print(f"Error: The specified config '{filepath}' could not be found.")
-        return False
+        raise RuntimeError(f"Error: The specified config '{filepath}' could not be found.")
     except yaml.YAMLError as e:
-        print(f"Error: Failed to parse config '{filepath}': {e}")
-        return False
+        raise RuntimeError(f"Error: Failed to parse config '{filepath}': {e}")
 
     # And finally, initialize
     config = Config(config_dict)
@@ -55,18 +53,15 @@ class Config:
     def __init__(self, raw_config):
-        # Since process_config winds up modifying the raw_config passed to it as a side effect,
-        # we'll make a deep copy here to guarantee we don't break anything. This preserves the
-        # config as it's given to the Snakefile by Snakemake
-
-        # Member vars populated by process_config. Set to None before they are populated so that our
-        # __init__ makes clear exactly what is being configured.
+        # Member vars populated by process_config. Any values that don't have sensible initial values are set to None
+        # before they are populated for __init__ to show exactly what is being configured.
+
         # Directory used for storing output
         self.out_dir = None
         # Container framework used by PRMs. Valid options are "docker", "dsub", and "singularity"
         self.container_framework = None
         # The container prefix (host and organization) to use for images. Default is "docker.io/reedcompbio"
-        self.container_prefix = DEFAULT_CONTAINER_PREFIX
+        self.container_prefix: str = DEFAULT_CONTAINER_PREFIX
         # A Boolean specifying whether to unpack singularity containers. Default is False
         self.unpack_singularity = False
         # A dictionary to store configured datasets against which SPRAS will be run
@@ -74,7 +69,7 @@ def __init__(self, raw_config):
         # A dictionary to store configured gold standard data against output of SPRAS runs
         self.gold_standards = None
         # The hash length SPRAS will use to identify parameter combinations. Default is 7
-        self.hash_length = DEFAULT_HASH_LENGTH
+        self.hash_length: int = DEFAULT_HASH_LENGTH
         # The list of algorithms to run in the workflow. Each is a dict with 'name' as an expected key.
         self.algorithms = None
         # A nested dict mapping algorithm names to dicts that map parameter hashes to parameter combinations.
@@ -107,44 +102,24 @@ def __init__(self, raw_config):
         # A Boolean specifying whether to run the evaluation per algorithm analysis
         self.analysis_include_evaluation_aggregate_algo = None
 
-        _raw_config = copy.deepcopy(raw_config)
-        self.process_config(_raw_config)
-
-    def process_config(self, raw_config):
+        # Since snakemake provides an empty config, we provide this
+        # wrapper error first before passing validation to pydantic.
         if raw_config == {}:
             raise ValueError("Config file cannot be empty. Use --configfile to set a config file.")
 
-        # Set up a few top-level config variables
-        self.out_dir = raw_config["reconstruction_settings"]["locations"]["reconstruction_dir"]
-
-        # We allow the container framework not to be defined in the config. In the case it isn't, default to docker.
-        # However, if we get a bad value, we raise an exception.
-        if "container_framework" in raw_config:
-            container_framework = raw_config["container_framework"].lower()
-            if container_framework not in ("docker", "singularity", "dsub"):
-                msg = "SPRAS was configured to run with an unknown container framework: '" + raw_config["container_framework"] + "'. Accepted values are 'docker', 'singularity' or 'dsub'."
-                raise ValueError(msg)
-            if container_framework == "dsub":
-                print("Warning: 'dsub' framework integration is experimental and may not be fully supported.")
-            self.container_framework = container_framework
-        else:
-            self.container_framework = "docker"
-
-        # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container.
-        if "unpack_singularity" in raw_config:
-            # The value in the config is a string, and we need to convert it to a bool.
-            unpack_singularity = raw_config["unpack_singularity"]
-            if unpack_singularity and self.container_framework != "singularity":
-                print("Warning: unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.")
-            self.unpack_singularity = unpack_singularity
-
-        # Grab registry from the config, and if none is provided default to docker
-        if "container_registry" in raw_config and raw_config["container_registry"]["base_url"] != "" and raw_config["container_registry"]["owner"] != "":
-            self.container_prefix = raw_config["container_registry"]["base_url"] + "/" + raw_config["container_registry"]["owner"]
-
-        # Parse dataset information
-        # Datasets is initially a list, where each list entry has a dataset label and lists of input files
-        # Convert the dataset list into a dict where the label is the key and update the config data structure
+        # Since process_config winds up modifying the raw_config passed to it as a side effect,
+        # we'll make a deep copy here to guarantee we don't break anything. This preserves the
+        # config as it's given to the Snakefile by Snakemake
+        _raw_config = copy.deepcopy(raw_config)
+        parsed_raw_config = RawConfig.model_validate_json(_raw_config)
+        self.process_config(parsed_raw_config)
+
+    def process_datasets(self, raw_config: RawConfig):
+        """
+        Parse dataset information
+        Datasets is initially a list, where each list entry has a dataset label and lists of input files
+        Convert the dataset list into a dict where the label is the key and update the config data structure
+        """
         # TODO allow labels to be optional and assign default labels
         # TODO check for collisions in dataset labels, warn, and make the labels unique
         # Need to work more on input file naming to make less strict assumptions
         # about the filename structure
         # Currently assumes all datasets have a label and the labels are unique
         # When Snakemake parses the config file it loads the datasets as OrderedDicts not dicts
         # Convert to dicts to simplify the yaml logging
-        self.datasets = {dataset["label"]: dict(dataset) for dataset in raw_config["datasets"]}
-
-        for key in self.datasets:
-            pattern = r'^\w+$'
-            if not bool(re.match(pattern, key)):
-                raise ValueError(f"Dataset label \'{key}\' contains invalid values. Dataset labels can only contain letters, numbers, or underscores.")
+        self.datasets = {}
+        for dataset in raw_config.datasets:
+            label = dataset.label
+            if label in self.datasets:
+                raise ValueError(f"Datasets must have unique labels, but the label {label} appears at least twice.")
+            self.datasets[label] = dict(dataset)
+
+            # Validate dataset labels
+            label_pattern = r'^\w+$'
+            if not bool(re.match(label_pattern, label)):
+                raise ValueError(f"Dataset label '{label}' contains invalid values. Dataset labels can only contain letters, numbers, or underscores.")
 
         # parse gold standard information
-        try:
-            self.gold_standards = {gold_standard["label"]: dict(gold_standard) for gold_standard in raw_config["gold_standards"]}
-        except:
-            self.gold_standards = {}
-
-        # check that gold_standard labels are formatted correctly
-        for key in self.gold_standards:
-            pattern = r'^\w+$'
-            if not bool(re.match(pattern, key)):
-                raise ValueError(f"Gold standard label \'{key}\' contains invalid values. Gold standard labels can only contain letters, numbers, or underscores.")
+        self.gold_standards = {gold_standard.label: dict(gold_standard) for gold_standard in raw_config.gold_standards}
 
         # check that all the dataset labels in the gold standards are existing datasets labels
         dataset_labels = set(self.datasets.keys())
@@ -182,33 +153,30 @@ def process_config(self, raw_config):
         # dataset_labels = [dataset.get('label', f'dataset{index}') for index, dataset in enumerate(datasets)]
         # Maps from the dataset label to the dataset list index
         # dataset_dict = {dataset.get('label', f'dataset{index}'): index for index, dataset in enumerate(datasets)}
-
-        # Override the default parameter hash length if specified in the config file
-        if "hash_length" in raw_config and raw_config["hash_length"] != "":
-            self.hash_length = int(raw_config["hash_length"])
-
+
+    def process_algorithms(self, raw_config: RawConfig):
+        """
+        Parse algorithm information
+        Each algorithm's parameters are provided as a list of dictionaries
+        Defaults are handled in the Python function or class that wraps
+        running that algorithm
+        Keys in the parameter dictionary are strings
+        """
         prior_params_hashes = set()
-
-        # Parse algorithm information
-        # Each algorithm's parameters are provided as a list of dictionaries
-        # Defaults are handled in the Python function or class that wraps
-        # running that algorithm
-        # Keys in the parameter dictionary are strings
         self.algorithm_params = dict()
         self.algorithm_directed = dict()
-        self.algorithms = raw_config["algorithms"]
+        self.algorithms = raw_config.algorithms
         for alg in self.algorithms:
-            cur_params = alg["params"]
-            if "include" in cur_params and cur_params.pop("include"):
+            cur_params = alg.params
+            if cur_params.include:
                 # This dict maps from parameter combinations hashes to parameter combination dictionaries
-                self.algorithm_params[alg["name"]] = dict()
+                self.algorithm_params[alg.name] = dict()
             else:
                 # Do not parse the rest of the parameters for this algorithm if it is not included
                 continue
 
-            if "directed" in cur_params:
-                print("UPDATE: we no longer use the directed key in the config file")
-                cur_params.pop("directed")
+            if cur_params.directed != None:
+                warnings.warn("UPDATE: we no longer use the directed key in the config file")
 
             # The algorithm has no named arguments so create a default placeholder
             if len(cur_params) == 0:
                 cur_params["run1"] = {"spras_placeholder": ["no parameters"]}
 
             # Each set of runs should be 1 level down in the config file
@@ -210,7 +178,7 @@ def process_config(self, raw_config):
                         # Catch-all for strings
                         obj = [obj]
                     if not isinstance(obj, Iterable):
-                        raise ValueError(f"The object `{obj}` in algorithm {alg['name']} at key '{p}' in run '{run_params}' is not iterable!") from None
+                        raise ValueError(f"The object `{obj}` in algorithm {alg.name} at key '{p}' in run '{run_params}' is not iterable!") from None
                     all_runs.append(obj)
                 run_list_tuples = list(it.product(*all_runs))
                 param_name_tuple = tuple(param_name_list)
@@ -265,6 +233,39 @@ def process_config(self, raw_config):
                     if params_hash in prior_params_hashes:
                         raise ValueError(f'Parameter hash collision detected. Increase the hash_length in the config file '
                                          f'(current length {self.hash_length}).')
-                    self.algorithm_params[alg["name"]][params_hash] = run_dict
+                    self.algorithm_params[alg.name][params_hash] = run_dict
 
+    def process_config(self, raw_config: RawConfig):
+        # Set up a few top-level config variables
+        self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir
+
+        # We allow the container framework not to be defined in the config. In the case it isn't, default to docker.
+        # However, if we get a bad value, we raise an exception.
+        if raw_config.container_framework != None:
+            container_framework = raw_config.container_framework
+            if container_framework == ContainerFramework.dsub:
+                warnings.warn("'dsub' framework integration is experimental and may not be fully supported.")
+            self.container_framework = container_framework
+        else:
+            self.container_framework = "docker"
+
+        # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container.
+        if raw_config.unpack_singularity:
+            # The value in the config is a string, and we need to convert it to a bool.
+            unpack_singularity = raw_config["unpack_singularity"]
+            if unpack_singularity and self.container_framework != "singularity":
+                warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.")
+            self.unpack_singularity = unpack_singularity
+
+        # Grab registry from the config, and if none is provided default to docker
+        if raw_config.container_registry and raw_config["container_registry"]["base_url"] != "" and raw_config["container_registry"]["owner"] != "":
+            self.container_prefix = raw_config["container_registry"]["base_url"] + "/" + raw_config["container_registry"]["owner"]
+
+        # Override the default parameter hash length if specified in the config file
+        if "hash_length" in raw_config and raw_config["hash_length"] != "":
+            self.hash_length = int(raw_config["hash_length"])
+
+        self.process_datasets(raw_config)
+        self.process_algorithms(raw_config)
+
         self.analysis_params = raw_config["analysis"] if "analysis" in raw_config else {}
         self.ml_params = self.analysis_params["ml"] if "ml" in self.analysis_params else {}
         self.evaluation_params = self.analysis_params["evaluation"] if "evaluation" in self.analysis_params else {}
diff --git a/spras/config/raw_config.py b/spras/config/raw_config.py
new file mode 100644
index 000000000..1810bf0e5
--- /dev/null
+++ b/spras/config/raw_config.py
@@ -0,0 +1,64 @@
+"""
+Contains the raw pydantic schema for the configuration file.
+"""
+
+from enum import Enum
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Optional
+
+# The default length of the truncated hash used to identify parameter combinations
+DEFAULT_HASH_LENGTH = 7
+
+class ContainerFramework(str, Enum):
+    docker = 'docker'
+    # TODO: add apptainer variant once #260 gets merged
+    singularity = 'singularity'
+    dsub = 'dsub'
+
+class ContainerRegistry(BaseModel):
+    base_url: str
+    owner: str = Field(description="The owner or project of the registry")
+
+class AlgorithmParams(BaseModel):
+    include: bool = Field(default=False)
+    directed: Optional[bool]
+    # TODO
+
+class Algorithm(BaseModel):
+    name: str
+    params: AlgorithmParams
+
+class Dataset(BaseModel):
+    label: str
+    node_files: list[str]
+    edge_files: list[str]
+    other_files: list[str]
+    data_dir: str
+
+class GoldStandard(BaseModel):
+    label: str
+    node_files: list[str]
+    data_dir: str
+    dataset_labels: list[str]
+
+class Locations(BaseModel):
+    reconstruction_dir: str
+
+class ReconstructionSettings(BaseModel):
+    locations: Locations
+
+class RawConfig(BaseModel):
+    # TODO: move this to nested container key
+    container_framework: Optional[ContainerFramework]
+    unpack_singularity: bool = Field(default=False)
+    container_registry: ContainerRegistry
+
+    hash_length: Optional[int] = Field(
+        description="The length of the hash used to identify a parameter combination",
+        default=DEFAULT_HASH_LENGTH)
+
+    algorithms: list[Algorithm]
+    datasets: list[Dataset]
+    gold_standards: list[GoldStandard] = Field(default=[])
+
+    reconstruction_settings: ReconstructionSettings
diff --git a/spras/containers.py b/spras/containers.py
index a1fda05f2..3e6c7c3fc 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -7,7 +7,7 @@
 
 import docker
 
-import spras.config as config
+import spras.config.config as config
 from spras.logging import indent
 from spras.util import hash_filename
 
diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py
index 442b26a73..31dd612d9 100644
--- a/test/AllPairs/test_ap.py
+++ b/test/AllPairs/test_ap.py
@@ -4,7 +4,7 @@
 
 import pytest
-import spras.config as config
+import spras.config.config as config
 from spras.allpairs import AllPairs
 
 # Note that we don't directly use the config in the test, but we need the config
diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py
index 7f09fa975..4323ea4c9 100644
--- a/test/DOMINO/test_domino.py
+++ b/test/DOMINO/test_domino.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.domino import DOMINO, post_domino_id_transform, pre_domino_id_transform
 
 config.init_from_file("config/config.yaml")
diff --git a/test/LocalNeighborhood/test_ln.py b/test/LocalNeighborhood/test_ln.py
index fbee54902..9093efc68 100644
--- a/test/LocalNeighborhood/test_ln.py
+++ b/test/LocalNeighborhood/test_ln.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 
 config.init_from_file("config/config.yaml")
diff --git a/test/MEO/test_meo.py b/test/MEO/test_meo.py
index e2abdb72d..32958be20 100644
--- a/test/MEO/test_meo.py
+++ b/test/MEO/test_meo.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.meo import MEO, write_properties
 
 config.init_from_file("config/config.yaml")
diff --git a/test/MinCostFlow/test_mcf.py b/test/MinCostFlow/test_mcf.py
index 89bd61d0b..c777a665d 100644
--- a/test/MinCostFlow/test_mcf.py
+++ b/test/MinCostFlow/test_mcf.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.mincostflow import MinCostFlow
 
 config.init_from_file("config/config.yaml")
diff --git a/test/OmicsIntegrator1/test_oi1.py b/test/OmicsIntegrator1/test_oi1.py
index 35b41d428..a484c0af3 100644
--- a/test/OmicsIntegrator1/test_oi1.py
+++ b/test/OmicsIntegrator1/test_oi1.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.omicsintegrator1 import OmicsIntegrator1, write_conf
 
 config.init_from_file("config/config.yaml")
diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py
index 2a0a3e3c1..13f7f30b6 100644
--- a/test/OmicsIntegrator2/test_oi2.py
+++ b/test/OmicsIntegrator2/test_oi2.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.omicsintegrator2 import OmicsIntegrator2
 
 config.init_from_file("config/config.yaml")
diff --git a/test/PathLinker/test_pathlinker.py b/test/PathLinker/test_pathlinker.py
index 3fd6a96bd..ed9f10670 100644
--- a/test/PathLinker/test_pathlinker.py
+++ b/test/PathLinker/test_pathlinker.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.pathlinker import PathLinker
 
 config.init_from_file("config/config.yaml")
diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml
index 833e6c4bb..49879e461 100644
--- a/test/analysis/input/config.yaml
+++ b/test/analysis/input/config.yaml
@@ -102,7 +102,6 @@ reconstruction_settings:
   locations:
     #place the save path here
     reconstruction_dir: "output"
-  run: true
 
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml
index 1ddac1cae..281ecb495 100644
--- a/test/analysis/input/egfr.yaml
+++ b/test/analysis/input/egfr.yaml
@@ -91,7 +91,6 @@ datasets:
 reconstruction_settings:
   locations:
     reconstruction_dir: output/egfr
-  run: true
 analysis:
   graphspace:
     include: true
diff --git a/test/analysis/test_cytoscape.py b/test/analysis/test_cytoscape.py
index 7451b9876..68a77cd07 100644
--- a/test/analysis/test_cytoscape.py
+++ b/test/analysis/test_cytoscape.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.analysis.cytoscape import run_cytoscape
 
 config.init_from_file("test/analysis/input/config.yaml")
diff --git a/test/analysis/test_summary.py b/test/analysis/test_summary.py
index 4ff5396da..0400d1f1b 100644
--- a/test/analysis/test_summary.py
+++ b/test/analysis/test_summary.py
@@ -3,7 +3,7 @@
 
 import pandas as pd
 
-import spras.config as config
+import spras.config.config as config
 from spras.analysis.summary import summarize_networks
 from spras.dataset import Dataset
 
diff --git a/test/test_config.py b/test/test_config.py
index 6c773ddc0..26b18a4e9 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 
 
 # Set up a dummy config for testing. For now, only include things that MUST exist in the dict
diff --git a/test/test_util.py b/test/test_util.py
index baf9db0ed..2a25fc0d1 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.containers import convert_docker_path, prepare_path_docker, prepare_volume
 from spras.util import hash_params_sha1_base32
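
The shape of the validation this refactor moves toward, as a toy example (the field names below are illustrative, not the full schema): pydantic checks the whole YAML-derived dict at once, applies declared defaults, and reports missing or mistyped keys in one structured error instead of scattered membership tests:

    from pydantic import BaseModel, ValidationError

    class Locations(BaseModel):
        reconstruction_dir: str

    class ToyConfig(BaseModel):
        hash_length: int = 7              # default applies when the key is absent
        reconstruction_settings: Locations

    cfg = ToyConfig.model_validate({"reconstruction_settings": {"reconstruction_dir": "output"}})
    assert cfg.hash_length == 7

    try:
        ToyConfig.model_validate({})      # missing the required section
    except ValidationError as e:
        print(e)                          # one consolidated report of every problem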
From 4d1a19c54e22052d8e82f9c10bfdc791e4df5143 Mon Sep 17 00:00:00 2001
From: "Tristan F."
Date: Wed, 25 Jun 2025 20:49:36 +0000
Subject: [PATCH 07/60] feat: mostly structured config

---
 spras/config/__init__.py            |   0
 spras/config/config.py              | 113 +++++++++++++---------
 spras/config/raw_config.py          |  27 ++++++-
 spras/config/raw_config_analysis.py |  33 ++++++++
 4 files changed, 109 insertions(+), 64 deletions(-)
 create mode 100644 spras/config/__init__.py
 create mode 100644 spras/config/raw_config_analysis.py

diff --git a/spras/config/__init__.py b/spras/config/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spras/config/config.py b/spras/config/config.py
index f6c5ada8b..751c774a5 100644
--- a/spras/config/config.py
+++ b/spras/config/config.py
@@ -18,12 +18,13 @@
 import re
 import warnings
 from collections.abc import Iterable
+from typing import Any
 
 import numpy as np
 import yaml
 
 from spras.util import NpHashEncoder, hash_params_sha1_base32
-from spras.config.raw_config import ContainerFramework, RawConfig, DEFAULT_HASH_LENGTH
+from spras.config.raw_config import ContainerFramework, RawConfig
 
 config = None
 
@@ -53,13 +54,16 @@ def init_from_file(filepath):
 
 class Config:
-    def __init__(self, raw_config):
+    def __init__(self, raw_config: dict[str, Any]):
+        parsed_raw_config = RawConfig.model_validate(raw_config)
+        self.process_config(parsed_raw_config)
 
-        # Member vars populated by process_config. Any values that don't have sensible initial values are set to None
+        # Member vars populated by process_config. Any values that don't have quick initial values are set to None
         # before they are populated for __init__ to show exactly what is being configured.
 
         # Directory used for storing output
-        self.out_dir = None
+        self.out_dir = parsed_raw_config.reconstruction_settings.locations.reconstruction_dir
         # Container framework used by PRMs. Valid options are "docker", "dsub", and "singularity"
         self.container_framework = None
         # The container prefix (host and organization) to use for images. Default is "docker.io/reedcompbio"
         self.container_prefix: str = DEFAULT_CONTAINER_PREFIX
         # A Boolean specifying whether to unpack singularity containers. Default is False
         self.unpack_singularity = False
@@ -72,8 +76,8 @@ class Config:
         # A dictionary to store configured datasets against which SPRAS will be run
         self.datasets = None
         # A dictionary to store configured gold standard data against output of SPRAS runs
         self.gold_standards = None
-        # The hash length SPRAS will use to identify parameter combinations. Default is 7
-        self.hash_length: int = DEFAULT_HASH_LENGTH
+        # The hash length SPRAS will use to identify parameter combinations.
+        self.hash_length = parsed_raw_config.hash_length
         # The list of algorithms to run in the workflow. Each is a dict with 'name' as an expected key.
         self.algorithms = None
         # A nested dict mapping algorithm names to dicts that map parameter hashes to parameter combinations.
@@ -107,13 +110,7 @@ class Config:
         # A Boolean specifying whether to run the evaluation per algorithm analysis
         self.analysis_include_evaluation_aggregate_algo = None
 
+        # Since snakemake provides an empty config, we provide this
+        # wrapper error first before passing validation to pydantic.
         if raw_config == {}:
             raise ValueError("Config file cannot be empty. Use --configfile to set a config file.")
-
-        # Since process_config winds up modifying the raw_config passed to it as a side effect,
-        # we'll make a deep copy here to guarantee we don't break anything. This preserves the
-        # config as it's given to the Snakefile by Snakemake
-        _raw_config = copy.deepcopy(raw_config)
-        parsed_raw_config = RawConfig.model_validate_json(_raw_config)
-        self.process_config(parsed_raw_config)
 
     def process_datasets(self, raw_config: RawConfig):
@@ -119,7 +116,6 @@ class Config:
         Convert the dataset list into a dict where the label is the key and update the config data structure
         """
         # TODO allow labels to be optional and assign default labels
-        # TODO check for collisions in dataset labels, warn, and make the labels unique
         # Need to work more on input file naming to make less strict assumptions
@@ -168,8 +164,12 @@ class Config:
             if cur_params.directed is not None:
-                warnings.warn("UPDATE: we no longer use the directed key in the config file")
+                warnings.warn("UPDATE: we no longer use the directed key in the config file", stacklevel=2)
 
+            cur_params = cur_params.__pydantic_extra__
+            if not cur_params:
+                raise RuntimeError("An internal error occurred: ConfigDict extra should be set on AlgorithmParams.")
+
             # The algorithm has no named arguments so create a default placeholder
-            if len(cur_params) == 0:
+            if len(cur_params.keys()) == 0:
                 cur_params["run1"] = {"spras_placeholder": ["no parameters"]}
 
             # Each set of runs should be 1 level down in the config file
@@ -234,6 +233,39 @@ class Config:
                     self.algorithm_params[alg.name][params_hash] = run_dict
 
+    def process_analysis(self, raw_config: RawConfig):
+        if not raw_config.analysis:
+            return
+
+        self.analysis_params = raw_config.analysis
+        self.ml_params = self.analysis_params.ml if self.analysis_params.ml else {}
+        self.evaluation_params = self.analysis_params.evaluation if self.analysis_params.evaluation else {}
+
+        self.pca_params = {}
+        if self.ml_params.components:
+            self.pca_params["components"] = self.ml_params["components"]
+        if "labels" in self.ml_params:
+            self.pca_params["labels"] = self.ml_params["labels"]
+
+        self.hac_params = {}
+        if "linkage" in self.ml_params:
+            self.hac_params["linkage"] = self.ml_params["linkage"]
+        if "metric" in self.ml_params:
+            self.hac_params["metric"] = self.ml_params["metric"]
+
+        self.analysis_include_summary = raw_config.analysis.summary.include
+        self.analysis_include_graphspace = raw_config.analysis.graphspace.include
+        self.analysis_include_cytoscape = raw_config.analysis.cytoscape.include
+        self.analysis_include_ml = raw_config.analysis.ml.include
+        self.analysis_include_evaluation = raw_config.analysis.evaluation.include
+
+        # Only run ML aggregate per algorithm if analysis include ML is set to True
+        if self.ml_params.aggregate_per_algorithm and self.analysis_include_ml:
+            self.analysis_include_ml_aggregate_algo = raw_config.analysis.ml.aggregate_per_algorithm
+        else:
+            self.analysis_include_ml_aggregate_algo = False
 
@@ -276,16 +276,32 @@ class Config:
         # Only run Evaluation aggregate per algorithm if analysis include ML is set to True
-        if 'aggregate_per_algorithm' in self.evaluation_params and self.analysis_include_evaluation:
-            self.analysis_include_evaluation_aggregate_algo = raw_config["analysis"]["evaluation"]["aggregate_per_algorithm"]
+        if self.evaluation_params.aggregate_per_algorithm and self.analysis_include_evaluation:
+            self.analysis_include_evaluation_aggregate_algo = raw_config.analysis.evaluation.aggregate_per_algorithm
         else:
             self.analysis_include_evaluation_aggregate_algo = False
 
         # Only run Evaluation per algorithm if ML per algorithm is set to True
         if not self.analysis_include_ml_aggregate_algo:
             self.analysis_include_evaluation_aggregate_algo = False
 
+    def process_config(self, raw_config: RawConfig):
+        # Set up a few top-level config variables
+        self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir
+
+        if raw_config.container_framework == ContainerFramework.dsub:
+            warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2)
+        self.container_framework = raw_config.container_framework
+
+        # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container.
+        if raw_config.unpack_singularity and self.container_framework != "singularity":
+            warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.", stacklevel=2)
+        self.unpack_singularity = raw_config.unpack_singularity
+
+        # Grab registry from the config, and if none is provided default to docker
+        if raw_config.container_registry and raw_config.container_registry.base_url != "" and raw_config.container_registry.owner != "":
+            self.container_prefix = raw_config.container_registry.base_url + "/" + raw_config.container_registry.owner
+
+        self.process_datasets(raw_config)
+        self.process_algorithms(raw_config)
+        self.process_analysis(raw_config)
diff --git a/spras/config/raw_config.py b/spras/config/raw_config.py
index 1810bf0e5..76992c8f1 100644
--- a/spras/config/raw_config.py
+++ b/spras/config/raw_config.py
@@ -6,6 +6,8 @@
 from pydantic import BaseModel, ConfigDict, Field
 from typing import Optional
 
+from spras.config.raw_config_analysis import Analysis
+
 # The default length of the truncated hash used to identify parameter combinations
 DEFAULT_HASH_LENGTH = 7
 
@@ -19,15 +21,21 @@ class ContainerRegistry(BaseModel):
     base_url: str
     owner: str = Field(description="The owner or project of the registry")
 
+    model_config = ConfigDict(extra='forbid')
+
 class AlgorithmParams(BaseModel):
     include: bool = Field(default=False)
     directed: Optional[bool]
-    # TODO
+
+    # TODO: use array of runs instead
+    model_config = ConfigDict(extra='allow')
 
 class Algorithm(BaseModel):
     name: str
     params: AlgorithmParams
 
+    model_config = ConfigDict(extra='forbid')
+
 class Dataset(BaseModel):
     label: str
     node_files: list[str]
@@ -35,30 +43,41 @@ class Dataset(BaseModel):
     other_files: list[str]
     data_dir: str
 
+    model_config = ConfigDict(extra='forbid')
+
 class GoldStandard(BaseModel):
     label: str
     node_files: list[str]
     data_dir: str
     dataset_labels: list[str]
 
+    model_config = ConfigDict(extra='forbid')
+
 class Locations(BaseModel):
     reconstruction_dir: str
 
+    model_config = ConfigDict(extra='forbid')
+
 class ReconstructionSettings(BaseModel):
     locations: Locations
 
+    model_config = ConfigDict(extra='forbid')
+
 class RawConfig(BaseModel):
-    # TODO: move this to nested container key
-    container_framework: Optional[ContainerFramework]
+    # TODO: move these container values to a nested container key
+    container_framework: ContainerFramework = Field(default=ContainerFramework.docker)
     unpack_singularity: bool = Field(default=False)
     container_registry: ContainerRegistry
 
-    hash_length: Optional[int] = Field(
+    hash_length: int = Field(
         description="The length of the hash used to identify a parameter combination",
         default=DEFAULT_HASH_LENGTH)
 
     algorithms: list[Algorithm]
     datasets: list[Dataset]
     gold_standards: list[GoldStandard] = Field(default=[])
+    analysis: Optional[Analysis]
 
     reconstruction_settings: ReconstructionSettings
+
+    model_config = ConfigDict(extra='forbid')
diff --git a/spras/config/raw_config_analysis.py b/spras/config/raw_config_analysis.py
new file mode 100644
index 000000000..8743f5969
--- /dev/null
+++ b/spras/config/raw_config_analysis.py
@@ -0,0 +1,33 @@
+from pydantic import BaseModel
+from typing import Optional
+
+class SummaryAnalysis(BaseModel):
+    include: bool
+
+class GraphspaceAnalysis(BaseModel):
+    include: bool
+
+class CytoscapeAnalysis(BaseModel):
+    include: bool
+
+class MlAnalysis(BaseModel):
+    include: bool
+    aggregate_per_algorithm: bool
+    components: int
+    labels: bool
+    # TODO: enumify
+    linkage: str
+    # TODO: enumify
+    metric: str
+
+class EvaluationAnalysis(BaseModel):
+    include: bool
+    aggregate_per_algorithm: bool
+
+
+class Analysis(BaseModel):
+    summary: Optional[SummaryAnalysis]
+    graphspace: Optional[GraphspaceAnalysis]
+    cytoscape: Optional[CytoscapeAnalysis]
+    ml: Optional[MlAnalysis]
+    evaluation: Optional[EvaluationAnalysis]
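
How the `__pydantic_extra__` access used above behaves, in isolation (a toy model rather than the real AlgorithmParams): with `extra='allow'`, undeclared keys such as the per-run parameter blocks are retained during validation and surface afterwards as a plain dict:

    from pydantic import BaseModel, ConfigDict

    class Params(BaseModel):
        include: bool = False
        model_config = ConfigDict(extra='allow')

    p = Params.model_validate({"include": True, "run1": {"b": 4, "g": 0}})
    print(p.include)             # True, parsed as a declared field
    print(p.__pydantic_extra__)  # {'run1': {'b': 4, 'g': 0}}, everything undeclared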
From b56ecde361c9138cea993472d1f52c0adbf0dff2 Mon Sep 17 00:00:00 2001
From: "Tristan F."
Date: Wed, 25 Jun 2025 20:53:36 +0000
Subject: [PATCH 08/60] feat: add enum variants on ml

---
 config/config.yaml                  |  6 ------
 spras/config/config.py              |  8 ++++----
 spras/config/raw_config.py          |  3 ++-
 spras/config/raw_config_analysis.py | 22 +++++++++++++++++-----
 4 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 3179dfedc..68c580683 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -63,11 +63,6 @@ algorithms:
   - name: "omicsintegrator2"
     params:
       include: true
-      runs:
-        - b: 4
-          g: 0
-        - b: 2
-          g: 3
       run1:
         b: 4
         g: 0
@@ -149,7 +144,6 @@ reconstruction_settings:
     # TODO move to global
     reconstruction_dir: "output"
 
-
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
   summary:
diff --git a/spras/config/config.py b/spras/config/config.py
index 751c774a5..b6be80ef1 100644
--- a/spras/config/config.py
+++ b/spras/config/config.py
@@ -23,8 +23,8 @@
 import numpy as np
 import yaml
 
-from spras.util import NpHashEncoder, hash_params_sha1_base32
 from spras.config.raw_config import ContainerFramework, RawConfig
+from spras.util import NpHashEncoder, hash_params_sha1_base32
 
 config = None
 
@@ -110,7 +110,7 @@ class Config:
         if raw_config == {}:
             raise ValueError("Config file cannot be empty. Use --configfile to set a config file.")
-    
+
     def process_datasets(self, raw_config: RawConfig):
@@ -150,7 +150,7 @@ class Config:
         # dataset_labels = [dataset.get('label', f'dataset{index}') for index, dataset in enumerate(datasets)]
         # Maps from the dataset label to the dataset list index
         # dataset_dict = {dataset.get('label', f'dataset{index}'): index for index, dataset in enumerate(datasets)}
-    
+
     def process_algorithms(self, raw_config: RawConfig):
@@ -172,7 +172,7 @@ class Config:
-            if cur_params.directed != None:
+            if cur_params.directed is not None:
                 warnings.warn("UPDATE: we no longer use the directed key in the config file")
diff --git a/spras/config/raw_config.py b/spras/config/raw_config.py
index 76992c8f1..f60cb47c5 100644
--- a/spras/config/raw_config.py
+++ b/spras/config/raw_config.py
@@ -3,9 +3,10 @@
 """
 
 from enum import Enum
-from pydantic import BaseModel, ConfigDict, Field
 from typing import Optional
 
+from pydantic import BaseModel, ConfigDict, Field
+
 from spras.config.raw_config_analysis import Analysis
 
 # The default length of the truncated hash used to identify parameter combinations
diff --git a/spras/config/raw_config_analysis.py b/spras/config/raw_config_analysis.py
index 8743f5969..7eae2a127 100644
--- a/spras/config/raw_config_analysis.py
+++ b/spras/config/raw_config_analysis.py
@@ -1,6 +1,9 @@
-from pydantic import BaseModel
+from enum import Enum
 from typing import Optional
 
+from pydantic import BaseModel
+
+
 class SummaryAnalysis(BaseModel):
     include: bool
 
@@ -10,15 +13,24 @@ class GraphspaceAnalysis(BaseModel):
 class CytoscapeAnalysis(BaseModel):
     include: bool
 
+class MlLinkage(str, Enum):
+    ward = 'ward'
+    complete = 'complete'
+    average = 'average'
+    single = 'single'
+
+class MlMetric(str, Enum):
+    euclidean = 'euclidean'
+    manhattan = 'manhattan'
+    cosine = 'cosine'
+
 class MlAnalysis(BaseModel):
     include: bool
     aggregate_per_algorithm: bool
-    components: int
-    labels: bool
-    # TODO: enumify
-    linkage: str
-    # TODO: enumify
-    metric: str
+    components: int = 2
+    labels: bool = True
+    linkage: MlLinkage = MlLinkage.ward
+    metric: MlMetric = MlMetric.euclidean
 
 class EvaluationAnalysis(BaseModel):
     include: bool
From bf95888ccbe00d90f124a998ab128f03b324c33f Mon Sep 17 00:00:00 2001
From: "Tristan F."
Date: Wed, 25 Jun 2025 21:06:53 +0000
Subject: [PATCH 09/60] fix: some defaults

---
 spras/config/config.py              |  6 +++---
 spras/config/raw_config.py          | 12 ++++++------
 spras/config/raw_config_analysis.py | 18 +++++++++---------
 test/test_config.py                 |  1 -
 4 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/spras/config/config.py b/spras/config/config.py
index b6be80ef1..75c9124c6 100644
--- a/spras/config/config.py
+++ b/spras/config/config.py
@@ -43,10 +43,10 @@ def init_from_file(filepath):
     try:
         with open(filepath, 'r') as yaml_file:
             config_dict = yaml.safe_load(yaml_file)
-    except FileNotFoundError:
-        raise RuntimeError(f"Error: The specified config '{filepath}' could not be found.")
+    except FileNotFoundError as e:
+        raise RuntimeError(f"Error: The specified config '{filepath}' could not be found.") from e
     except yaml.YAMLError as e:
-        raise RuntimeError(f"Error: Failed to parse config '{filepath}': {e}")
+        raise RuntimeError(f"Error: Failed to parse config '{filepath}'") from e
 
     # And finally, initialize
     config = Config(config_dict)
diff --git a/spras/config/raw_config.py b/spras/config/raw_config.py
index f60cb47c5..1580706e2 100644
--- a/spras/config/raw_config.py
+++ b/spras/config/raw_config.py
@@ -25,8 +25,8 @@ class ContainerRegistry(BaseModel):
     model_config = ConfigDict(extra='forbid')
 
 class AlgorithmParams(BaseModel):
-    include: bool = Field(default=False)
-    directed: Optional[bool]
+    include: bool = False
+    directed: Optional[bool] = None
 
     # TODO: use array of runs instead
     model_config = ConfigDict(extra='allow')
@@ -66,8 +66,8 @@ class ReconstructionSettings(BaseModel):
 
 class RawConfig(BaseModel):
     # TODO: move these container values to a nested container key
-    container_framework: ContainerFramework = Field(default=ContainerFramework.docker)
-    unpack_singularity: bool = Field(default=False)
+    container_framework: ContainerFramework = ContainerFramework.docker
+    unpack_singularity: bool = False
     container_registry: ContainerRegistry
 
     hash_length: int = Field(
@@ -76,8 +76,8 @@ class RawConfig(BaseModel):
 
     algorithms: list[Algorithm]
     datasets: list[Dataset]
-    gold_standards: list[GoldStandard] = Field(default=[])
-    analysis: Optional[Analysis]
+    gold_standards: list[GoldStandard] = []
+    analysis: Optional[Analysis] = None
 
     reconstruction_settings: ReconstructionSettings
 
diff --git a/spras/config/raw_config_analysis.py b/spras/config/raw_config_analysis.py
index 7eae2a127..5682fb6e6 100644
--- a/spras/config/raw_config_analysis.py
+++ b/spras/config/raw_config_analysis.py
@@ -27,10 +27,10 @@ class MlMetric(str, Enum):
 class MlAnalysis(BaseModel):
     include: bool
     aggregate_per_algorithm: bool
-    components: int = 2
-    labels: bool = True
-    linkage: MlLinkage = MlLinkage.ward
-    metric: MlMetric = MlMetric.euclidean
+    components: int = 2
+    labels: bool = True
+    linkage: MlLinkage = MlLinkage.ward
+    metric: MlMetric = MlMetric.euclidean
 
 class EvaluationAnalysis(BaseModel):
     include: bool
@@ -38,8 +38,8 @@ class EvaluationAnalysis(BaseModel):
 
 class Analysis(BaseModel):
-    summary: Optional[SummaryAnalysis]
-    graphspace: Optional[GraphspaceAnalysis]
-    cytoscape: Optional[CytoscapeAnalysis]
-    ml: Optional[MlAnalysis]
-    evaluation: Optional[EvaluationAnalysis]
+    summary: Optional[SummaryAnalysis] = None
+    graphspace: Optional[GraphspaceAnalysis] = None
+    cytoscape: Optional[CytoscapeAnalysis] = None
+    ml: Optional[MlAnalysis] = None
+    evaluation: Optional[EvaluationAnalysis] = None
diff --git a/test/test_config.py b/test/test_config.py
index 26b18a4e9..3039e2dc3 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -25,7 +25,6 @@ def get_test_config():
         "datasets": [{"label": "alg1"}, {"label": "alg2"}],
         "gold_standards": [{"label": "gs1", "dataset_labels": []}],
         "algorithms": [
-            {"params": ["param2", "param2"]},
             {
                 "name": "strings",
                 "params": {

From 51d6a7b1efeee0b97f5ad6e1ca377f91fbd9ce19 Mon Sep 17 00:00:00 2001
From: "Tristan F."
Date: Wed, 25 Jun 2025 21:47:26 +0000
Subject: [PATCH 10/60] feat: fully finish config parsing

---
 spras/config/config.py              | 43 ++++++++++-------------------
 spras/config/raw_config.py          | 23 ++++++++++-----
 spras/config/raw_config_analysis.py | 21 +++++++-------
 spras/config/util_enum.py           | 14 ++++++++++
 test/test_config.py                 | 33 +++++++++++++++++++---
 5 files changed, 83 insertions(+), 51 deletions(-)
 create mode 100644 spras/config/util_enum.py

diff --git a/spras/config/config.py b/spras/config/config.py
index 75c9124c6..02ab61ca0 100644
--- a/spras/config/config.py
+++ b/spras/config/config.py
@@ -54,8 +54,12 @@ class Config:
     def __init__(self, raw_config: dict[str, Any]):
+        # Since snakemake provides an empty config, we provide this
+        # wrapper error first before passing validation to pydantic.
+        if raw_config == {}:
+            raise ValueError("Config file cannot be empty. Use --configfile to set a config file.")
+
         parsed_raw_config = RawConfig.model_validate(raw_config)
 
         # Member vars populated by process_config. Any values that don't have quick initial values are set to None
         # before they are populated for __init__ to show exactly what is being configured.
@@ -110,7 +114,7 @@ class Config:
         # A Boolean specifying whether to run the evaluation per algorithm analysis
         self.analysis_include_evaluation_aggregate_algo = None
 
-        # Since snakemake provides an empty config, we provide this
-        # wrapper error first before passing validation to pydantic.
-        if raw_config == {}:
-            raise ValueError("Config file cannot be empty. Use --configfile to set a config file.")
+        self.process_config(parsed_raw_config)
 
     def process_datasets(self, raw_config: RawConfig):
@@ -119,7 +116,6 @@ class Config:
         """
         # TODO allow labels to be optional and assign default labels
-        # TODO check for collisions in dataset labels, warn, and make the labels unique
         # Need to work more on input file naming to make less strict assumptions
         # about the filename structure
         # Currently assumes all datasets have a label and the labels are unique
         # When Snakemake parses the config file it loads the datasets as OrderedDicts not dicts
         # Convert to dicts to simplify the yaml logging
         self.datasets = {}
         for dataset in raw_config.datasets:
             label = dataset.label
             if label in self.datasets:
                 raise ValueError(f"Datasets must have unique labels, but the label {label} appears at least twice.")
             self.datasets[label] = dict(dataset)
-
-            # Validate dataset labels
-            label_pattern = r'^\w+$'
-            if not bool(re.match(label_pattern, label)):
-                raise ValueError(f"Dataset label '{label}' contains invalid values. Dataset labels can only contain letters, numbers, or underscores.")
-
+
         # parse gold standard information
         self.gold_standards = {gold_standard.label: dict(gold_standard) for gold_standard in raw_config.gold_standards}
@@ -168,8 +164,8 @@ class Config:
             if cur_params.directed is not None:
-                warnings.warn("UPDATE: we no longer use the directed key in the config file")
+                warnings.warn("UPDATE: we no longer use the directed key in the config file", stacklevel=2)
@@ -234,20 +230,14 @@ class Config:
     def process_analysis(self, raw_config: RawConfig):
         if not raw_config.analysis:
             return
 
         self.analysis_params = raw_config.analysis
-        self.ml_params = self.analysis_params.ml if self.analysis_params.ml else {}
-        self.evaluation_params = self.analysis_params.evaluation if self.analysis_params.evaluation else {}
-
-        self.pca_params = {}
-        if self.ml_params.components:
-            self.pca_params["components"] = self.ml_params["components"]
-        if "labels" in self.ml_params:
-            self.pca_params["labels"] = self.ml_params["labels"]
-
-        self.hac_params = {}
-        if "linkage" in self.ml_params:
-            self.hac_params["linkage"] = self.ml_params["linkage"]
-        if "metric" in self.ml_params:
-            self.hac_params["metric"] = self.ml_params ["metric"]
+        self.ml_params = self.analysis_params.ml
+        self.evaluation_params = self.analysis_params.evaluation
+
+        self.pca_params = self.ml_params
 
         self.analysis_include_summary = raw_config.analysis.summary.include
         self.analysis_include_graphspace = raw_config.analysis.graphspace.include
         self.analysis_include_cytoscape = raw_config.analysis.cytoscape.include
         self.analysis_include_ml = raw_config.analysis.ml.include
         self.analysis_include_evaluation = raw_config.analysis.evaluation.include
 
         # Only run ML aggregate per algorithm if analysis include ML is set to True
         if self.ml_params.aggregate_per_algorithm and self.analysis_include_ml:
-            self.analysis_include_ml_aggregate_algo = raw_config["analysis"]["ml"]["aggregate_per_algorithm"]
+            self.analysis_include_ml_aggregate_algo = raw_config.analysis.ml.aggregate_per_algorithm
         else:
             self.analysis_include_ml_aggregate_algo = False
 
         # Only run Evaluation aggregate per algorithm if analysis include ML is set to True
         if self.evaluation_params.aggregate_per_algorithm and self.analysis_include_evaluation:
             self.analysis_include_evaluation_aggregate_algo = raw_config.analysis.evaluation.aggregate_per_algorithm
         else:
             self.analysis_include_evaluation_aggregate_algo = False
 
         # Only run Evaluation per algorithm if ML per algorithm is set to True
         if not self.analysis_include_ml_aggregate_algo:
             self.analysis_include_evaluation_aggregate_algo = False
 
     def process_config(self, raw_config: RawConfig):
         # Set up a few top-level config variables
         self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir
 
         if raw_config.container_framework == ContainerFramework.dsub:
-            warnings.warn("'dsub' framework integration is experimental and may not be fully supported.")
+            warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2)
         self.container_framework = raw_config.container_framework
 
         # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container.
         if raw_config.unpack_singularity and self.container_framework != "singularity":
-            warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.")
+            warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.", stacklevel=2)
         self.unpack_singularity = raw_config.unpack_singularity
 
         # Grab registry from the config, and if none is provided default to docker
         if raw_config.container_registry and raw_config.container_registry.base_url != "" and raw_config.container_registry.owner != "":
             self.container_prefix = raw_config.container_registry.base_url + "/" + raw_config.container_registry.owner
 
         self.process_datasets(raw_config)
         self.process_algorithms(raw_config)
         self.process_analysis(raw_config)
diff --git a/spras/config/raw_config.py b/spras/config/raw_config.py
index 1580706e2..5ab6fed1a 100644
Dataset labels can only contain letters, numbers, or underscores.") - + # parse gold standard information self.gold_standards = {gold_standard.label: dict(gold_standard) for gold_standard in raw_config.gold_standards} @@ -173,7 +168,7 @@ def process_algorithms(self, raw_config: RawConfig): continue if cur_params.directed is not None: - warnings.warn("UPDATE: we no longer use the directed key in the config file") + warnings.warn("UPDATE: we no longer use the directed key in the config file", stacklevel=2) cur_params = cur_params.__pydantic_extra__ if not cur_params: raise RuntimeError("An internal error occurred: ConfigDict extra should be set on AlgorithmParams.") # The algorithm has no named arguments so create a default placeholder @@ -239,20 +234,10 @@ def process_analysis(self, raw_config: RawConfig): return self.analysis_params = raw_config.analysis - self.ml_params = self.analysis_params.ml if self.analysis_params.ml else {} - self.evaluation_params = self.analysis_params.evaluation if self.analysis_params.evaluation else {} - - self.pca_params = {} - if self.ml_params.components: - self.pca_params["components"] = self.ml_params["components"] - if "labels" in self.ml_params: - self.pca_params["labels"] = self.ml_params["labels"] + self.ml_params = self.analysis_params.ml + self.evaluation_params = self.analysis_params.evaluation - self.hac_params = {} - if "linkage" in self.ml_params: - self.hac_params["linkage"] = self.ml_params["linkage"] - if "metric" in self.ml_params: - self.hac_params["metric"] = self.ml_params ["metric"] + self.pca_params = self.ml_params self.analysis_include_summary = raw_config.analysis.summary.include self.analysis_include_graphspace = raw_config.analysis.graphspace.include @@ -262,7 +247,7 @@ # Only run ML aggregate per algorithm if analysis include ML is set to True if self.ml_params.aggregate_per_algorithm and self.analysis_include_ml: - self.analysis_include_ml_aggregate_algo = raw_config["analysis"]["ml"]["aggregate_per_algorithm"] + self.analysis_include_ml_aggregate_algo = raw_config.analysis.ml.aggregate_per_algorithm else: self.analysis_include_ml_aggregate_algo = False @@ -290,12 +275,12 @@ def process_config(self, raw_config: RawConfig): self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir if raw_config.container_framework == ContainerFramework.dsub: - warnings.warn("'dsub' framework integration is experimental and may not be fully supported.") + warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2) self.container_framework = raw_config.container_framework # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container. if raw_config.unpack_singularity and self.container_framework != "singularity": - warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.") + warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.", stacklevel=2) self.unpack_singularity = raw_config.unpack_singularity # Grab registry from the config, and if none is provided default to docker diff --git a/spras/config/raw_config.py b/spras/config/raw_config.py index 1580706e2..5ab6fed1a 100644 --- a/spras/config/raw_config.py +++ b/spras/config/raw_config.py @@ -2,17 +2,26 @@ Contains the raw pydantic schema for the configuration file.
""" -from enum import Enum -from typing import Optional +import re +from spras.config.util_enum import CaseInsensitiveEnum +from typing import Annotated, Optional -from pydantic import BaseModel, ConfigDict, Field +from pydantic import AfterValidator, BaseModel, ConfigDict, Field from spras.config.raw_config_analysis import Analysis # The default length of the truncated hash used to identify parameter combinations DEFAULT_HASH_LENGTH = 7 -class ContainerFramework(str, Enum): +def label_validator(name: str): + label_pattern = r'^\w+$' + def validate(label: str): + if not bool(re.match(label_pattern, label)): + raise ValueError(f"{name} label '{label}' contains invalid values. {name} labels can only contain letters, numbers, or underscores.") + return label + return validate + +class ContainerFramework(CaseInsensitiveEnum): docker = 'docker' # TODO: add apptainer variant once #260 gets merged singularity = 'singularity' @@ -26,7 +35,7 @@ class ContainerRegistry(BaseModel): class AlgorithmParams(BaseModel): include: bool = False - directed: Optional[bool] = None + directed: Annotated[Optional[bool], Field(deprecated=True)] = None # TODO: use array of runs instead model_config = ConfigDict(extra='allow') @@ -38,7 +47,7 @@ class Algorithm(BaseModel): model_config = ConfigDict(extra='forbid') class Dataset(BaseModel): - label: str + label: Annotated[str, AfterValidator(label_validator("Dataset"))] node_files: list[str] edge_files: list[str] other_files: list[str] @@ -47,7 +56,7 @@ class Dataset(BaseModel): model_config = ConfigDict(extra='forbid') class GoldStandard(BaseModel): - label: str + label: Annotated[str, AfterValidator(label_validator("Gold Standard"))] node_files: list[str] data_dir: str dataset_labels: list[str] diff --git a/spras/config/raw_config_analysis.py b/spras/config/raw_config_analysis.py index 5682fb6e6..6b5be9bab 100644 --- a/spras/config/raw_config_analysis.py +++ b/spras/config/raw_config_analysis.py @@ -1,5 +1,4 @@ -from enum import Enum -from typing import Optional +from spras.config.util_enum import CaseInsensitiveEnum from pydantic import BaseModel @@ -13,20 +12,20 @@ class GraphspaceAnalysis(BaseModel): class CytoscapeAnalysis(BaseModel): include: bool -class MlLinkage(str, Enum): +class MlLinkage(CaseInsensitiveEnum): ward = 'ward' complete = 'complete' average = 'average' single = 'single' -class MlMetric(str, Enum): +class MlMetric(CaseInsensitiveEnum): euclidean = 'euclidean' manhattan = 'manhattan' cosine = 'cosine' class MlAnalysis(BaseModel): include: bool - aggregate_per_algorithm: bool + aggregate_per_algorithm: bool = False components: int = 2 labels: bool = True linkage: MlLinkage = MlLinkage.ward @@ -34,12 +33,12 @@ class MlAnalysis(BaseModel): class EvaluationAnalysis(BaseModel): include: bool - aggregate_per_algorithm: bool + aggregate_per_algorithm: bool = False class Analysis(BaseModel): - summary: Optional[SummaryAnalysis] = None - graphspace: Optional[GraphspaceAnalysis] = None - cytoscape: Optional[CytoscapeAnalysis] = None - ml: Optional[MlAnalysis] = None - evaluation: Optional[EvaluationAnalysis] = None + summary: SummaryAnalysis = SummaryAnalysis(include=False) + graphspace: GraphspaceAnalysis = GraphspaceAnalysis(include=False) + cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False) + ml: MlAnalysis = MlAnalysis(include=False) + evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False) diff --git a/spras/config/util_enum.py b/spras/config/util_enum.py new file mode 100644 index 000000000..ec5700fd3 --- /dev/null +++ 
b/spras/config/util_enum.py @@ -0,0 +1,14 @@ +from enum import Enum +from typing import Any + +# https://stackoverflow.com/a/76883868/7589775 +class CaseInsensitiveEnum(str, Enum): + @classmethod + def _missing_(cls, value: Any): + if isinstance(value, str): + value = value.lower() + + for member in cls: + if member.lower() == value: + return member + return None diff --git a/test/test_config.py b/test/test_config.py index 3039e2dc3..9a3bf6549 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -4,7 +4,14 @@ import pytest import spras.config.config as config +from spras.config.raw_config import DEFAULT_HASH_LENGTH +filler_dataset_data: dict[str, str | list[str]] = { + "data_dir": "fake", + "edge_files": [], + "other_files": [], + "node_files": [] +} # Set up a dummy config for testing. For now, only include things that MUST exist in the dict # in order for the config init to complete. To test particular parts of the config initialization, @@ -22,8 +29,25 @@ def get_test_config(): "reconstruction_dir": "my_dir" } }, - "datasets": [{"label": "alg1"}, {"label": "alg2"}], - "gold_standards": [{"label": "gs1", "dataset_labels": []}], + "datasets": [{ + "label": "alg1", + "data_dir": "fake", + "edge_files": [], + "other_files": [], + "node_files": [] + }, { + "label": "alg2", + "data_dir": "faux", + "edge_files": [], + "other_files": [], + "node_files": [] + }], + "gold_standards": [{ + "label": "gs1", + "dataset_labels": [], + "node_files": [], + "data_dir": "gs-fake" + }], "algorithms": [ { "name": "strings", @@ -125,9 +149,9 @@ def test_config_hash_length(self): config.init_global(test_config) assert (config.config.hash_length == 7) - test_config["hash_length"] = "" + test_config.pop("hash_length", None) config.init_global(test_config) - assert (config.config.hash_length == config.DEFAULT_HASH_LENGTH) + assert (config.config.hash_length == DEFAULT_HASH_LENGTH) # Initialize the configuration test_config["hash_length"] = "12" @@ -193,6 +217,7 @@ def test_correct_dataset_label(self): test_config = get_test_config() correct_test_dicts = [{"label": "test"}, {"label": "123"}, {"label": "test123"}, {"label": "123test"}, {"label": "_"}, {"label": "test_test"}, {"label": "_test"}, {"label": "test_"}] + correct_test_dicts = [dict(list(d.items()) + list(filler_dataset_data.items())) for d in correct_test_dicts] for test_dict in correct_test_dicts: test_config["datasets"] = [test_dict] From a27d38decf17c3579753e3b8e7e9b603abc1e538 Mon Sep 17 00:00:00 2001 From: "Tristan F." 
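A note on the `_missing_` hook in `CaseInsensitiveEnum` above: when a value lookup fails, Python's `Enum` machinery falls back to `_missing_`, which here lowercases the incoming string and compares it against each member's value. A minimal sketch of the resulting behavior (the `ContainerFramework` members mirror the schema in the patch; the asserts are illustrative only, not repo tests):

    from spras.config.util_enum import CaseInsensitiveEnum

    class ContainerFramework(CaseInsensitiveEnum):
        docker = 'docker'
        singularity = 'singularity'

    # An exact value match takes the normal Enum path...
    assert ContainerFramework('docker') is ContainerFramework.docker
    # ...while mixed-case config input is rescued by _missing_:
    assert ContainerFramework('Docker') is ContainerFramework.docker
    assert ContainerFramework('SINGULARITY') is ContainerFramework.singularity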
Date: Wed, 25 Jun 2025 21:49:28 +0000 Subject: [PATCH 11/60] style: fmt --- spras/config/config.py | 2 +- spras/config/raw_config.py | 2 +- spras/config/raw_config_analysis.py | 4 ++-- spras/config/util_enum.py | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index 02ab61ca0..b627fcc73 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -130,7 +130,7 @@ def process_datasets(self, raw_config: RawConfig): if label in self.datasets: raise ValueError(f"Datasets must have unique labels, but the label {label} appears at least twice.") self.datasets[label] = dict(dataset) - + # parse gold standard information self.gold_standards = {gold_standard.label: dict(gold_standard) for gold_standard in raw_config.gold_standards} diff --git a/spras/config/raw_config.py b/spras/config/raw_config.py index 5ab6fed1a..4c1cc3581 100644 --- a/spras/config/raw_config.py +++ b/spras/config/raw_config.py @@ -3,12 +3,12 @@ """ import re -from spras.config.util_enum import CaseInsensitiveEnum from typing import Annotated, Optional from pydantic import AfterValidator, BaseModel, ConfigDict, Field from spras.config.raw_config_analysis import Analysis +from spras.config.util_enum import CaseInsensitiveEnum # The default length of the truncated hash used to identify parameter combinations DEFAULT_HASH_LENGTH = 7 diff --git a/spras/config/raw_config_analysis.py b/spras/config/raw_config_analysis.py index 6b5be9bab..194fc8f7c 100644 --- a/spras/config/raw_config_analysis.py +++ b/spras/config/raw_config_analysis.py @@ -1,7 +1,7 @@ -from spras.config.util_enum import CaseInsensitiveEnum - from pydantic import BaseModel +from spras.config.util_enum import CaseInsensitiveEnum + class SummaryAnalysis(BaseModel): include: bool diff --git a/spras/config/util_enum.py b/spras/config/util_enum.py index ec5700fd3..3e73eda98 100644 --- a/spras/config/util_enum.py +++ b/spras/config/util_enum.py @@ -1,6 +1,7 @@ from enum import Enum from typing import Any + # https://stackoverflow.com/a/76883868/7589775 class CaseInsensitiveEnum(str, Enum): @classmethod From dd4674a22d9af81c210093a8fd9da393074e38db Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Wed, 25 Jun 2025 22:03:04 +0000 Subject: [PATCH 12/60] fix: remove dep mark, use strict is None --- spras/config/config.py | 2 +- spras/config/raw_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index b627fcc73..55d061607 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -171,7 +171,7 @@ def process_algorithms(self, raw_config: RawConfig): warnings.warn("UPDATE: we no longer use the directed key in the config file", stacklevel=2) cur_params = cur_params.__pydantic_extra__ - if not cur_params: + if cur_params is None: raise RuntimeError("An internal error occurred: ConfigDict extra should be set on AlgorithmParams.") # The algorithm has no named arguments so create a default placeholder diff --git a/spras/config/raw_config.py b/spras/config/raw_config.py index 4c1cc3581..caff7c690 100644 --- a/spras/config/raw_config.py +++ b/spras/config/raw_config.py @@ -35,7 +35,7 @@ class ContainerRegistry(BaseModel): class AlgorithmParams(BaseModel): include: bool = False - directed: Annotated[Optional[bool], Field(deprecated=True)] = None + directed: Optional[bool] = None # TODO: use array of runs instead model_config = ConfigDict(extra='allow') From 5a8826d24434613a611ccdfef69c81260e4d3129 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Wed, 25 Jun 2025 23:01:30 +0000 Subject: [PATCH 13/60] chore: correct config loc --- Snakefile | 2 +- spras/config/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Snakefile b/Snakefile index df90f8e4a..f5817650d 100644 --- a/Snakefile +++ b/Snakefile @@ -5,7 +5,7 @@ import yaml from spras.dataset import Dataset from spras.evaluation import Evaluation from spras.analysis import ml, summary, graphspace, cytoscape -import spras.config as _config +import spras.config.config as _config # Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037 # and using the wrong separator prevents Snakemake from matching filenames to the rules that can produce them diff --git a/spras/config/config.py b/spras/config/config.py index 55d061607..5894dd304 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -6,7 +6,7 @@ module that imports this module can access a config option by checking the object's value. For example -import spras.config as config +import spras.config.config as config container_framework = config.config.container_framework will grab the top level registry configuration option as it appears in the config file From a47b0dfbe7128221f2ee378f3188402a83f60ae6 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Wed, 25 Jun 2025 23:45:53 +0000 Subject: [PATCH 14/60] fix: specify hac params --- spras/config/config.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/spras/config/config.py b/spras/config/config.py index 5894dd304..0db7bedf1 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -239,6 +239,11 @@ def process_analysis(self, raw_config: RawConfig): self.pca_params = self.ml_params + self.hac_params = { + "linkage": self.ml_params.linkage, + "metric": self.ml_params.metric + } + self.analysis_include_summary = raw_config.analysis.summary.include self.analysis_include_graphspace = raw_config.analysis.graphspace.include self.analysis_include_cytoscape = raw_config.analysis.cytoscape.include From 8d756045da12d48e603c5d6f7a7846d41d24fd53 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R."
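The strict `is None` check in PATCH 12 above matters because of how pydantic populates `__pydantic_extra__`: with `extra='allow'` it is always a dict, so an algorithm configured with no extra parameters yields an empty dict, which is falsy. A small sketch of the distinction (the model `P` is invented for illustration):

    from pydantic import BaseModel, ConfigDict

    class P(BaseModel):
        include: bool
        model_config = ConfigDict(extra='allow')

    p = P(include=True)                # no extra keys supplied
    assert p.__pydantic_extra__ == {}  # an empty dict, not None
    # `if not cur_params` would misread a parameterless algorithm as the
    # internal error; `if cur_params is None` only fires when extras are
    # not being collected at all.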
Date: Thu, 26 Jun 2025 16:04:27 +0000 Subject: [PATCH 15/60] fix: expand class params --- spras/config/config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index 0db7bedf1..b479c2a53 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -233,9 +233,10 @@ def process_analysis(self, raw_config: RawConfig): if not raw_config.analysis: return - self.analysis_params = raw_config.analysis - self.ml_params = self.analysis_params.ml - self.evaluation_params = self.analysis_params.evaluation + # these params are classes - we need to turn them into var dicts + self.analysis_params = vars(raw_config.analysis) + self.ml_params = vars(self.analysis_params.ml) + self.evaluation_params = vars(self.analysis_params.evaluation) self.pca_params = self.ml_params From afa1de5605fee43de0457aa05c8ee956eb1982df Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Thu, 26 Jun 2025 16:06:36 +0000 Subject: [PATCH 16/60] fix: expand on pca_params --- spras/config/config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index b479c2a53..515a677c5 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -233,12 +233,12 @@ def process_analysis(self, raw_config: RawConfig): if not raw_config.analysis: return - # these params are classes - we need to turn them into var dicts - self.analysis_params = vars(raw_config.analysis) - self.ml_params = vars(self.analysis_params.ml) - self.evaluation_params = vars(self.analysis_params.evaluation) + self.analysis_params = raw_config.analysis + self.ml_params = self.analysis_params.ml + self.evaluation_params = self.analysis_params.evaluation - self.pca_params = self.ml_params + # self.ml_params is a class, pca_params needs to be a dict. + self.pca_params = vars(self.ml_params) self.hac_params = { "linkage": self.ml_params.linkage, From 5eefc51f79ad38c8c086f0c0e233fa6f91966d36 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 26 Jun 2025 09:35:20 -0700 Subject: [PATCH 17/60] fix: drop include dict --- spras/config/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/config/config.py b/spras/config/config.py index 515a677c5..8d52ebd97 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -238,7 +238,7 @@ def process_analysis(self, raw_config: RawConfig): self.evaluation_params = self.analysis_params.evaluation # self.ml_params is a class, pca_params needs to be a dict. - self.pca_params = vars(self.ml_params) + self.pca_params = {k: v for k, v in vars(self.ml_params).items if k != 'include'} self.hac_params = { "linkage": self.ml_params.linkage, From 52431866627ce081795487c37064403a8045c829 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 26 Jun 2025 10:28:05 -0700 Subject: [PATCH 18/60] fix: call items --- spras/config/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/config/config.py b/spras/config/config.py index 8d52ebd97..23d8c44fd 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -238,7 +238,7 @@ def process_analysis(self, raw_config: RawConfig): self.evaluation_params = self.analysis_params.evaluation # self.ml_params is a class, pca_params needs to be a dict. 
- self.pca_params = {k: v for k, v in vars(self.ml_params).items if k != 'include'} + self.pca_params = {k: v for k, v in vars(self.ml_params).items() if k != 'include'} self.hac_params = { "linkage": self.ml_params.linkage, From 3b20c48bae7ef46a4d15a9b34dc65c399732f95c Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 26 Jun 2025 17:51:54 +0000 Subject: [PATCH 19/60] fix: better typing and defaults --- spras/config/config.py | 17 +++++++++-------- spras/config/raw_config.py | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index 23d8c44fd..d944cc976 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -23,7 +23,7 @@ import numpy as np import yaml -from spras.config.raw_config import ContainerFramework, RawConfig +from spras.config.raw_config import ContainerFramework, RawConfig, Analysis from spras.util import NpHashEncoder, hash_params_sha1_base32 config = None @@ -86,9 +86,11 @@ def __init__(self, raw_config: dict[str, Any]): # Deprecated. Previously a dict mapping algorithm names to a Boolean tracking whether they used directed graphs. self.algorithm_directed = None # A dict with the analysis settings - self.analysis_params = None + self.analysis_params = parsed_raw_config.analysis + # A dict with the evaluation settings + self.evaluation_params = self.analysis_params.evaluation # A dict with the ML settings - self.ml_params = None + self.ml_params = self.analysis_params.ml # A Boolean specifying whether to run ML analysis for individual algorithms self.analysis_include_ml_aggregate_algo = None # A dict with the PCA settings @@ -233,12 +235,11 @@ def process_analysis(self, raw_config: RawConfig): if not raw_config.analysis: return - self.analysis_params = raw_config.analysis - self.ml_params = self.analysis_params.ml - self.evaluation_params = self.analysis_params.evaluation - # self.ml_params is a class, pca_params needs to be a dict. - self.pca_params = {k: v for k, v in vars(self.ml_params).items() if k != 'include'} + self.pca_params = { + "components": self.ml_params.components, + "labels": self.ml_params.labels + } self.hac_params = { "linkage": self.ml_params.linkage, diff --git a/spras/config/raw_config.py b/spras/config/raw_config.py index caff7c690..409b48427 100644 --- a/spras/config/raw_config.py +++ b/spras/config/raw_config.py @@ -86,7 +86,7 @@ class RawConfig(BaseModel): algorithms: list[Algorithm] datasets: list[Dataset] gold_standards: list[GoldStandard] = [] - analysis: Optional[Analysis] = None + analysis: Analysis = Analysis() reconstruction_settings: ReconstructionSettings From 2d4a90f669f77a9d209c658ce5596833a2d43881 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 26 Jun 2025 11:09:54 -0700 Subject: [PATCH 20/60] style: fmt --- spras/config/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/config/config.py b/spras/config/config.py index d944cc976..0c176cbab 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -23,7 +23,7 @@ import numpy as np import yaml -from spras.config.raw_config import ContainerFramework, RawConfig, Analysis +from spras.config.raw_config import Analysis, ContainerFramework, RawConfig from spras.util import NpHashEncoder, hash_params_sha1_base32 config = None From 31ba9d8415f5c0c514d92020331a65bbd6381194 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R."
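For context on the `vars(...)` detour in PATCHes 15-18: `vars()` on a pydantic model returns its `__dict__` of all field values, so the comprehension had to filter out `include` by hand, and one revision forgot to call `.items()`. The typed attribute access that PATCH 19 settles on avoids both pitfalls. A sketch with assumed field names:

    from pydantic import BaseModel

    class MlAnalysis(BaseModel):
        include: bool = False
        components: int = 2
        labels: bool = True

    ml = MlAnalysis()
    print(vars(ml))  # {'include': False, 'components': 2, 'labels': True}
    # Explicit, typed extraction leaves nothing to filter out by hand:
    pca = {"components": ml.components, "labels": ml.labels}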
Date: Tue, 1 Jul 2025 14:54:16 -0700 Subject: [PATCH 21/60] docs: mention oi2 paper link --- spras/omicsintegrator2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 26357f4fd..0dfdcc852 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -22,7 +22,8 @@ """ class OmicsIntegrator2(PRM): required_inputs = ['prizes', 'edges'] - dois = [] + # OI2 does not have a specific paper. Instead, we link to the OI1 paper. + dois = ["10.1371/journal.pcbi.1004879"] def generate_inputs(data: Dataset, filename_map): """ From fcbf67385817fd4c7841c004d59e6f624c747240 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 1 Jul 2025 15:03:33 -0700 Subject: [PATCH 22/60] chore: add nodoi to rwr --- spras/rwr.py | 1 + spras/strwr.py | 1 + 2 files changed, 2 insertions(+) diff --git a/spras/rwr.py b/spras/rwr.py index adeccaaed..5c08d6777 100644 --- a/spras/rwr.py +++ b/spras/rwr.py @@ -12,6 +12,7 @@ class RWR(PRM): required_inputs = ['network','nodes'] + dois = [] @staticmethod def generate_inputs(data, filename_map): diff --git a/spras/strwr.py b/spras/strwr.py index dfa1adc2a..fc8536507 100644 --- a/spras/strwr.py +++ b/spras/strwr.py @@ -13,6 +13,7 @@ # Note: This class is almost identical to the rwr.py file. class ST_RWR(PRM): required_inputs = ['network','sources','targets'] + dois = [] @staticmethod def generate_inputs(data, filename_map): From b9352e8dea4be6a0679a250899c25cb104f893b3 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Thu, 3 Jul 2025 14:57:29 -0700 Subject: [PATCH 23/60] fix: use correct naming convention for strwr --- spras/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/runner.py b/spras/runner.py index 0e956bc62..1235efc2c 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -24,7 +24,7 @@ "omicsintegrator2": OmicsIntegrator2, "pathlinker": PathLinker, "rwr": RWR, - "st_rwr": ST_RWR, + "strwr": ST_RWR, } def get_algorithm(algorithm: str) -> type[PRM]: From 2a4fb2ec5c923954c9748dc63009002264fcebf9 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Wed, 9 Jul 2025 10:40:44 -0700 Subject: [PATCH 24/60] refactor: add config forbid --- spras/config/raw_config_analysis.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/spras/config/raw_config_analysis.py b/spras/config/raw_config_analysis.py index 194fc8f7c..dbec5f1b9 100644 --- a/spras/config/raw_config_analysis.py +++ b/spras/config/raw_config_analysis.py @@ -1,4 +1,4 @@ -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from spras.config.util_enum import CaseInsensitiveEnum @@ -6,12 +6,13 @@ class SummaryAnalysis(BaseModel): include: bool -class GraphspaceAnalysis(BaseModel): - include: bool + model_config = ConfigDict(extra='forbid') class CytoscapeAnalysis(BaseModel): include: bool + model_config = ConfigDict(extra='forbid') + class MlLinkage(CaseInsensitiveEnum): ward = 'ward' complete = 'complete' @@ -31,14 +32,18 @@ class MlAnalysis(BaseModel): linkage: MlLinkage = MlLinkage.ward metric: MlMetric = MlMetric.euclidean + model_config = ConfigDict(extra='forbid') + class EvaluationAnalysis(BaseModel): include: bool aggregate_per_algorithm: bool = False + model_config = ConfigDict(extra='forbid') class Analysis(BaseModel): summary: SummaryAnalysis = SummaryAnalysis(include=False) - graphspace: GraphspaceAnalysis = GraphspaceAnalysis(include=False) cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False) ml: MlAnalysis = MlAnalysis(include=False) evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False) + + model_config = ConfigDict(extra='forbid') From 4ded57eeb490b21a51feec81ed119e400fc363ef Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Wed, 9 Jul 2025 17:45:47 +0000 Subject: [PATCH 25/60] refactor: update config imports --- test/BowTieBuilder/test_btb.py | 2 +- test/RWR/test_RWR.py | 2 +- test/ST_RWR/test_STRWR.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/BowTieBuilder/test_btb.py b/test/BowTieBuilder/test_btb.py index 88b12d0dd..d4a458b3c 100644 --- a/test/BowTieBuilder/test_btb.py +++ b/test/BowTieBuilder/test_btb.py @@ -4,7 +4,7 @@ import pytest -import spras.config as config +import spras.config.config as config config.init_from_file("config/config.yaml") diff --git a/test/RWR/test_RWR.py b/test/RWR/test_RWR.py index 4d6ce7864..b0316ded0 100644 --- a/test/RWR/test_RWR.py +++ b/test/RWR/test_RWR.py @@ -4,7 +4,7 @@ import pytest -import spras.config as config +import spras.config.config as config from spras.rwr import RWR config.init_from_file("config/config.yaml") diff --git a/test/ST_RWR/test_STRWR.py b/test/ST_RWR/test_STRWR.py index a0a5b4ea9..898b24055 100644 --- a/test/ST_RWR/test_STRWR.py +++ b/test/ST_RWR/test_STRWR.py @@ -4,7 +4,7 @@ import pytest -import spras.config as config +import spras.config.config as config from spras.strwr import ST_RWR config.init_from_file("config/config.yaml") From 22b568645ef19cde8d430c1bf77a1aff5057ee84 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
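The `extra='forbid'` settings introduced in PATCH 24 turn unknown keys in an analysis section into validation errors rather than silently ignored input, which is how config typos get surfaced. A hedged sketch mirroring the `SummaryAnalysis` model above:

    from pydantic import BaseModel, ConfigDict, ValidationError

    class SummaryAnalysis(BaseModel):
        include: bool
        model_config = ConfigDict(extra='forbid')

    try:
        SummaryAnalysis.model_validate({"include": True, "includ": True})  # typo'd key
    except ValidationError as err:
        print(err)  # reports 'includ' as an extra input that is not permitted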
Date: Wed, 9 Jul 2025 19:23:09 +0000 Subject: [PATCH 26/60] refactor: better names to schema files --- spras/config/{raw_config_analysis.py => analysis_schema.py} | 0 spras/config/config.py | 2 +- spras/config/{raw_config.py => schema.py} | 2 +- test/test_config.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename spras/config/{raw_config_analysis.py => analysis_schema.py} (100%) rename spras/config/{raw_config.py => schema.py} (98%) diff --git a/spras/config/raw_config_analysis.py b/spras/config/analysis_schema.py similarity index 100% rename from spras/config/raw_config_analysis.py rename to spras/config/analysis_schema.py diff --git a/spras/config/config.py b/spras/config/config.py index 2f4b44efa..293a08ec7 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -23,7 +23,7 @@ import numpy as np import yaml -from spras.config.raw_config import Analysis, ContainerFramework, RawConfig +from spras.config.schema import Analysis, ContainerFramework, RawConfig from spras.util import NpHashEncoder, hash_params_sha1_base32 config = None diff --git a/spras/config/raw_config.py b/spras/config/schema.py similarity index 98% rename from spras/config/raw_config.py rename to spras/config/schema.py index 409b48427..f882e5382 100644 --- a/spras/config/raw_config.py +++ b/spras/config/schema.py @@ -7,7 +7,7 @@ from pydantic import AfterValidator, BaseModel, ConfigDict, Field -from spras.config.raw_config_analysis import Analysis +from spras.config.analysis_schema import Analysis from spras.config.util_enum import CaseInsensitiveEnum # The default length of the truncated hash used to identify parameter combinations DEFAULT_HASH_LENGTH = 7 diff --git a/test/test_config.py b/test/test_config.py index 84d7d1d54..6095ad145 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -4,7 +4,7 @@ import pytest import spras.config.config as config -from spras.config.raw_config import DEFAULT_HASH_LENGTH +from spras.config.schema import DEFAULT_HASH_LENGTH filler_dataset_data: dict[str, str | list[str]] = { "data_dir": "fake", From 7df701dbc11ab69353e37386c3beb609b14cef69 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Thu, 10 Jul 2025 17:14:19 +0000 Subject: [PATCH 27/60] chore: add btb doi --- spras/btb.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spras/btb.py b/spras/btb.py index 416395a55..35d33bb72 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -25,6 +25,7 @@ class BowTieBuilder(PRM): required_inputs = ['sources', 'targets', 'edges'] + dois = ["10.1186/1752-0509-3-67"] #generate input taken from meo.py because they have same input requirements @staticmethod def generate_inputs(data, filename_map): From ea59e4cec7d5197fd394f3704a5406facaa6f1a9 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 11 Jul 2025 21:56:42 +0000 Subject: [PATCH 28/60] fix: no default include, mention model_config allow reason --- spras/config/schema.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spras/config/schema.py b/spras/config/schema.py index f882e5382..d1b86e06d 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -34,10 +34,12 @@ class ContainerRegistry(BaseModel): model_config = ConfigDict(extra='forbid') class AlgorithmParams(BaseModel): - include: bool = False + include: bool directed: Optional[bool] = None - # TODO: use array of runs instead + # TODO: use array of runs instead. We currently rely on the + # extra parameters here to extract the algorithm parameter information, + # which is why this deviates from the usual ConfigDict(extra='forbid').
model_config = ConfigDict(extra='allow') class Algorithm(BaseModel): From fa7d7c984b1e91454926fb0ffa1b51ecdc2377b5 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 11 Jul 2025 15:10:01 -0700 Subject: [PATCH 29/60] fix(config): case-insensitive check on labels --- spras/config/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index 293a08ec7..c6ac8f8e0 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -127,8 +127,8 @@ def process_datasets(self, raw_config: RawConfig): self.datasets = {} for dataset in raw_config.datasets: label = dataset.label - if label in self.datasets: - raise ValueError(f"Datasets must have unique labels, but the label {label} appears at least twice.") + if label.lower() in [key.lower() for key in self.datasets.keys()]: + raise ValueError(f"Datasets must have unique case-insensitive labels, but the label {label} appears at least twice.") self.datasets[label] = dict(dataset) # parse gold standard information From 52eab214a9946a58f33ab34847da2119e2be6807 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 16:23:15 +0000 Subject: [PATCH 30/60] refactor: merge config --- spras/config/analysis_schema.py | 49 ----------------------------- spras/config/schema.py | 56 ++++++++++++++++++++++++++++++++- spras/config/util_enum.py | 4 +++ 3 files changed, 59 insertions(+), 50 deletions(-) delete mode 100644 spras/config/analysis_schema.py diff --git a/spras/config/analysis_schema.py b/spras/config/analysis_schema.py deleted file mode 100644 index dbec5f1b9..000000000 --- a/spras/config/analysis_schema.py +++ /dev/null @@ -1,49 +0,0 @@ -from pydantic import BaseModel, ConfigDict - -from spras.config.util_enum import CaseInsensitiveEnum - - -class SummaryAnalysis(BaseModel): - include: bool - - model_config = ConfigDict(extra='forbid') - -class CytoscapeAnalysis(BaseModel): - include: bool - - model_config = ConfigDict(extra='forbid') - -class MlLinkage(CaseInsensitiveEnum): - ward = 'ward' - complete = 'complete' - average = 'average' - single = 'single' - -class MlMetric(CaseInsensitiveEnum): - euclidean = 'euclidean' - manhattan = 'manhattan' - cosine = 'cosine' - -class MlAnalysis(BaseModel): - include: bool - aggregate_per_algorithm: bool = False - components: int = 2 - labels: bool = True - linkage: MlLinkage = MlLinkage.ward - metric: MlMetric = MlMetric.euclidean - - model_config = ConfigDict(extra='forbid') - -class EvaluationAnalysis(BaseModel): - include: bool - aggregate_per_algorithm: bool = False - - model_config = ConfigDict(extra='forbid') - -class Analysis(BaseModel): - summary: SummaryAnalysis = SummaryAnalysis(include=False) - cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False) - ml: MlAnalysis = MlAnalysis(include=False) - evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False) - - model_config = ConfigDict(extra='forbid') diff --git a/spras/config/schema.py b/spras/config/schema.py index d1b86e06d..f4cad554c 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -1,5 +1,13 @@ """ Contains the raw pydantic schema for the configuration file. + +Using Pydantic as our backing config parser allows us to declaratively +type our config, giving us more robust user errors with guarantees +that parts of the config exist after parsing it through Pydantic. 
+ +We declare models using two classes here: +- `BaseModel` (docs: https://docs.pydantic.dev/latest/api/base_model/) +- `CaseInsensitiveEnum` (see ./util_enum.py) """ import re @@ -7,9 +15,55 @@ from pydantic import AfterValidator, BaseModel, ConfigDict, Field -from spras.config.analysis_schema import Analysis from spras.config.util_enum import CaseInsensitiveEnum + +class SummaryAnalysis(BaseModel): + include: bool + + model_config = ConfigDict(extra='forbid') + +class CytoscapeAnalysis(BaseModel): + include: bool + + model_config = ConfigDict(extra='forbid') + +class MlLinkage(CaseInsensitiveEnum): + ward = 'ward' + complete = 'complete' + average = 'average' + single = 'single' + +class MlMetric(CaseInsensitiveEnum): + euclidean = 'euclidean' + manhattan = 'manhattan' + cosine = 'cosine' + +class MlAnalysis(BaseModel): + include: bool + aggregate_per_algorithm: bool = False + components: int = 2 + labels: bool = True + linkage: MlLinkage = MlLinkage.ward + metric: MlMetric = MlMetric.euclidean + + model_config = ConfigDict(extra='forbid') + +class EvaluationAnalysis(BaseModel): + include: bool + aggregate_per_algorithm: bool = False + + model_config = ConfigDict(extra='forbid') + +class Analysis(BaseModel): + summary: SummaryAnalysis = SummaryAnalysis(include=False) + cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False) + ml: MlAnalysis = MlAnalysis(include=False) + evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False) + + model_config = ConfigDict(extra='forbid') + + # The default length of the truncated hash used to identify parameter combinations DEFAULT_HASH_LENGTH = 7 diff --git a/spras/config/util_enum.py b/spras/config/util_enum.py index 3e73eda98..b7680222b 100644 --- a/spras/config/util_enum.py +++ b/spras/config/util_enum.py @@ -4,6 +4,10 @@ # https://stackoverflow.com/a/76883868/7589775 class CaseInsensitiveEnum(str, Enum): + """ + We prefer this over Enum to make sure the config parsing + is more relaxed when it comes to string enum values. + """ @classmethod def _missing_(cls, value: Any): if isinstance(value, str): From 5343fd0fff089f954d2b323bb0d1daf457d5a7b3 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 09:28:05 -0700 Subject: [PATCH 31/60] chore: deduplicate err --- spras/runner.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/spras/runner.py b/spras/runner.py index 1235efc2c..a023a9606 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -81,8 +81,5 @@ def parse_output(algorithm: str, raw_pathway_file: str, standardized_pathway_fil @param raw_pathway_file: pathway file produced by an algorithm's run function @param standardized_pathway_file: the same pathway written in the universal format """ - try: - algorithm_runner = get_algorithm(algorithm) - except KeyError as exc: - raise NotImplementedError(f'{algorithm} is not currently supported') from exc + algorithm_runner = get_algorithm(algorithm) return algorithm_runner.parse_output(raw_pathway_file, standardized_pathway_file, params) From 3c305f4b5b4c69e2eec44eee2bc8d2e6a1a92cfe Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 16:44:39 +0000 Subject: [PATCH 32/60] docs: use concepts link --- spras/config/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/config/schema.py b/spras/config/schema.py index f4cad554c..63fe1b613 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -6,7 +6,7 @@ that parts of the config exist after parsing it through Pydantic. 
We declare models using two classes here: -- `BaseModel` (docs: https://docs.pydantic.dev/latest/api/base_model/) +- `BaseModel` (docs: https://docs.pydantic.dev/latest/concepts/models/) - `CaseInsensitiveEnum` (see ./util_enum.py) """ From 49e50a03e85a82949e7913bb66a34c70195cb70a Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 18:13:59 +0000 Subject: [PATCH 33/60] refactor: mv util_enum -> util --- spras/config/schema.py | 2 +- spras/config/{util_enum.py => util.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename spras/config/{util_enum.py => util.py} (100%) diff --git a/spras/config/schema.py b/spras/config/schema.py index 63fe1b613..10991ad2f 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -15,7 +15,7 @@ from pydantic import AfterValidator, BaseModel, ConfigDict, Field -from spras.config.util_enum import CaseInsensitiveEnum +from spras.config.util import CaseInsensitiveEnum class SummaryAnalysis(BaseModel): diff --git a/spras/config/util_enum.py b/spras/config/util.py similarity index 100% rename from spras/config/util_enum.py rename to spras/config/util.py From cb28f61396c90415611ae970f4d7fdd7b924519b Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 11:14:19 -0700 Subject: [PATCH 34/60] docs: correct util_enum path --- spras/config/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/config/schema.py b/spras/config/schema.py index 10991ad2f..623c9dd9b 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -7,7 +7,7 @@ We declare models using two classes here: - `BaseModel` (docs: https://docs.pydantic.dev/latest/concepts/models/) -- `CaseInsensitiveEnum` (see ./util_enum.py) +- `CaseInsensitiveEnum` (see ./util.py) """ import re From 647f947636b061449996f0eff9cbb6af9bc450c3 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Mon, 14 Jul 2025 11:27:29 -0700 Subject: [PATCH 35/60] feat: rough draft of args design --- spras/allpairs.py | 13 ++++++------ spras/btb.py | 32 +++++++++--------------------- spras/config/util.py | 8 ++++++++ spras/domino.py | 47 +++++++++++++++++++------------------------- spras/prm.py | 20 ++++++++++++++----- 5 files changed, 59 insertions(+), 61 deletions(-) diff --git a/spras/allpairs.py b/spras/allpairs.py index 222794dbb..b1ffe2ee9 100644 --- a/spras/allpairs.py +++ b/spras/allpairs.py @@ -1,6 +1,7 @@ import warnings from pathlib import Path +from spras.config.util import Empty from spras.containers import prepare_volume, run_container_and_log from spras.dataset import Dataset from spras.interactome import ( @@ -14,7 +15,7 @@ __all__ = ['AllPairs'] -class AllPairs(PRM): +class AllPairs(PRM[Empty]): required_inputs = ['nodetypes', 'network', 'directed_flag'] dois = [] @@ -71,7 +72,7 @@ def generate_inputs(data: Dataset, filename_map): header=["#Interactor1", "Interactor2", "Weight"]) @staticmethod - def run(nodetypes=None, network=None, directed_flag=None, output_file=None, container_framework="docker"): + def run(inputs, args, output_file, container_framework="docker"): """ Run All Pairs Shortest Paths with Docker @param nodetypes: input node types with sources and targets (required) @param network: input network file (required) @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) @param output_file: path to the output pathway file (required) """ - if not nodetypes or not network or not output_file or not directed_flag: + if not inputs["nodetypes"] or not inputs["network"] or not inputs["directed_flag"]: raise ValueError('Required All Pairs Shortest Paths arguments are missing') work_dir = '/apsp' @@ -87,10 +88,10 @@ def run(nodetypes=None, network=None, directed_flag=None, output_file=None, cont # Each volume is a tuple (src, dest) volumes = list() - bind_path, node_file = prepare_volume(nodetypes, work_dir) + bind_path, node_file = prepare_volume(inputs["nodetypes"], work_dir) volumes.append(bind_path) - bind_path, network_file = prepare_volume(network, work_dir) + bind_path, network_file = prepare_volume(inputs["network"], work_dir) volumes.append(bind_path) # Create the parent directories for the output file if needed @@ -103,7 +104,7 @@ def run(nodetypes=None, network=None, directed_flag=None, output_file=None, cont '--network', network_file, '--nodes', node_file, '--output', mapped_out_file] - if Path(directed_flag).read_text().strip() == "true": + if Path(inputs["directed_flag"]).read_text().strip() == "true": command.append("--directed") container_suffix = "allpairs:v4" diff --git a/spras/btb.py b/spras/btb.py index 35d33bb72..a4098ee08 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -1,5 +1,6 @@ from pathlib import Path +from spras.config.util import Empty from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( convert_undirected_to_directed, @@ -23,19 +24,13 @@ Interactor1 Interactor2 Weight """ -class BowTieBuilder(PRM): +class BowTieBuilder(PRM[Empty]): required_inputs = ['sources', 'targets', 'edges'] dois = ["10.1186/1752-0509-3-67"] #generate input taken from meo.py because they have same input requirements @staticmethod def generate_inputs(data, filename_map): - """ - Access fields from the dataset and write the required input files - @param data: dataset - @param filename_map: a dict mapping
file types in the required_inputs to the filename for that type - @return: - """ for input_type in BowTieBuilder.required_inputs: if input_type not in filename_map: raise ValueError(f"{input_type} filename is missing") @@ -70,30 +65,21 @@ def generate_inputs(data, filename_map): # Skips parameter validation step @staticmethod - def run(sources=None, targets=None, edges=None, output_file=None, container_framework="docker"): - """ - Run BTB with Docker - @param sources: input source file (required) - @param targets: input target file (required) - @param edges: input edge file (required) - @param output_file: path to the output pathway file (required) - @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) - """ - + def run(inputs, args, output_file, container_framework="docker"): # Tests for pytest (docker container also runs this) # Testing out here avoids the trouble that container errors provide - if not sources or not targets or not edges or not output_file: + if not inputs["sources"] or not inputs["targets"] or not inputs["edges"]: raise ValueError('Required BowTieBuilder arguments are missing') - if not Path(sources).exists() or not Path(targets).exists() or not Path(edges).exists(): + if not Path(inputs["sources"]).exists() or not Path(inputs["targets"]).exists() or not Path(inputs["edges"]).exists(): raise ValueError('Missing input file') # Testing for btb index errors # TODO: This error will never actually occur if the inputs are passed through # `generate_inputs`. See the discussion about removing this or making this a habit at # https://github.com/Reed-CompBio/spras/issues/306. - with open(edges, 'r') as edge_file: + with open(inputs["edges"], 'r') as edge_file: try: for line in edge_file: line = line.strip().split('\t')[2] @@ -107,13 +93,13 @@ def run(sources=None, targets=None, edges=None, output_file=None, container_fram # Each volume is a tuple (src, dest) volumes = list() - bind_path, source_file = prepare_volume(sources, work_dir) + bind_path, source_file = prepare_volume(inputs["sources"], work_dir) volumes.append(bind_path) - bind_path, target_file = prepare_volume(targets, work_dir) + bind_path, target_file = prepare_volume(inputs["targets"], work_dir) volumes.append(bind_path) - bind_path, edges_file = prepare_volume(edges, work_dir) + bind_path, edges_file = prepare_volume(inputs["edges"], work_dir) volumes.append(bind_path) # Use its --output argument to set the output file prefix to specify an absolute path and prefix diff --git a/spras/config/util.py b/spras/config/util.py index b7680222b..c23374a50 100644 --- a/spras/config/util.py +++ b/spras/config/util.py @@ -1,4 +1,5 @@ from enum import Enum +from pydantic import BaseModel, ConfigDict from typing import Any @@ -17,3 +18,10 @@ def _missing_(cls, value: Any): if member.lower() == value: return member return None + + +class Empty(BaseModel): + """ + The empty base model. Used for specifying that an algorithm takes no parameters. 
+ """ + model_config = ConfigDict(extra="forbid") diff --git a/spras/domino.py b/spras/domino.py index 5205a81cd..a70f1a1e3 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -2,6 +2,8 @@ from pathlib import Path import pandas as pd +from pydantic import BaseModel, ConfigDict +from typing import Optional from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( @@ -16,6 +18,14 @@ ID_PREFIX = 'ENSG0' ID_PREFIX_LEN = len(ID_PREFIX) +class DominoParams(BaseModel): + module_threshold: Optional[float] + "the p-value threshold for considering a slice as relevant (optional)" + + slice_threshold: Optional[float] + "the p-value threshold for considering a putative module as final module (optional)" + + model_config = ConfigDict(use_attribute_docstrings=True) """ DOMINO will construct a fully undirected graph from the provided input file @@ -26,18 +36,12 @@ - the expected raw input file should have node pairs in the 1st and 3rd columns, with a 'ppi' in the 2nd column - it can include repeated and bidirectional edges """ -class DOMINO(PRM): +class DOMINO(PRM[DominoParams]): required_inputs = ['network', 'active_genes'] dois = ["10.15252/msb.20209593"] @staticmethod def generate_inputs(data, filename_map): - """ - Access fields from the dataset and write the required input files - @param data: dataset - @param filename_map: a dict mapping file types in the required_inputs to the filename for that type - @return: - """ for input_type in DOMINO.required_inputs: if input_type not in filename_map: raise ValueError(f"{input_type} filename is missing") @@ -72,20 +76,9 @@ def generate_inputs(data, filename_map): header=['ID_interactor_A', 'ppi', 'ID_interactor_B']) @staticmethod - def run(network=None, active_genes=None, output_file=None, slice_threshold=None, module_threshold=None, container_framework="docker"): - """ - Run DOMINO with Docker. - Let visualization be always true, parallelization be always 1 thread, and use_cache be always false. - DOMINO produces multiple output module files in an HTML format. SPRAS concatenates these files into one file. - @param network: input network file (required) - @param active_genes: input active genes (required) - @param output_file: path to the output pathway file (required) - @param slice_threshold: the p-value threshold for considering a slice as relevant (optional) - @param module_threshold: the p-value threshold for considering a putative module as final module (optional) - @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) - """ - - if not network or not active_genes or not output_file: + def run(inputs, args, output_file, container_framework="docker"): + # Let visualization be always true, parallelization be always 1 thread, and use_cache be always false. 
+ if not inputs["network"] or not inputs["active_genes"]: raise ValueError('Required DOMINO arguments are missing') work_dir = '/spras' @@ -93,10 +86,10 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, # Each volume is a tuple (source, destination) volumes = list() - bind_path, network_file = prepare_volume(network, work_dir) + bind_path, network_file = prepare_volume(inputs["network"], work_dir) volumes.append(bind_path) - bind_path, node_file = prepare_volume(active_genes, work_dir) + bind_path, node_file = prepare_volume(inputs["active_genes"], work_dir) volumes.append(bind_path) out_dir = Path(output_file).parent @@ -132,11 +125,11 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, '--visualization', 'true'] # Add optional arguments - if slice_threshold is not None: + if args.slice_threshold is not None: # DOMINO readme has the wrong argument https://github.com/Shamir-Lab/DOMINO/issues/12 - domino_command.extend(['--slice_threshold', str(slice_threshold)]) - if module_threshold is not None: - domino_command.extend(['--module_threshold', str(module_threshold)]) + domino_command.extend(['--slice_threshold', str(args.slice_threshold)]) + if args.module_threshold is not None: + domino_command.extend(['--module_threshold', str(args.module_threshold)]) run_container_and_log('DOMINO', container_framework, diff --git a/spras/prm.py b/spras/prm.py index b5d8501dd..06d005b2a 100644 --- a/spras/prm.py +++ b/spras/prm.py @@ -1,11 +1,12 @@ -import typing from abc import ABC, abstractmethod -from typing import Any +from pydantic import BaseModel +from typing import Any, cast, TypeVar, Generic from spras.dataset import Dataset +T = TypeVar('T', bound=BaseModel) -class PRM(ABC): +class PRM(ABC, Generic[T]): """ The PRM (Pathway Reconstruction Module) class, which defines the interface that `runner.py` uses to handle @@ -15,7 +16,7 @@ class PRM(ABC): required_inputs: list[str] = [] # DOIs aren't strictly required (e.g. local neighborhood), # but it should be explicitly declared that there are no DOIs. - dois: list[str] = typing.cast(list[str], None) + dois: list[str] = cast(list[str], None) def __init_subclass__(cls): # modified from https://stackoverflow.com/a/58206480/7589775 @@ -30,11 +31,20 @@ def __init_subclass__(cls): @staticmethod @abstractmethod def generate_inputs(data: Dataset, filename_map: dict[str, str]): + """ + Access fields from the dataset and write the required input files + @param data: dataset + @param filename_map: a dict mapping file types in the required_inputs to the filename for that type + """ raise NotImplementedError @staticmethod @abstractmethod - def run(**kwargs): + def run(inputs: dict[str, str], args: T, output_file: str, container_framework="docker"): + """ + Runs an algorithm with the specified inputs, algorithm params (T), + the designated output_file, and the desired container_framework. + """ raise NotImplementedError @staticmethod From 76011e07978d38fd41dded82434a3e8a5f210154 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Mon, 14 Jul 2025 18:57:10 +0000 Subject: [PATCH 36/60] feat: type oi1/oi2, rwr/strwr --- spras/omicsintegrator1.py | 103 ++++++++++++++++++++++++-------------- spras/omicsintegrator2.py | 76 ++++++++++++++++++++-------- spras/rwr.py | 24 ++++++--- spras/strwr.py | 27 ++++++---- 4 files changed, 152 insertions(+), 78 deletions(-) diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index 0d3eb4bfd..7a69a01d6 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -1,4 +1,6 @@ from pathlib import Path +from pydantic import BaseModel, ConfigDict +from typing import Optional from spras.containers import prepare_volume, run_container_and_log from spras.interactome import reinsert_direction_col_mixed @@ -35,8 +37,47 @@ def write_conf(filename=Path('config.txt'), w=None, b=None, d=None, mu=None, noi f.write('processes = 1\n') f.write('threads = 1\n') +class OmicsIntegrator1Params(BaseModel): + dummy_mode: Optional[str] = None + mu_squared: Optional[str] = None + exclude_terms: Optional[str] = None + + noisy_edges: Optional[int] = None + "How many times you would like to add noise to the given edge values and re-run the algorithm." + + shuffled_prizes: Optional[int] = None + "How many times the algorithm should shuffle the prizes and re-run" + + random_terminals: Optional[int] = None + "How many times to apply the given prizes to random nodes in the interactome" + + seed: Optional[str] = None + "the randomness seed to use" + + w: Optional[float] = None + "the number of trees" + + b: Optional[str] = None + "the trade-off between including more terminals and using less reliable edges" + + d: Optional[str] = None + "controls the maximum path-length from v0 to terminal nodes" + + mu: Optional[float] = None + "controls the degree-based negative prizes (default 0.0)" + + noise: Optional[str] = None + "Standard Deviation of the gaussian noise added to edges in Noisy Edges Randomizations" + + g: Optional[str] = None + "(Gamma) multiplicative edge penalty from degree of endpoints" + + r: Optional[str] = None + "msgsteiner parameter that adds random noise to edges, which is rarely needed because the Forest --noisyEdges option is recommended instead (default 0)" + + model_config = ConfigDict(use_attribute_docstrings=True) + +class OmicsIntegrator1(PRM[OmicsIntegrator1Params]): """ Omics Integrator 1 works with partially directed graphs - it takes in the universal input directly @@ -96,27 +137,12 @@ def generate_inputs(data, filename_map): with open(filename_map['dummy_nodes'], mode='w'): pass - # TODO add parameter validation # TODO add support for knockout argument # TODO add reasonable default values # TODO document required arguments @staticmethod - def run(edges=None, prizes=None, dummy_nodes=None, dummy_mode=None, mu_squared=None, exclude_terms=None, - output_file=None, noisy_edges=None, shuffled_prizes=None, random_terminals=None, - seed=None, w=None, b=None, d=None, mu=None, noise=None, g=None, r=None, container_framework="docker"): - """ - Run Omics Integrator 1 in the Docker image with the provided parameters. - Does not support the garnet, cyto30, knockout, cv, or cv-reps arguments. - The configuration file is generated from the provided arguments. - Does not support the garnetBeta, processes, or threads configuration file parameters. - The msgpath is not required because msgsteiner is available in the Docker image. - Only the optimal forest sif file is retained. - All other output files are deleted.
- @param output_file: the name of the output sif file for the optimal forest, which will overwrite any - existing file with this name - @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) - """ - if edges is None or prizes is None or output_file is None or w is None or b is None or d is None: + def run(inputs, args, output_file, container_framework="docker"): + if inputs["edges"] is None or inputs["prizes"] is None or output_file is None or w is None or b is None or d is None: raise ValueError('Required Omics Integrator 1 arguments are missing') work_dir = '/spras' @@ -124,10 +150,10 @@ def run(edges=None, prizes=None, dummy_nodes=None, dummy_mode=None, mu_squared=N # Each volume is a tuple (src, dest) volumes = list() - bind_path, edge_file = prepare_volume(edges, work_dir) + bind_path, edge_file = prepare_volume(inputs["edges"], work_dir) volumes.append(bind_path) - bind_path, prize_file = prepare_volume(prizes, work_dir) + bind_path, prize_file = prepare_volume(inputs["prizes"], work_dir) volumes.append(bind_path) # 4 dummy mode possibilities: @@ -137,10 +163,10 @@ def run(edges=None, prizes=None, dummy_nodes=None, dummy_mode=None, mu_squared=N # 4. file -> connect the dummy node to a specific list of nodes provided in a file # add dummy node file to the volume if dummy_mode is not None and it is 'file' - if dummy_mode == 'file': - if dummy_nodes is None: + if args.dummy_mode == 'file': + if inputs["dummy_nodes"] is None: raise ValueError("dummy_nodes file is required when dummy_mode is set to 'file'") - bind_path, dummy_file = prepare_volume(dummy_nodes, work_dir) + bind_path, dummy_file = prepare_volume(inputs["dummy_nodes"], work_dir) volumes.append(bind_path) out_dir = Path(output_file).parent @@ -152,7 +178,8 @@ def run(edges=None, prizes=None, dummy_nodes=None, dummy_mode=None, mu_squared=N conf_file = 'oi1-configuration.txt' conf_file_local = Path(out_dir, conf_file) # Temporary file that will be deleted after running Omics Integrator 1 - write_conf(conf_file_local, w=w, b=b, d=d, mu=mu, noise=noise, g=g, r=r) + write_conf(conf_file_local, w=args.w, b=args.b, d=args.d, mu=args.mu, + noise=args.noise, g=args.g, r=args.r) bind_path, conf_file = prepare_volume(str(conf_file_local), work_dir) volumes.append(bind_path) @@ -165,27 +192,27 @@ def run(edges=None, prizes=None, dummy_nodes=None, dummy_mode=None, mu_squared=N '--outlabel', 'oi1'] # add the dummy mode argument - if dummy_mode is not None and dummy_mode: + if args.dummy_mode is not None and args.dummy_mode: # for custom dummy modes, add the file - if dummy_mode == 'file': - command.extend(['--dummyMode', dummy_file]) + if args.dummy_mode == 'file': + command.extend(['--dummyMode', inputs["dummy_file"]]) # else pass in the dummy_mode and let oi1 handle it else: - command.extend(['--dummyMode', dummy_mode]) + command.extend(['--dummyMode', args.dummy_mode]) # Add optional arguments - if mu_squared is not None and mu_squared: + if args.mu_squared is not None and args.mu_squared: command.extend(['--musquared']) - if exclude_terms is not None and exclude_terms: + if args.exclude_terms is not None and args.exclude_terms: command.extend(['--excludeTerms']) - if noisy_edges is not None: - command.extend(['--noisyEdges', str(noisy_edges)]) - if shuffled_prizes is not None: - command.extend(['--shuffledPrizes', str(shuffled_prizes)]) - if random_terminals is not None: - command.extend(['--randomTerminals', str(random_terminals)]) - if seed is not None: - 
command.extend(['--seed', str(seed)]) + if args.noisy_edges is not None: + command.extend(['--noisyEdges', str(args.noisy_edges)]) + if args.shuffled_prizes is not None: + command.extend(['--shuffledPrizes', str(args.shuffled_prizes)]) + if args.random_terminals is not None: + command.extend(['--randomTerminals', str(args.random_terminals)]) + if args.seed is not None: + command.extend(['--seed', str(args.seed)]) container_suffix = "omics-integrator-1:no-conda" # no-conda version is the default run_container_and_log('Omics Integrator 1', diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 355d71bd6..42dc466cd 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -1,4 +1,6 @@ from pathlib import Path +from pydantic import BaseModel, ConfigDict +from typing import Optional import pandas as pd @@ -10,6 +12,36 @@ __all__ = ['OmicsIntegrator2'] +class OmicsIntegrator2Params(BaseModel): + w: float = 6 + "Omega: the weight of the edges connecting the dummy node to the nodes selected by dummyMode" + + b: float = 1 + "Beta: scaling factor of prizes" + + g: float = 20 + "Gamma: multiplicative edge penalty from degree of endpoints" + + noise: Optional[str] + "Standard Deviation of the gaussian noise added to edges in Noisy Edges Randomizations." + + noisy_edges: Optional[int] + "An integer specifying how many times to add noise to the given edge values and re-run." + + random_terminals: Optional[str] + "An integer specifying how many times to apply your given prizes to random nodes in the interactome and re-run" + + dummy_mode: Optional[str] + """ + Tells the program which nodes in the interactome to connect the dummy node to. (default: terminals) + "terminals" = connect to all terminals + "others" = connect to all nodes except for terminals + "all" = connect to all nodes in the interactome. + """ + + seed: Optional[str] + "The random seed to use for this run." + """ Omics Integrator 2 will construct a fully undirected graph from the provided input file - in the algorithm, it uses nx.Graph() objects, which are undirected @@ -20,11 +52,12 @@ - the expected raw input file should have node pairs in the 1st and 2nd columns, with a weight in the 3rd column - it can include repeated and bidirectional edges """ -class OmicsIntegrator2(PRM): +class OmicsIntegrator2(PRM[OmicsIntegrator2Params]): required_inputs = ['prizes', 'edges'] # OI2 does not have a specific paper. Instead, we link to the OI1 paper. dois = ["10.1371/journal.pcbi.1004879"] + @staticmethod def generate_inputs(data: Dataset, filename_map): """ Access fields from the dataset and write the required input files. @@ -69,8 +102,7 @@ def generate_inputs(data: Dataset, filename_map): # TODO add reasonable default values # TODO document required arguments @staticmethod - def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise=None, noisy_edges=None, - random_terminals=None, dummy_mode=None, seed=None, container_framework="docker"): + def run(inputs, args, output_file, container_framework="docker"): """ Run Omics Integrator 2 in the Docker image with the provided parameters. Only the .tsv output file is retained and then renamed. 
@@ -78,7 +110,7 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise
         @param output_file: the name of the output file, which will overwrite any existing file with this name
         @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional)
         """
-        if edges is None or prizes is None or output_file is None:
+        if inputs["edges"] is None or inputs["prizes"] is None:
             raise ValueError('Required Omics Integrator 2 arguments are missing')
 
         work_dir = '/spras'
@@ -86,10 +118,10 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise
         # Each volume is a tuple (src, dest)
         volumes = list()
 
-        bind_path, edge_file = prepare_volume(edges, work_dir)
+        bind_path, edge_file = prepare_volume(inputs["edges"], work_dir)
         volumes.append(bind_path)
 
-        bind_path, prize_file = prepare_volume(prizes, work_dir)
+        bind_path, prize_file = prepare_volume(inputs["prizes"], work_dir)
         volumes.append(bind_path)
 
         out_dir = Path(output_file).parent
@@ -102,23 +134,23 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise
                    '-o', mapped_out_dir, '--filename', 'oi2']
 
         # Add optional arguments
-        if w is not None:
-            command.extend(['-w', str(w)])
-        if b is not None:
-            command.extend(['-b', str(b)])
-        if g is not None:
-            command.extend(['-g', str(g)])
-        if noise is not None:
-            command.extend(['-noise', str(noise)])
-        if noisy_edges is not None:
-            command.extend(['--noisy_edges', str(noisy_edges)])
-        if random_terminals is not None:
-            command.extend(['--random_terminals', str(random_terminals)])
-        if dummy_mode is not None:
+        if args.w is not None:
+            command.extend(['-w', str(args.w)])
+        if args.b is not None:
+            command.extend(['-b', str(args.b)])
+        if args.g is not None:
+            command.extend(['-g', str(args.g)])
+        if args.noise is not None:
+            command.extend(['-noise', str(args.noise)])
+        if args.noisy_edges is not None:
+            command.extend(['--noisy_edges', str(args.noisy_edges)])
+        if args.random_terminals is not None:
+            command.extend(['--random_terminals', str(args.random_terminals)])
+        if args.dummy_mode is not None:
             # This argument does not follow the other naming conventions
-            command.extend(['--dummyMode', str(dummy_mode)])
-        if seed is not None:
-            command.extend(['--seed', str(seed)])
+            command.extend(['--dummyMode', str(args.dummy_mode)])
+        if args.seed is not None:
+            command.extend(['--seed', str(args.seed)])
 
         container_suffix = "omics-integrator-2:v2"
         run_container_and_log('Omics Integrator 2',
diff --git a/spras/rwr.py b/spras/rwr.py
index 5c08d6777..12fc5d422 100644
--- a/spras/rwr.py
+++ b/spras/rwr.py
@@ -1,4 +1,6 @@
 from pathlib import Path
+from pydantic import BaseModel, ConfigDict
+from typing import Optional
 
 import pandas as pd
 
@@ -10,7 +12,13 @@
 
 __all__ = ['RWR']
 
-class RWR(PRM):
+class RWRParams(BaseModel):
+    threshold: Optional[int]
+    alpha: Optional[float]
+
+    model_config = ConfigDict(use_attribute_docstrings=True)
+
+class RWR(PRM[RWRParams]):
     required_inputs = ['network','nodes']
     dois = []
 
@@ -34,11 +42,11 @@ def generate_inputs(data, filename_map):
         edges.to_csv(filename_map['network'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False)
 
     @staticmethod
-    def run(network=None, nodes=None, alpha=None, output_file=None, container_framework="docker", threshold=None):
-        if not nodes:
+    def run(inputs, args, output_file, container_framework="docker"):
+        if not inputs["nodes"] or not inputs["network"]:
             raise ValueError('Required RWR arguments are missing')
 
-        with Path(network).open() as network_f:
+        with Path(inputs["network"]).open() as network_f:
             for line in network_f:
                 line = line.strip()
                 endpoints = line.split("|")
@@ -49,10 +57,10 @@ def run(network=None, nodes=None, alpha=None, output_file=None, container_framew
         # Each volume is a tuple (src, dest)
         volumes = list()
 
-        bind_path, nodes_file = prepare_volume(nodes, work_dir)
+        bind_path, nodes_file = prepare_volume(inputs["nodes"], work_dir)
         volumes.append(bind_path)
 
-        bind_path, network_file = prepare_volume(network, work_dir)
+        bind_path, network_file = prepare_volume(inputs["network"], work_dir)
         volumes.append(bind_path)
 
         # RWR does not provide an argument to set the output directory
@@ -70,8 +78,8 @@ def run(network=None, nodes=None, alpha=None, output_file=None, container_framew
                    '--output', mapped_out_prefix]
 
         # Add alpha as an optional argument
-        if alpha is not None:
-            command.extend(['--alpha', str(alpha)])
+        if args.alpha is not None:
+            command.extend(['--alpha', str(args.alpha)])
 
         container_suffix = 'rwr:v1'
         out = run_container(container_framework,
diff --git a/spras/strwr.py b/spras/strwr.py
index fc8536507..6693d7f5e 100644
--- a/spras/strwr.py
+++ b/spras/strwr.py
@@ -1,6 +1,7 @@
 from pathlib import Path
-
 import pandas as pd
+from pydantic import BaseModel, ConfigDict
+from typing import Optional
 
 from spras.containers import prepare_volume, run_container
 from spras.dataset import Dataset
@@ -10,8 +11,14 @@
 
 __all__ = ['ST_RWR']
 
+class ST_RWRParams(BaseModel):
+    threshold: Optional[int]
+    alpha: Optional[float]
+
+    model_config = ConfigDict(use_attribute_docstrings=True)
+
 # Note: This class is almost identical to the rwr.py file.
-class ST_RWR(PRM):
+class ST_RWR(PRM[ST_RWRParams]):
     required_inputs = ['network','sources','targets']
     dois = []
 
@@ -36,11 +43,11 @@ def generate_inputs(data, filename_map):
         edges.to_csv(filename_map['network'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False)
 
     @staticmethod
-    def run(network=None, sources=None, targets=None, alpha=None, output_file=None, container_framework="docker", threshold=None):
-        if not sources or not targets or not network or not output_file:
+    def run(inputs, args, output_file, container_framework="docker"):
+        if not inputs["sources"] or not inputs["targets"] or not inputs["network"] or not output_file:
             raise ValueError('Required local_neighborhood arguments are missing')
 
-        with Path(network).open() as network_f:
+        with Path(inputs["network"]).open() as network_f:
             for line in network_f:
                 line = line.strip()
                 endpoints = line.split("|")
@@ -52,13 +59,13 @@ def run(network=None, sources=None, targets=None, alpha=None, output_file=None,
         # Each volume is a tuple (src, dest)
         volumes = list()
 
-        bind_path, source_file = prepare_volume(sources, work_dir)
+        bind_path, source_file = prepare_volume(inputs["sources"], work_dir)
         volumes.append(bind_path)
 
-        bind_path, target_file = prepare_volume(targets, work_dir)
+        bind_path, target_file = prepare_volume(inputs["targets"], work_dir)
         volumes.append(bind_path)
 
-        bind_path, network_file = prepare_volume(network, work_dir)
+        bind_path, network_file = prepare_volume(inputs["network"], work_dir)
         volumes.append(bind_path)
 
         # ST_RWR does not provide an argument to set the output directory
@@ -77,8 +84,8 @@ def run(network=None, sources=None, targets=None, alpha=None, output_file=None,
                    '--output', mapped_out_prefix]
 
         # Add alpha as an optional argument
-        if alpha is not None:
-            command.extend(['--alpha', str(alpha)])
+        if args.alpha is not None:
+            command.extend(['--alpha', 
str(args.alpha)]) container_suffix = 'st-rwr:v1' out = run_container(container_framework, From 94b50c81fc1da7577c4f056336cb89411676da8e Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 12:27:46 -0700 Subject: [PATCH 37/60] refactor: meo, mcf, pl types --- spras/meo.py | 32 ++++++++++++++++++++++++-------- spras/mincostflow.py | 42 +++++++++++++++++++++--------------------- spras/pathlinker.py | 24 ++++++++++++++++-------- 3 files changed, 61 insertions(+), 37 deletions(-) diff --git a/spras/meo.py b/spras/meo.py index d4d79bf9f..06f041786 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -1,5 +1,7 @@ import os from pathlib import Path +from pydantic import BaseModel, ConfigDict +from typing import Optional from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( @@ -65,6 +67,21 @@ def write_properties(filename=Path('properties.txt'), edges=None, sources=None, # Do not need csp.phase, csp.gen.file, or csp.sol.file because MAXCSP is not supported +class MEOParams(BaseModel): + max_path_length: Optional[str] + "the maximal length of a path from sources and targets to orient." + + local_search: Optional[str] + """ + a "Yes"/"No" parameter that enables MEO's local search functionality. + See "Improving approximations with local search" in the associated paper + for more information. + """ + + rand_restarts: Optional[int] + "The number of random restarts to do." + + model_config = ConfigDict(use_attribute_docstrings=True) """ MEO can support partially directed graphs @@ -82,7 +99,7 @@ def write_properties(filename=Path('properties.txt'), edges=None, sources=None, """ -class MEO(PRM): +class MEO(PRM[MEOParams]): required_inputs = ['sources', 'targets', 'edges'] dois = ["10.1093/nar/gkq1207"] @@ -126,8 +143,7 @@ def generate_inputs(data, filename_map): # TODO add parameter validation # TODO document required arguments @staticmethod - def run(edges=None, sources=None, targets=None, output_file=None, max_path_length=None, local_search=None, - rand_restarts=None, container_framework="docker"): + def run(inputs, args, output_file=None, container_framework="docker"): """ Run Maximum Edge Orientation in the Docker image with the provided parameters. The properties file is generated from the provided arguments. 
@@ -138,7 +154,7 @@ def run(edges=None, sources=None, targets=None, output_file=None, max_path_lengt @param output_file: the name of the output edge file, which will overwrite any existing file with this name @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) """ - if edges is None or sources is None or targets is None or output_file is None: + if inputs["edges"] is None or inputs["sources"] is None or inputs["targets"] is None: raise ValueError('Required Maximum Edge Orientation arguments are missing') work_dir = '/spras' @@ -146,13 +162,13 @@ def run(edges=None, sources=None, targets=None, output_file=None, max_path_lengt # Each volume is a tuple (src, dest) volumes = list() - bind_path, edge_file = prepare_volume(edges, work_dir) + bind_path, edge_file = prepare_volume(inputs["edges"], work_dir) volumes.append(bind_path) - bind_path, source_file = prepare_volume(sources, work_dir) + bind_path, source_file = prepare_volume(inputs["sources"], work_dir) volumes.append(bind_path) - bind_path, target_file = prepare_volume(targets, work_dir) + bind_path, target_file = prepare_volume(inputs["targets"], work_dir) volumes.append(bind_path) out_dir = Path(output_file).parent @@ -171,7 +187,7 @@ def run(edges=None, sources=None, targets=None, output_file=None, max_path_lengt properties_file_local = Path(out_dir, properties_file) write_properties(filename=properties_file_local, edges=edge_file, sources=source_file, targets=target_file, edge_output=mapped_output_file, path_output=mapped_path_output, - max_path_length=max_path_length, local_search=local_search, rand_restarts=rand_restarts, framework=container_framework) + max_path_length=args.max_path_length, local_search=args.local_search, rand_restarts=args.rand_restarts, framework=container_framework) bind_path, properties_file = prepare_volume(str(properties_file_local), work_dir) volumes.append(bind_path) diff --git a/spras/mincostflow.py b/spras/mincostflow.py index d2d458b02..77f493f14 100644 --- a/spras/mincostflow.py +++ b/spras/mincostflow.py @@ -1,4 +1,6 @@ from pathlib import Path +from pydantic import BaseModel, ConfigDict +from typing import Optional from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( @@ -10,6 +12,15 @@ __all__ = ['MinCostFlow'] +class MinCostFlowParams(BaseModel): + flow: Optional[float] + "amount of flow going through the graph" + + capacity: Optional[float] + "amount of capacity allowed on each edge" + + model_config = ConfigDict(use_attribute_docstrings=True) + """ MinCostFlow deals with fully directed graphs - OR Tools MCF is designed for directed graphs @@ -22,7 +33,7 @@ - the expected raw input file should have node pairs in the 1st and 2nd columns, with the weight in the 3rd column - it can include repeated and bidirectional edges """ -class MinCostFlow (PRM): +class MinCostFlow(PRM[MinCostFlowParams]): required_inputs = ['sources', 'targets', 'edges'] dois = ["10.1038/s41540-020-00167-1"] @@ -60,20 +71,9 @@ def generate_inputs(data, filename_map): header=False) @staticmethod - def run(sources=None, targets=None, edges=None, output_file=None, flow=None, capacity=None, container_framework="docker"): - """ - Run min cost flow with Docker (or singularity) - @param sources: input sources (required) - @param targets: input targets (required) - @param edges: input network file (required) - @param output_file: output file name (required) - @param flow: amount of flow going through the graph (optional) 
-        @param capacity: amount of capacity allowed on each edge (optional)
-        @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional)
-        """
-
+    def run(inputs, args, output_file, container_framework="docker"):
         # ensures that these parameters are required
-        if not sources or not targets or not edges or not output_file:
+        if not inputs["sources"] or not inputs["targets"] or not inputs["edges"]:
             raise ValueError('Required MinCostFlow arguments are missing')
 
         # the data files will be mapped within this directory within the container
@@ -82,13 +82,13 @@ def run(sources=None, targets=None, edges=None, output_file=None, flow=None, cap
         # the tuple is for mapping the sources, targets, edges, and output
         volumes = list()
 
-        bind_path, sources_file = prepare_volume(sources, work_dir)
+        bind_path, sources_file = prepare_volume(inputs["sources"], work_dir)
         volumes.append(bind_path)
 
-        bind_path, targets_file = prepare_volume(targets, work_dir)
+        bind_path, targets_file = prepare_volume(inputs["targets"], work_dir)
         volumes.append(bind_path)
 
-        bind_path, edges_file = prepare_volume(edges, work_dir)
+        bind_path, edges_file = prepare_volume(inputs["edges"], work_dir)
         volumes.append(bind_path)
 
         # Create a prefix for the output filename and ensure the directory exists
@@ -107,10 +107,10 @@
                    '--output', mapped_out_prefix]
 
         # Optional arguments (extend the command if available)
-        if flow is not None:
-            command.extend(['--flow', str(flow)])
-        if capacity is not None:
-            command.extend(['--capacity', str(capacity)])
+        if args.flow is not None:
+            command.extend(['--flow', str(args.flow)])
+        if args.capacity is not None:
+            command.extend(['--capacity', str(args.capacity)])
 
         # choosing to run in docker or singularity container
         container_suffix = "mincostflow"
diff --git a/spras/pathlinker.py b/spras/pathlinker.py
index d0504c489..8852b959a 100644
--- a/spras/pathlinker.py
+++ b/spras/pathlinker.py
@@ -1,5 +1,7 @@
 import warnings
 from pathlib import Path
+from pydantic import BaseModel, ConfigDict
+from typing import Optional
 
 from spras.containers import prepare_volume, run_container_and_log
 from spras.dataset import Dataset
@@ -12,6 +14,12 @@
 
 __all__ = ['PathLinker']
 
+class PathLinkerParams(BaseModel):
+    k: Optional[int]
+    "path length (optional)"
+
+    model_config = ConfigDict(use_attribute_docstrings=True)
+
 """
 Pathlinker will construct a fully directed graph from the provided input file
 - an edge is represented with a head and tail node, which represents the direction of the interation between two nodes
@@ -22,7 +30,7 @@
 - the expected raw input file should have node pairs in the 1st and 2nd columns, with a weight in the 3rd column
 - it can include repeated and bidirectional edges
 """
-class PathLinker(PRM):
+class PathLinker(PRM[PathLinkerParams]):
     required_inputs = ['nodetypes', 'network']
     dois = ["10.1038/npjsba.2016.2", "10.1089/cmb.2012.0274"]
 
@@ -68,20 +76,20 @@ def generate_inputs(data, filename_map):
 
     # Skips parameter validation step
     @staticmethod
-    def run(nodetypes=None, network=None, output_file=None, k=None, container_framework="docker"):
+    def run(inputs, args, output_file, container_framework="docker"):
         """
         Run PathLinker with Docker
         @param nodetypes: input node types with sources and targets (required)
         @param network: input network file (required)
         @param output_file: path to the output pathway file (required)
-        @param k: path length (optional)
+        @param k: path length (optional), provided via args
         @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional)
         """
         # Add additional parameter validation
         # Do not require k
         # Use the PathLinker default
         # Could consider setting the default here instead
-        if not nodetypes or not network or not output_file:
+        if not inputs["nodetypes"] or not inputs["network"]:
             raise ValueError('Required PathLinker arguments are missing')
 
         work_dir = '/spras'
@@ -89,10 +97,10 @@ def run(nodetypes=None, network=None, output_file=None, k=None, container_framew
         # Each volume is a tuple (src, dest)
         volumes = list()
 
-        bind_path, node_file = prepare_volume(nodetypes, work_dir)
+        bind_path, node_file = prepare_volume(inputs["nodetypes"], work_dir)
         volumes.append(bind_path)
 
-        bind_path, network_file = prepare_volume(network, work_dir)
+        bind_path, network_file = prepare_volume(inputs["network"], work_dir)
         volumes.append(bind_path)
 
         # PathLinker does not provide an argument to set the output directory
@@ -111,8 +119,8 @@ def run(nodetypes=None, network=None, output_file=None, k=None, container_framew
                    '--output', mapped_out_prefix]
 
         # Add optional argument
-        if k is not None:
-            command.extend(['-k', str(k)])
+        if args.k is not None:
+            command.extend(['-k', str(args.k)])
 
         container_suffix = "pathlinker:v2"
         run_container_and_log('PathLinker',

From 09fa1bac0301b54842f05d284ba3b0a2390e3d11 Mon Sep 17 00:00:00 2001
From: "Tristan F.-R."
Date: Mon, 14 Jul 2025 12:51:33 -0700
Subject: [PATCH 38/60] chore: begin slowly updating

---
 spras/allpairs.py              |   2 +-
 spras/btb.py                   |   2 +-
 spras/containers.py            |   4 +-
 spras/domino.py                |   8 +-
 spras/meo.py                   |  17 ++---
 spras/mincostflow.py           |   6 +-
 spras/omicsintegrator1.py      |   2 +-
 spras/omicsintegrator2.py      |   3 +-
 spras/pathlinker.py            |   3 +-
 spras/prm.py                   |   3 +-
 spras/rwr.py                   |   2 +-
 spras/strwr.py                 |   2 +-
 test/AllPairs/test_ap.py       |  65 +++++++---------
 test/BowTieBuilder/test_btb.py | 133 ++++++++++++++++-----------------
 test/DOMINO/test_domino.py     |  38 ++++------
 test/MEO/test_meo.py           |  30 ++++----
 16 files changed, 149 insertions(+), 171 deletions(-)

diff --git a/spras/allpairs.py b/spras/allpairs.py
index b1ffe2ee9..15a3b17f7 100644
--- a/spras/allpairs.py
+++ b/spras/allpairs.py
@@ -72,7 +72,7 @@ def generate_inputs(data: Dataset, filename_map):
                      header=["#Interactor1", "Interactor2", "Weight"])
 
     @staticmethod
-    def run(inputs, args, output_file, container_framework="docker"):
+    def run(inputs, output_file, args=Empty(), container_framework="docker"):
         """
         Run All Pairs Shortest Paths with Docker
         @param nodetypes: input node types with sources and targets (required)
diff --git a/spras/btb.py b/spras/btb.py
index a4098ee08..6ad3afb69 100644
--- a/spras/btb.py
+++ b/spras/btb.py
@@ -65,7 +65,7 @@ def generate_inputs(data, filename_map):
 
     # Skips parameter validation step
     @staticmethod
-    def run(inputs, args, output_file, container_framework="docker"):
+    def run(inputs, output_file, args=Empty(), container_framework="docker"):
 
         # Tests for pytest (docker container also runs this)
         # Testing out here avoids the trouble that container errors provide
diff --git a/spras/containers.py b/spras/containers.py
index 9a1568fdd..314d4bb45 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -369,7 +369,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
 
 # Because this is called independently for each file, the same local path can be mounted to multiple volumes
-def prepare_volume(filename: Union[str, PurePath], volume_base: Union[str, PurePath]) -> Tuple[Tuple[PurePath, 
PurePath], str]: +def prepare_volume(filename: Union[str, os.PathLike], volume_base: Union[str, PurePath]) -> Tuple[Tuple[PurePath, PurePath], str]: """ Makes a file on the local file system accessible within a container by mapping the local (source) path to a new container (destination) path and renaming the file to be relative to the destination path. @@ -385,7 +385,7 @@ def prepare_volume(filename: Union[str, PurePath], volume_base: Union[str, PureP if not base_path.is_absolute(): raise ValueError(f'Volume base must be an absolute path: {volume_base}') - if isinstance(filename, PurePath): + if isinstance(filename, os.PathLike): filename = str(filename) filename_hash = hash_filename(filename, config.config.hash_length) diff --git a/spras/domino.py b/spras/domino.py index a70f1a1e3..187e53836 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -13,16 +13,16 @@ from spras.prm import PRM from spras.util import duplicate_edges -__all__ = ['DOMINO', 'pre_domino_id_transform', 'post_domino_id_transform'] +__all__ = ['DOMINO', 'DominoParams', 'pre_domino_id_transform', 'post_domino_id_transform'] ID_PREFIX = 'ENSG0' ID_PREFIX_LEN = len(ID_PREFIX) class DominoParams(BaseModel): - module_threshold: Optional[float] + module_threshold: Optional[float] = None "the p-value threshold for considering a slice as relevant (optional)" - slice_threshold: Optional[float] + slice_threshold: Optional[float] = None "the p-value threshold for considering a putative module as final module (optional)" model_config = ConfigDict(use_attribute_docstrings=True) @@ -76,7 +76,7 @@ def generate_inputs(data, filename_map): header=['ID_interactor_A', 'ppi', 'ID_interactor_B']) @staticmethod - def run(inputs, args, output_file, container_framework="docker"): + def run(inputs, output_file, args=DominoParams(), container_framework="docker"): # Let visualization be always true, parallelization be always 1 thread, and use_cache be always false. if not inputs["network"] or not inputs["active_genes"]: raise ValueError('Required DOMINO arguments are missing') diff --git a/spras/meo.py b/spras/meo.py index 06f041786..0451cb4c0 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -11,7 +11,7 @@ from spras.prm import PRM from spras.util import add_rank_column, duplicate_edges, raw_pathway_df -__all__ = ['MEO', 'write_properties'] +__all__ = ['MEO', 'MEOParams', 'write_properties'] # replaces all underscores in the node names with unicode seperator # MEO keeps only the substring up to the first underscore when parsing node names @@ -58,7 +58,8 @@ def write_properties(filename=Path('properties.txt'), edges=None, sources=None, if max_path_length is not None: f.write(f'max.path.length = {max_path_length}\n') if local_search is not None: - f.write(f'local.search = {local_search}\n') + # Yes/No for this parameter. + f.write(f'local.search = {"Yes" if local_search else "No"}\n') if rand_restarts is not None: f.write(f'rand.restarts = {rand_restarts}\n') @@ -68,17 +69,17 @@ def write_properties(filename=Path('properties.txt'), edges=None, sources=None, # Do not need csp.phase, csp.gen.file, or csp.sol.file because MAXCSP is not supported class MEOParams(BaseModel): - max_path_length: Optional[str] + max_path_length: Optional[int] = None "the maximal length of a path from sources and targets to orient." - local_search: Optional[str] + local_search: Optional[bool] = None """ - a "Yes"/"No" parameter that enables MEO's local search functionality. + a boolean parameter that enables MEO's local search functionality. 
See "Improving approximations with local search" in the associated paper for more information. """ - rand_restarts: Optional[int] + rand_restarts: Optional[int] = None "The number of random restarts to do." model_config = ConfigDict(use_attribute_docstrings=True) @@ -143,7 +144,7 @@ def generate_inputs(data, filename_map): # TODO add parameter validation # TODO document required arguments @staticmethod - def run(inputs, args, output_file=None, container_framework="docker"): + def run(inputs, args=MEOParams(), output_file=None, container_framework="docker"): """ Run Maximum Edge Orientation in the Docker image with the provided parameters. The properties file is generated from the provided arguments. @@ -151,8 +152,6 @@ def run(inputs, args, output_file=None, container_framework="docker"): Does not support MINSAT or MAXCSP. Only the edge output file is retained. All other output files are deleted. - @param output_file: the name of the output edge file, which will overwrite any existing file with this name - @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) """ if inputs["edges"] is None or inputs["sources"] is None or inputs["targets"] is None: raise ValueError('Required Maximum Edge Orientation arguments are missing') diff --git a/spras/mincostflow.py b/spras/mincostflow.py index 77f493f14..986c1c8eb 100644 --- a/spras/mincostflow.py +++ b/spras/mincostflow.py @@ -13,10 +13,10 @@ __all__ = ['MinCostFlow'] class MinCostFlowParams(BaseModel): - flow: Optional[float] + flow: Optional[float] = None "amount of flow going through the graph" - capacity: Optional[float] + capacity: Optional[float] = None "amount of capacity allowed on each edge" model_config = ConfigDict(use_attribute_docstrings=True) @@ -71,7 +71,7 @@ def generate_inputs(data, filename_map): header=False) @staticmethod - def run(inputs, args, output_file, container_framework="docker"): + def run(inputs, output_file, args=MinCostFlowParams(), container_framework="docker"): # ensures that these parameters are required if not inputs["sources"] or not inputs["targets"] or not inputs["edges"]: raise ValueError('Required MinCostFlow arguments are missing') diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index 7a69a01d6..3361f5d2a 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -141,7 +141,7 @@ def generate_inputs(data, filename_map): # TODO add reasonable default values # TODO document required arguments @staticmethod - def run(inputs, args, output_file, container_framework="docker"): + def run(inputs, output_file, args, container_framework="docker"): if inputs["edges"] is None or inputs["prizes"] is None or output_file is None or w is None or b is None or d is None: raise ValueError('Required Omics Integrator 1 arguments are missing') diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 42dc466cd..20351833e 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -98,11 +98,10 @@ def generate_inputs(data: Dataset, filename_map): edges_df.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'Interactor2', 'cost'], header=['protein1', 'protein2', 'cost']) - # TODO add parameter validation # TODO add reasonable default values # TODO document required arguments @staticmethod - def run(inputs, args, output_file, container_framework="docker"): + def run(inputs, output_file, args, container_framework="docker"): """ Run Omics Integrator 2 in the Docker image with 
the provided parameters. Only the .tsv output file is retained and then renamed. diff --git a/spras/pathlinker.py b/spras/pathlinker.py index 8852b959a..3c78ffb84 100644 --- a/spras/pathlinker.py +++ b/spras/pathlinker.py @@ -74,9 +74,8 @@ def generate_inputs(data, filename_map): edges.to_csv(filename_map["network"],sep="\t",index=False,columns=["Interactor1","Interactor2","Weight"], header=["#Interactor1","Interactor2","Weight"]) - # Skips parameter validation step @staticmethod - def run(inputs, args, output_file, container_framework="docker"): + def run(inputs, output_file, args, container_framework="docker"): """ Run PathLinker with Docker @param nodetypes: input node types with sources and targets (required) diff --git a/spras/prm.py b/spras/prm.py index 06d005b2a..1692f11f6 100644 --- a/spras/prm.py +++ b/spras/prm.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from pydantic import BaseModel from typing import Any, cast, TypeVar, Generic +import os from spras.dataset import Dataset @@ -40,7 +41,7 @@ def generate_inputs(data: Dataset, filename_map: dict[str, str]): @staticmethod @abstractmethod - def run(inputs: dict[str, str], args: T, output_file: str, container_framework="docker"): + def run(inputs: dict[str, str | os.PathLike], output_file: str | os.PathLike, args: T, container_framework="docker"): """ Runs an algorithm with the specified inputs, algorithm params (T), the designated output_file, and the desired container_framework. diff --git a/spras/rwr.py b/spras/rwr.py index 12fc5d422..12df71e01 100644 --- a/spras/rwr.py +++ b/spras/rwr.py @@ -42,7 +42,7 @@ def generate_inputs(data, filename_map): edges.to_csv(filename_map['network'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False) @staticmethod - def run(inputs, args, output_file, container_framework="docker"): + def run(inputs, output_file, args, container_framework="docker"): if not inputs["nodes"] or not inputs["network"]: raise ValueError('Required RWR arguments are missing') diff --git a/spras/strwr.py b/spras/strwr.py index 6693d7f5e..c603f9196 100644 --- a/spras/strwr.py +++ b/spras/strwr.py @@ -43,7 +43,7 @@ def generate_inputs(data, filename_map): edges.to_csv(filename_map['network'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False) @staticmethod - def run(inputs, args, output_file, container_framework="docker"): + def run(inputs, output_file, args, container_framework="docker"): if not inputs["sources"] or not inputs["targets"] or not inputs["network"] or not output_file: raise ValueError('Required local_neighborhood arguments are missing') diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 8d094561f..ee76d0ce7 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -45,11 +45,10 @@ def test_allpairs(self): out_path = OUT_DIR.joinpath('sample-out.txt') out_path.unlink(missing_ok=True) # Only include required arguments - AllPairs.run( - nodetypes=str(TEST_DIR / 'input' / 'sample-in-nodetypes.txt'), - network=str(TEST_DIR / 'input' / 'sample-in-net.txt'), - directed_flag=str(TEST_DIR / 'input' / 'directed-flag-false.txt'), - output_file=str(out_path) + AllPairs.run({"nodetypes": str(TEST_DIR / 'input' / 'sample-in-nodetypes.txt'), + "network": str(TEST_DIR / 'input' / 'sample-in-net.txt'), + "directed_flag": str(TEST_DIR / 'input' / 'directed-flag-false.txt')}, + output_file=str(out_path) ) assert out_path.exists() @@ -57,9 +56,8 @@ def test_allpairs_missing(self): # Test the expected error is raised when required arguments are 
missing with pytest.raises(ValueError): # No nodetypes - AllPairs.run( - network=str(TEST_DIR / 'input' / 'sample-in-net.txt'), - output_file=str(OUT_DIR / 'sample-out.txt')) + AllPairs.run({"network": str(TEST_DIR / 'input' / 'sample-in-net.txt')}, + output_file=str(OUT_DIR / 'sample-out.txt')) # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @@ -68,12 +66,11 @@ def test_allpairs_singularity(self): out_path = OUT_DIR / 'sample-out.txt' out_path.unlink(missing_ok=True) # Only include required arguments and run with Singularity - AllPairs.run( - nodetypes=str(TEST_DIR / 'input' / 'sample-in-nodetypes.txt'), - network=str(TEST_DIR / 'input' / 'sample-in-net.txt'), - directed_flag=str(TEST_DIR / 'input' / 'directed-flag-false.txt'), - output_file=str(out_path), - container_framework="singularity") + AllPairs.run({"nodetypes": str(TEST_DIR / 'input' / 'sample-in-nodetypes.txt'), + "network": str(TEST_DIR / 'input' / 'sample-in-net.txt'), + "directed_flag": str(TEST_DIR / 'input' / 'directed-flag-false.txt')}, + output_file=str(out_path), + container_framework="singularity") assert out_path.exists() @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') @@ -82,12 +79,11 @@ def test_allpairs_singularity_unpacked(self): out_path.unlink(missing_ok=True) # Indicate via config mechanism that we want to unpack the Singularity container config.config.unpack_singularity = True - AllPairs.run( - nodetypes=str(TEST_DIR / 'input/sample-in-nodetypes.txt'), - network=str(TEST_DIR / 'input/sample-in-net.txt'), - directed_flag=str(TEST_DIR / 'input' / 'directed-flag-false.txt'), - output_file=str(out_path), - container_framework="singularity") + AllPairs.run({"nodetypes": str(TEST_DIR / 'input/sample-in-nodetypes.txt'), + "network": str(TEST_DIR / 'input/sample-in-net.txt'), + "directed_flag": str(TEST_DIR / 'input' / 'directed-flag-false.txt')}, + output_file=str(out_path), + container_framework="singularity") config.config.unpack_singularity = False assert out_path.exists() @@ -104,12 +100,10 @@ def test_allpairs_correctness(self): out_path = OUT_DIR / 'correctness-out.txt' out_path.unlink(missing_ok=True) - AllPairs.run( - nodetypes=str(TEST_DIR / 'input' / 'correctness-nodetypes.txt'), - network=str(TEST_DIR / 'input' / 'correctness-network.txt'), - directed_flag=str(TEST_DIR / 'input' / 'directed-flag-false.txt'), - output_file=str(OUT_DIR / 'correctness-out.txt') - ) + AllPairs.run({"nodetypes": TEST_DIR / 'input' / 'correctness-nodetypes.txt', + "network": TEST_DIR / 'input' / 'correctness-network.txt', + "directed_flag": TEST_DIR / 'input' / 'directed-flag-false.txt'}, + output_file=OUT_DIR / 'correctness-out.txt') edge_equality_test_util(out_path, EXPECTED_DIR / 'correctness-expected.txt') @@ -117,12 +111,10 @@ def test_allpairs_directed(self): out_path = OUT_DIR / 'directed-out.txt' out_path.unlink(missing_ok=True) - AllPairs.run( - nodetypes=str(TEST_DIR / 'input' / 'directed-nodetypes.txt'), - network=str(TEST_DIR / 'input' / 'directed-network.txt'), - directed_flag=str(TEST_DIR / 'input' / 'directed-flag-true.txt'), - output_file=str(OUT_DIR / 'directed-out.txt'), - ) + AllPairs.run({"nodetypes": TEST_DIR / 'input' / 'directed-nodetypes.txt', + "network": TEST_DIR / 'input' / 'directed-network.txt', + "directed_flag": TEST_DIR / 'input' / 'directed-flag-true.txt'}, + output_file=OUT_DIR / 'directed-out.txt') edge_equality_test_util(out_path, 
EXPECTED_DIR.joinpath('directed-expected.txt')) @@ -136,11 +128,10 @@ def test_allpairs_zero_length(self): out_path = OUT_DIR / 'zero-length-out.txt' out_path.unlink(missing_ok=True) - AllPairs.run( - nodetypes=TEST_DIR / 'input' / 'zero-length-nodetypes.txt', - network=TEST_DIR / 'input' / 'zero-length-network.txt', - directed_flag=str(TEST_DIR / 'input' / 'directed-flag-false.txt'), - output_file=OUT_DIR / 'zero-length-out.txt' + AllPairs.run({"nodetypes": TEST_DIR / 'input' / 'zero-length-nodetypes.txt', + "network": TEST_DIR / 'input' / 'zero-length-network.txt', + "directed_flag": TEST_DIR / 'input' / 'directed-flag-false.txt'}, + output_file=OUT_DIR / 'zero-length-out.txt' ) assert filecmp.cmp(OUT_DIR / 'zero-length-out.txt', EXPECTED_DIR / 'zero-length-expected.txt', shallow=False) diff --git a/test/BowTieBuilder/test_btb.py b/test/BowTieBuilder/test_btb.py index d4a458b3c..c65ce4a32 100644 --- a/test/BowTieBuilder/test_btb.py +++ b/test/BowTieBuilder/test_btb.py @@ -25,22 +25,19 @@ class TestBowTieBuilder: def test_btb_missing(self): with pytest.raises(ValueError): # No edges - BTB.run( - targets=Path(TEST_DIR, 'input', 'target.txt'), - sources=Path(TEST_DIR, 'input', 'source.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"targets": Path(TEST_DIR, 'input', 'target.txt'), + "sources": Path(TEST_DIR, 'input', 'source.txt')}, + output_file=OUT_FILE_DEFAULT) with pytest.raises(ValueError): # No source - BTB.run( - targets=Path(TEST_DIR, 'input', 'target.txt'), - edges=Path(TEST_DIR, 'input', 'edges.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"targets": Path(TEST_DIR, 'input', 'target.txt'), + "edges": Path(TEST_DIR, 'input', 'edges.txt')}, + output_file=OUT_FILE_DEFAULT) with pytest.raises(ValueError): # No target - BTB.run( - sources=Path(TEST_DIR, 'input', 'source.txt'), - edges=Path(TEST_DIR, 'input', 'edges.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"sources": Path(TEST_DIR, 'input', 'source.txt'), + "edges": Path(TEST_DIR, 'input', 'edges.txt')}, + output_file=OUT_FILE_DEFAULT) """ @@ -48,30 +45,30 @@ def test_btb_missing(self): """ def test_btb_file(self): with pytest.raises(ValueError): - BTB.run(sources=Path(TEST_DIR, 'input', 'unknown.txt'), - targets=Path(TEST_DIR, 'input', 'target.txt'), - edges=Path(TEST_DIR, 'input', 'edges.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"sources": Path(TEST_DIR, 'input', 'unknown.txt'), + "targets": Path(TEST_DIR, 'input', 'target.txt'), + "edges": Path(TEST_DIR, 'input', 'edges.txt')}, + output_file=OUT_FILE_DEFAULT) """ Run the BowTieBuilder algorithm with bad input data """ def test_format_error(self): with pytest.raises(IndexError): - BTB.run(sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - edges=Path(TEST_DIR, 'input', 'bad-edges.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt'), + "edges": Path(TEST_DIR, 'input', 'bad-edges.txt')}, + output_file=OUT_FILE_DEFAULT) """ Run the BowTieBuilder algorithm on the example input files and check the output matches the expected output """ def test_btb(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'btb-edges.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'btb-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), 
+ "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'btb-output.txt') @@ -89,10 +86,10 @@ def test_btb(self): """ def test_disjoint(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'disjoint-edges.txt'), - sources=Path(TEST_DIR, 'input', 'disjoint-sources.txt'), - targets=Path(TEST_DIR, 'input', 'disjoint-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'disjoint-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'disjoint-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'disjoint-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'disjoint-output.txt') @@ -110,10 +107,10 @@ def test_disjoint(self): """ def test_disjoint2(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'disjoint2-edges.txt'), - sources=Path(TEST_DIR, 'input', 'disjoint-sources.txt'), - targets=Path(TEST_DIR, 'input', 'disjoint-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'disjoint2-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'disjoint-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'disjoint-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'disjoint-output.txt') @@ -132,10 +129,10 @@ def test_disjoint2(self): def test_missing_file(self): with pytest.raises(ValueError): with pytest.raises(OSError): - BTB.run(edges=Path(TEST_DIR, 'input', 'missing.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'missing.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) """ @@ -143,10 +140,10 @@ def test_missing_file(self): """ def test_source_to_source(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'source-to-source-edges.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'source-to-source-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'source-to-source-output.txt') @@ -164,10 +161,10 @@ def test_source_to_source(self): """ def test_source_to_source2(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'source-to-source2-edges.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'source-to-source2-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'source-to-source2-output.txt') @@ -186,10 +183,10 @@ 
def test_source_to_source2(self): def test_source_to_source_disjoint(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'source-to-source-disjoint-edges.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'source-to-source-disjoint-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'source-to-source-disjoint-output.txt') @@ -208,10 +205,10 @@ def test_source_to_source_disjoint(self): def test_bidirectional(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'bidirectional-edges.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'bidirectional-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'bidirectional-output.txt') @@ -230,10 +227,10 @@ def test_bidirectional(self): def test_target_to_source(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'target-to-source-edges.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'target-to-source-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'empty-output.txt') @@ -252,10 +249,10 @@ def test_target_to_source(self): def test_loop(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'loop-edges.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'loop-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'loop-output.txt') @@ -274,10 +271,10 @@ def test_loop(self): def test_weighted(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 'input', 'weighted-edges.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'weighted-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'weighted-output.txt') @@ -292,10 +289,10 @@ def test_weighted(self): def test_weight_one(self): OUT_FILE_DEFAULT.unlink(missing_ok=True) - BTB.run(edges=Path(TEST_DIR, 
'input', 'weight-one-edges.txt'), - sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), - targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), - output_file=OUT_FILE_DEFAULT) + BTB.run({"edges": Path(TEST_DIR, 'input', 'weight-one-edges.txt'), + "sources": Path(TEST_DIR, 'input', 'btb-sources.txt'), + "targets": Path(TEST_DIR, 'input', 'btb-targets.txt')}, + output_file=OUT_FILE_DEFAULT) assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'weighted-output.txt') diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 4323ea4c9..62563bdc3 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -5,7 +5,7 @@ import pytest import spras.config.config as config -from spras.domino import DOMINO, post_domino_id_transform, pre_domino_id_transform +from spras.domino import DOMINO, DominoParams, post_domino_id_transform, pre_domino_id_transform config.init_from_file("config/config.yaml") @@ -28,10 +28,9 @@ def test_domino_required(self): # Only include required arguments out_path = Path(OUT_FILE_DEFAULT) out_path.unlink(missing_ok=True) - DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - active_genes=TEST_DIR+'input/domino-active-genes.txt', - output_file=OUT_FILE_DEFAULT) + DOMINO.run({"network": TEST_DIR+'input/domino-network.txt', + "active_genes": TEST_DIR+'input/domino-active-genes.txt'}, + output_file=OUT_FILE_DEFAULT) # output_file should be empty assert out_path.exists() @@ -39,12 +38,10 @@ def test_domino_optional(self): # Include optional arguments out_path = Path(OUT_FILE_OPTIONAL) out_path.unlink(missing_ok=True) - DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - active_genes=TEST_DIR+'input/domino-active-genes.txt', - output_file=OUT_FILE_OPTIONAL, - slice_threshold=0.4, - module_threshold=0.06) + DOMINO.run({"network": TEST_DIR+'input/domino-network.txt', + "active_genes": TEST_DIR+'input/domino-active-genes.txt'}, + output_file=OUT_FILE_OPTIONAL, + args=DominoParams(slice_threshold=0.4, module_threshold=0.06)) # output_file should be empty assert out_path.exists() @@ -52,17 +49,15 @@ def test_domino_missing_active_genes(self): # Test the expected error is raised when active_genes argument is missing with pytest.raises(ValueError): # No active_genes - DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - output_file=OUT_FILE_DEFAULT) + DOMINO.run({"network": TEST_DIR+'input/domino-network.txt'}, + output_file=OUT_FILE_DEFAULT) def test_domino_missing_network(self): # Test the expected error is raised when network argument is missing with pytest.raises(ValueError): # No network - DOMINO.run( - active_genes=TEST_DIR+'input/domino-active-genes.txt', - output_file=OUT_FILE_DEFAULT) + DOMINO.run({"active_genes": TEST_DIR+'input/domino-active-genes.txt'}, + output_file=OUT_FILE_DEFAULT) # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @@ -71,11 +66,10 @@ def test_domino_singularity(self): out_path = Path(OUT_FILE_DEFAULT) out_path.unlink(missing_ok=True) # Only include required arguments and run with Singularity - DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - active_genes=TEST_DIR+'input/domino-active-genes.txt', - output_file=OUT_FILE_DEFAULT, - container_framework="singularity") + DOMINO.run({"network": TEST_DIR+'input/domino-network.txt', + "active_genes": TEST_DIR+'input/domino-active-genes.txt'}, + output_file=OUT_FILE_DEFAULT, + 
container_framework="singularity") assert out_path.exists() def test_pre_id_transform(self): diff --git a/test/MEO/test_meo.py b/test/MEO/test_meo.py index 32958be20..051744ed7 100644 --- a/test/MEO/test_meo.py +++ b/test/MEO/test_meo.py @@ -4,7 +4,7 @@ import pytest import spras.config.config as config -from spras.meo import MEO, write_properties +from spras.meo import MEO, MEOParams, write_properties config.init_from_file("config/config.yaml") @@ -20,9 +20,9 @@ def test_meo_required(self): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Only include required arguments - MEO.run(edges=TEST_DIR + 'input/meo-edges.txt', - sources=TEST_DIR + 'input/meo-sources.txt', - targets=TEST_DIR + 'input/meo-targets.txt', + MEO.run({"edges": TEST_DIR + 'input/meo-edges.txt', + "sources": TEST_DIR + 'input/meo-sources.txt', + "targets": TEST_DIR + 'input/meo-targets.txt'}, output_file=OUT_FILE) assert out_path.exists() @@ -30,21 +30,19 @@ def test_meo_all_optional(self): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Include all optional arguments - MEO.run(edges=TEST_DIR + 'input/meo-edges.txt', - sources=TEST_DIR + 'input/meo-sources.txt', - targets=TEST_DIR + 'input/meo-targets.txt', - output_file=OUT_FILE, - max_path_length=3, - local_search='No', - rand_restarts=10) + MEO.run({"edges": TEST_DIR + 'input/meo-edges.txt', + "sources": TEST_DIR + 'input/meo-sources.txt', + "targets": TEST_DIR + 'input/meo-targets.txt'}, + args=MEOParams(max_path_length=3, local_search=False, rand_restarts=10), + output_file=OUT_FILE) assert out_path.exists() def test_meo_missing(self): # Test the expected error is raised when required arguments are missing with pytest.raises(ValueError): # No edges - MEO.run(sources=TEST_DIR + 'input/meo-sources.txt', - targets=TEST_DIR + 'input/meo-targets.txt', + MEO.run({"sources": TEST_DIR + 'input/meo-sources.txt', + "targets": TEST_DIR + 'input/meo-targets.txt'}, output_file=OUT_FILE) with pytest.raises(ValueError): @@ -62,9 +60,9 @@ def test_meo_singularity(self): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Only include required arguments and run with Singularity - MEO.run(edges=TEST_DIR + 'input/meo-edges.txt', - sources=TEST_DIR + 'input/meo-sources.txt', - targets=TEST_DIR + 'input/meo-targets.txt', + MEO.run({"edges": TEST_DIR + 'input/meo-edges.txt', + "sources": TEST_DIR + 'input/meo-sources.txt', + "targets": TEST_DIR + 'input/meo-targets.txt'}, output_file=OUT_FILE, container_framework="singularity") assert out_path.exists() From 32d4b5cbce1e46a0afe5744b778350ab2b7cbae8 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Mon, 14 Jul 2025 20:03:27 +0000 Subject: [PATCH 39/60] refactor: moving more tests --- spras/omicsintegrator1.py | 34 +++++----- spras/omicsintegrator2.py | 16 +++-- test/OmicsIntegrator1/test_oi1.py | 109 +++++++++++++++--------------- test/OmicsIntegrator2/test_oi2.py | 43 +++++------- 4 files changed, 97 insertions(+), 105 deletions(-) diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index 3361f5d2a..45465ecd6 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -7,7 +7,7 @@ from spras.prm import PRM from spras.util import add_rank_column, duplicate_edges, raw_pathway_df -__all__ = ['OmicsIntegrator1', 'write_conf'] +__all__ = ['OmicsIntegrator1', 'OmicsIntegrator1Params', 'write_conf'] # TODO decide on default number of processes and threads @@ -38,41 +38,41 @@ def write_conf(filename=Path('config.txt'), w=None, b=None, d=None, mu=None, noi f.write('threads = 1\n') class OmicsIntegrator1Params(BaseModel): - dummy_mode: Optional[str] - mu_squared: Optional[str] - exclude_terms: Optional[str] + dummy_mode: Optional[str] = None + mu_squared: Optional[bool] = None + exclude_terms: Optional[bool] = None - noisy_edges: Optional[str] + noisy_edges: Optional[int] = None "How many times you would like to add noise to the given edge values and re-run the algorithm." - shuffled_prizes: Optional[int] + shuffled_prizes: Optional[int] = None "shuffled_prizes: How many times the algorithm should shuffle the prizes and re-run" - random_terminals: Optional[int] + random_terminals: Optional[int] = None "How many times to apply the given prizes to random nodes in the interactome" - seed: Optional[str] + seed: Optional[int] = None "the randomness seed to use" - w: Optional[float] + w: int "the number of trees" - b: Optional[str] + b: float "the trade-off between including more terminals and using less reliable edges" - d: Optional[str] + d: int "controls the maximum path-length from v0 to terminal nodes" - mu: Optional[float] + mu: Optional[float] = None "controls the degree-based negative prizes (defualt 0.0)" - noise: Optional[str] + noise: Optional[float] = None "Standard Deviation of the gaussian noise added to edges in Noisy Edges Randomizations" - g: Optional[str] + g: Optional[float] = None "(Gamma) multiplicative edge penalty from degree of endpoints" - r: Optional[str] + r: Optional[float] = None "msgsteiner parameter that adds random noise to edges, which is rarely needed because the Forest --noisyEdges option is recommended instead (default 0)" model_config = ConfigDict(use_attribute_docstrings=True) @@ -142,7 +142,7 @@ def generate_inputs(data, filename_map): # TODO document required arguments @staticmethod def run(inputs, output_file, args, container_framework="docker"): - if inputs["edges"] is None or inputs["prizes"] is None or output_file is None or w is None or b is None or d is None: + if inputs["edges"] is None or inputs["prizes"] is None or output_file is None: raise ValueError('Required Omics Integrator 1 arguments are missing') work_dir = '/spras' @@ -195,7 +195,7 @@ def run(inputs, output_file, args, container_framework="docker"): if args.dummy_mode is not None and args.dummy_mode: # for custom dummy modes, add the file if args.dummy_mode == 'file': - command.extend(['--dummyMode', inputs["dummy_file"]]) + command.extend(['--dummyMode', str(inputs["dummy_file"])]) # else pass in the dummy_mode and let oi1 handle it else: command.extend(['--dummyMode', args.dummy_mode]) diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py 
index 20351833e..944bf1bf7 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -10,7 +10,7 @@ from spras.prm import PRM from spras.util import add_rank_column, duplicate_edges -__all__ = ['OmicsIntegrator2'] +__all__ = ['OmicsIntegrator2', 'OmicsIntegrator2Params'] class OmicsIntegrator2Params(BaseModel): w: float = 6 @@ -22,16 +22,16 @@ class OmicsIntegrator2Params(BaseModel): g: float = 20 "Gamma: multiplicative edge penalty from degree of endpoints" - noise: Optional[str] + noise: Optional[float] = None "Standard Deviation of the gaussian noise added to edges in Noisy Edges Randomizations." - noisy_edges: Optional[int] + noisy_edges: Optional[int] = None "An integer specifying how many times to add noise to the given edge values and re-run." - random_terminals: Optional[str] + random_terminals: Optional[int] = None "An integer specifying how many times to apply your given prizes to random nodes in the interactome and re-run" - dummy_mode: Optional[str] + dummy_mode: Optional[str] = None """ Tells the program which nodes in the interactome to connect the dummy node to. (default: terminals) "terminals" = connect to all terminals @@ -39,9 +39,11 @@ class OmicsIntegrator2Params(BaseModel): "all" = connect to all nodes in the interactome. """ - seed: Optional[str] + seed: Optional[int] = None "The random seed to use for this run." + model_config = ConfigDict(use_attribute_docstrings=True) + """ Omics Integrator 2 will construct a fully undirected graph from the provided input file - in the algorithm, it uses nx.Graph() objects, which are undirected @@ -101,7 +103,7 @@ def generate_inputs(data: Dataset, filename_map): # TODO add reasonable default values # TODO document required arguments @staticmethod - def run(inputs, output_file, args, container_framework="docker"): + def run(inputs, output_file, args=OmicsIntegrator2Params(), container_framework="docker"): """ Run Omics Integrator 2 in the Docker image with the provided parameters. Only the .tsv output file is retained and then renamed. 
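The upshot of these signature changes, sketched below with placeholder paths: required input files now arrive in a single dict keyed by the wrapper's `required_inputs` names, and tunable parameters arrive in a typed pydantic model rather than as loose keyword arguments.

    from pathlib import Path

    from spras.omicsintegrator2 import OmicsIntegrator2, OmicsIntegrator2Params

    # Input keys mirror the wrapper's required_inputs; the paths here are placeholders.
    # Omitting args falls back to the model's documented defaults.
    OmicsIntegrator2.run({"edges": Path("input/oi2-edges.txt"),
                          "prizes": Path("input/oi2-prizes.txt")},
                         output_file=Path("output/pathway.tsv"),
                         args=OmicsIntegrator2Params(w=5, b=1, g=3))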
diff --git a/test/OmicsIntegrator1/test_oi1.py b/test/OmicsIntegrator1/test_oi1.py index a484c0af3..fad4627e0 100644 --- a/test/OmicsIntegrator1/test_oi1.py +++ b/test/OmicsIntegrator1/test_oi1.py @@ -4,7 +4,7 @@ import pytest import spras.config.config as config -from spras.omicsintegrator1 import OmicsIntegrator1, write_conf +from spras.omicsintegrator1 import OmicsIntegrator1, OmicsIntegrator1Params, write_conf config.init_from_file("config/config.yaml") @@ -20,79 +20,74 @@ def test_oi1_required(self): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Only include required arguments - OmicsIntegrator1.run(edges=TEST_DIR+'input/oi1-edges.txt', - prizes=TEST_DIR+'input/oi1-prizes.txt', + OmicsIntegrator1.run({"edges": TEST_DIR+'input/oi1-edges.txt', + "prizes": TEST_DIR+'input/oi1-prizes.txt'}, output_file=OUT_FILE, - w=5, - b=1, - d=10) + args=OmicsIntegrator1Params(w=5, b=1, d=10)) assert out_path.exists() def test_oi1_some_optional(self): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Include optional argument - OmicsIntegrator1.run(edges=TEST_DIR+'input/oi1-edges.txt', - prizes=TEST_DIR+'input/oi1-prizes.txt', + OmicsIntegrator1.run({"edges": TEST_DIR+'input/oi1-edges.txt', + "prizes": TEST_DIR+'input/oi1-prizes.txt'}, output_file=OUT_FILE, - w=5, - b=1, - d=10, - noise=0.333, - g=0.001, - r=0) + args=OmicsIntegrator1Params(w=5, b=1, d=10, noise=0.333, g=0.001, r=0)) assert out_path.exists() def test_oi1_all_optional(self): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Include all optional arguments - OmicsIntegrator1.run(edges=TEST_DIR+'input/oi1-edges.txt', - prizes=TEST_DIR+'input/oi1-prizes.txt', - dummy_nodes=None, - dummy_mode='terminals', - mu_squared=True, - exclude_terms=True, + OmicsIntegrator1.run({"edges": TEST_DIR+'input/oi1-edges.txt', + "prizes": TEST_DIR+'input/oi1-prizes.txt'}, output_file=OUT_FILE, - noisy_edges=0, - shuffled_prizes=0, - random_terminals=0, - seed=1, - w=5, - b=1, - d=10, - mu=0, - noise=0.333, - g=0.001, - r=0) + args=OmicsIntegrator1Params( + dummy_mode='terminals', + mu_squared=True, + exclude_terms=True, + noisy_edges=0, + shuffled_prizes=0, + random_terminals=0, + seed=1, + w=5, + b=1, + d=10, + mu=0, + noise=0.333, + g=0.001, + r=0)) assert out_path.exists() def test_oi1_dummy_file(self): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Include optional argument - OmicsIntegrator1.run(edges=TEST_DIR+'input/oi1-edges.txt', - prizes=TEST_DIR+'input/oi1-prizes.txt', - dummy_nodes=TEST_DIR + 'input/oi1-dummy.txt', - dummy_mode='file', + OmicsIntegrator1.run({"edges": TEST_DIR+'input/oi1-edges.txt', + "prizes": TEST_DIR+'input/oi1-prizes.txt', + "dummy_nodes": TEST_DIR + 'input/oi1-dummy.txt'}, output_file=OUT_FILE, - w=5, - b=1, - d=10, - noise=0.333, - g=0.001, - r=0) + args=OmicsIntegrator1Params( + dummy_mode='file', + w=5, + b=1, + d=10, + noise=0.333, + g=0.001, + r=0)) assert out_path.exists() def test_oi1_missing(self): # Test the expected error is raised when required arguments are missing with pytest.raises(ValueError): # No edges - OmicsIntegrator1.run(prizes=TEST_DIR + 'input/oi1-prizes.txt', + OmicsIntegrator1.run({"prizes": TEST_DIR + 'input/oi1-prizes.txt'}, output_file=TEST_DIR+'output/test_optimalForest.sif', - w=5, - b=1, - d=10) + args=OmicsIntegrator1Params( + w=5, + b=1, + d=10)) with pytest.raises(ValueError): # No w write_conf(Path('.'), @@ -103,13 +98,14 @@ def test_oi1_missing_dummy(self): # Test the expected error is raised when the dummy_nodes file is missing and the 
dummy_mode is 'file' with pytest.raises(ValueError): # No edges - OmicsIntegrator1.run(edges=TEST_DIR+'input/oi1-edges.txt', - prizes=TEST_DIR + 'input/oi1-prizes.txt', + OmicsIntegrator1.run({"edges": TEST_DIR+'input/oi1-edges.txt', + "prizes": TEST_DIR + 'input/oi1-prizes.txt'}, output_file=TEST_DIR+'output/test_optimalForest.sif', - w=5, - b=1, - d=10, - dummy_mode='file') + args=OmicsIntegrator1Params( + w=5, + b=1, + d=10, + dummy_mode='file')) # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @@ -118,11 +114,12 @@ def test_oi1_singularity(self): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Only include required arguments and run with Singularity - OmicsIntegrator1.run(edges=TEST_DIR + 'input/oi1-edges.txt', - prizes=TEST_DIR + 'input/oi1-prizes.txt', + OmicsIntegrator1.run({"edges": TEST_DIR + 'input/oi1-edges.txt', + "prizes": TEST_DIR + 'input/oi1-prizes.txt'}, output_file=OUT_FILE, - w=5, - b=1, - d=10, + args=OmicsIntegrator1Params( + w=5, + b=1, + d=10), container_framework="singularity") assert out_path.exists() diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py index 13f7f30b6..0239d5e5f 100644 --- a/test/OmicsIntegrator2/test_oi2.py +++ b/test/OmicsIntegrator2/test_oi2.py @@ -4,7 +4,7 @@ import pytest import spras.config.config as config -from spras.omicsintegrator2 import OmicsIntegrator2 +from spras.omicsintegrator2 import OmicsIntegrator2, OmicsIntegrator2Params config.init_from_file("config/config.yaml") @@ -21,51 +21,44 @@ class TestOmicsIntegrator2: def test_oi2_required(self): # Only include required arguments OUT_FILE.unlink(missing_ok=True) - OmicsIntegrator2.run(edges=EDGE_FILE, - prizes=PRIZE_FILE, + OmicsIntegrator2.run({"edges": EDGE_FILE, + "prizes": PRIZE_FILE}, output_file=OUT_FILE) assert OUT_FILE.exists() def test_oi2_some_optional(self): # Include optional argument OUT_FILE.unlink(missing_ok=True) - OmicsIntegrator2.run(edges=EDGE_FILE, - prizes=PRIZE_FILE, + OmicsIntegrator2.run({"edges": EDGE_FILE, + "prizes": PRIZE_FILE}, output_file=OUT_FILE, - g=0) + args=OmicsIntegrator2Params(g=0)) assert OUT_FILE.exists() def test_oi2_all_optional(self): # Include all optional arguments OUT_FILE.unlink(missing_ok=True) - OmicsIntegrator2.run(edges=EDGE_FILE, - prizes=PRIZE_FILE, + OmicsIntegrator2.run({"edges": EDGE_FILE, + "prizes": PRIZE_FILE}, output_file=OUT_FILE, - w=5, - b=1, - g=3, - noise=0.1, - noisy_edges=0, - random_terminals=0, - dummy_mode='terminals', - seed=2) + args=OmicsIntegrator2Params(w=5, + b=1, + g=3, + noise=0.1, + noisy_edges=0, + random_terminals=0, + dummy_mode='terminals', + seed=2)) assert OUT_FILE.exists() - def test_oi2_missing(self): - # Test the expected error is raised when required arguments are missing - with pytest.raises(ValueError): - # No output_file - OmicsIntegrator2.run(edges=EDGE_FILE, - prizes=PRIZE_FILE) - # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') def test_oi2_singularity(self): # Only include required arguments OUT_FILE.unlink(missing_ok=True) - OmicsIntegrator2.run(edges=EDGE_FILE, - prizes=PRIZE_FILE, + OmicsIntegrator2.run({"edges": EDGE_FILE, + "prizes": PRIZE_FILE}, output_file=OUT_FILE, container_framework="singularity") assert OUT_FILE.exists() From 
9b539e99fe7f4f53549e4e295fe0d3bf6bce39ed Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 20:28:50 +0000 Subject: [PATCH 40/60] fix: correct params --- spras/config/util.py | 3 +- spras/domino.py | 2 +- spras/meo.py | 5 ++- spras/mincostflow.py | 5 ++- spras/omicsintegrator1.py | 3 +- spras/omicsintegrator2.py | 2 +- spras/pathlinker.py | 9 +++-- spras/prm.py | 5 ++- spras/rwr.py | 11 +++-- spras/strwr.py | 12 ++++-- test/DOMINO/test_domino.py | 7 +++- test/MinCostFlow/test_mcf.py | 65 ++++++++++++++---------------- test/OmicsIntegrator2/test_oi2.py | 9 ++--- test/PathLinker/test_pathlinker.py | 36 +++++++---------- test/RWR/test_RWR.py | 26 ++++++------ test/ST_RWR/test_STRWR.py | 34 ++++++++-------- 16 files changed, 120 insertions(+), 114 deletions(-) diff --git a/spras/config/util.py b/spras/config/util.py index c23374a50..32f19076f 100644 --- a/spras/config/util.py +++ b/spras/config/util.py @@ -1,7 +1,8 @@ from enum import Enum -from pydantic import BaseModel, ConfigDict from typing import Any +from pydantic import BaseModel, ConfigDict + # https://stackoverflow.com/a/76883868/7589775 class CaseInsensitiveEnum(str, Enum): diff --git a/spras/domino.py b/spras/domino.py index 187e53836..86f3c0563 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -1,9 +1,9 @@ import json from pathlib import Path +from typing import Optional import pandas as pd from pydantic import BaseModel, ConfigDict -from typing import Optional from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( diff --git a/spras/meo.py b/spras/meo.py index 0451cb4c0..30e81d87e 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -1,8 +1,9 @@ import os from pathlib import Path -from pydantic import BaseModel, ConfigDict from typing import Optional +from pydantic import BaseModel, ConfigDict + from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( add_directionality_constant, @@ -78,7 +79,7 @@ class MEOParams(BaseModel): See "Improving approximations with local search" in the associated paper for more information. """ - + rand_restarts: Optional[int] = None "The number of random restarts to do." 
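The bare string literal under each field is not just a comment: because these params models set `model_config = ConfigDict(use_attribute_docstrings=True)`, pydantic lifts the attribute docstring into the field's description. A minimal sketch of the pattern (the class name is illustrative):

    from typing import Optional

    from pydantic import BaseModel, ConfigDict

    class ExampleParams(BaseModel):
        rand_restarts: Optional[int] = None
        "The number of random restarts to do."

        model_config = ConfigDict(use_attribute_docstrings=True)

    # The attribute docstring surfaces as the field description,
    # e.g. in model_json_schema() output.
    print(ExampleParams.model_fields["rand_restarts"].description)
    # -> The number of random restarts to do.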
diff --git a/spras/mincostflow.py b/spras/mincostflow.py index 986c1c8eb..eab80c631 100644 --- a/spras/mincostflow.py +++ b/spras/mincostflow.py @@ -1,7 +1,8 @@ from pathlib import Path -from pydantic import BaseModel, ConfigDict from typing import Optional +from pydantic import BaseModel, ConfigDict + from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( convert_undirected_to_directed, @@ -10,7 +11,7 @@ from spras.prm import PRM from spras.util import add_rank_column, duplicate_edges, raw_pathway_df -__all__ = ['MinCostFlow'] +__all__ = ['MinCostFlow', 'MinCostFlowParams'] class MinCostFlowParams(BaseModel): flow: Optional[float] = None diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index 45465ecd6..d8226f735 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -1,7 +1,8 @@ from pathlib import Path -from pydantic import BaseModel, ConfigDict from typing import Optional +from pydantic import BaseModel, ConfigDict + from spras.containers import prepare_volume, run_container_and_log from spras.interactome import reinsert_direction_col_mixed from spras.prm import PRM diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 944bf1bf7..41aec9ee1 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -1,8 +1,8 @@ from pathlib import Path -from pydantic import BaseModel, ConfigDict from typing import Optional import pandas as pd +from pydantic import BaseModel, ConfigDict from spras.containers import prepare_volume, run_container_and_log from spras.dataset import Dataset diff --git a/spras/pathlinker.py b/spras/pathlinker.py index 3c78ffb84..167403cef 100644 --- a/spras/pathlinker.py +++ b/spras/pathlinker.py @@ -1,8 +1,9 @@ import warnings from pathlib import Path -from pydantic import BaseModel, ConfigDict from typing import Optional +from pydantic import BaseModel, ConfigDict + from spras.containers import prepare_volume, run_container_and_log from spras.dataset import Dataset from spras.interactome import ( @@ -12,10 +13,10 @@ from spras.prm import PRM from spras.util import duplicate_edges, raw_pathway_df -__all__ = ['PathLinker'] +__all__ = ['PathLinker', 'PathLinkerParams'] class PathLinkerParams(BaseModel): - k: Optional[int] + k: Optional[int] = None "path length (optional)" model_config = ConfigDict(use_attribute_docstrings=True) @@ -75,7 +76,7 @@ def generate_inputs(data, filename_map): header=["#Interactor1","Interactor2","Weight"]) @staticmethod - def run(inputs, output_file, args, container_framework="docker"): + def run(inputs, output_file, args=PathLinkerParams(), container_framework="docker"): """ Run PathLinker with Docker @param nodetypes: input node types with sources and targets (required) diff --git a/spras/prm.py b/spras/prm.py index 1692f11f6..73c94454a 100644 --- a/spras/prm.py +++ b/spras/prm.py @@ -1,7 +1,8 @@ +import os from abc import ABC, abstractmethod +from typing import Any, Generic, TypeVar, cast + from pydantic import BaseModel -from typing import Any, cast, TypeVar, Generic -import os from spras.dataset import Dataset diff --git a/spras/rwr.py b/spras/rwr.py index 12df71e01..ba78589ec 100644 --- a/spras/rwr.py +++ b/spras/rwr.py @@ -1,8 +1,8 @@ from pathlib import Path -from pydantic import BaseModel, ConfigDict from typing import Optional import pandas as pd +from pydantic import BaseModel, ConfigDict from spras.containers import prepare_volume, run_container from spras.dataset import Dataset @@ -10,11 +10,14 @@ from spras.prm import PRM 
from spras.util import add_rank_column, duplicate_edges, raw_pathway_df -__all__ = ['RWR'] +__all__ = ['RWR', 'RWRParams'] class RWRParams(BaseModel): - threshold: Optional[int] - alpha: Optional[float] + threshold: int + "The number of nodes to return" + + alpha: Optional[float] = None + "The chance of a restart during the random walk" model_config = ConfigDict(use_attribute_docstrings=True) diff --git a/spras/strwr.py b/spras/strwr.py index c603f9196..37590e7c6 100644 --- a/spras/strwr.py +++ b/spras/strwr.py @@ -1,7 +1,8 @@ from pathlib import Path +from typing import Optional + import pandas as pd from pydantic import BaseModel, ConfigDict -from typing import Optional from spras.containers import prepare_volume, run_container from spras.dataset import Dataset @@ -9,11 +10,14 @@ from spras.prm import PRM from spras.util import add_rank_column, duplicate_edges, raw_pathway_df -__all__ = ['ST_RWR'] +__all__ = ['ST_RWR', 'ST_RWRParams'] class ST_RWRParams(BaseModel): - threshold: Optional[int] - alpha: Optional[float] + threshold: int + "The number of nodes to return" + + alpha: Optional[float] = None + "The chance of a restart during the random walk" model_config = ConfigDict(use_attribute_docstrings=True) diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 62563bdc3..e84c0df8b 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -5,7 +5,12 @@ import pytest import spras.config.config as config -from spras.domino import DOMINO, DominoParams, post_domino_id_transform, pre_domino_id_transform +from spras.domino import ( + DOMINO, + DominoParams, + post_domino_id_transform, + pre_domino_id_transform, +) config.init_from_file("config/config.yaml") diff --git a/test/MinCostFlow/test_mcf.py b/test/MinCostFlow/test_mcf.py index c777a665d..1c9c61a60 100644 --- a/test/MinCostFlow/test_mcf.py +++ b/test/MinCostFlow/test_mcf.py @@ -4,7 +4,7 @@ import pytest import spras.config.config as config -from spras.mincostflow import MinCostFlow +from spras.mincostflow import MinCostFlow, MinCostFlowParams config.init_from_file("config/config.yaml") @@ -21,9 +21,9 @@ def test_mincostflow_required(self, graph): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) - MinCostFlow.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt', - targets=TEST_DIR + 'input/' + graph + '/targets.txt', - edges=TEST_DIR + 'input/' + graph + '/edges.txt', + MinCostFlow.run({"sources": TEST_DIR + 'input/' + graph + '/sources.txt', + "targets": TEST_DIR + 'input/' + graph + '/targets.txt', + "edges": TEST_DIR + 'input/' + graph + '/edges.txt'}, output_file=OUT_FILE) assert out_path.exists() # TODO: assert for the output .equals expected_output instead of only testing @@ -34,11 +34,11 @@ def test_mincostflow_missing_capacity(self, graph): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) - MinCostFlow.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt', - targets=TEST_DIR + 'input/' + graph + '/targets.txt', - edges=TEST_DIR + 'input/' + graph + '/edges.txt', + MinCostFlow.run({"sources": TEST_DIR + 'input/' + graph + '/sources.txt', + "targets": TEST_DIR + 'input/' + graph + '/targets.txt', + "edges": TEST_DIR + 'input/' + graph + '/edges.txt'}, output_file=OUT_FILE, - flow=1) + args=MinCostFlowParams(flow=1)) assert out_path.exists() @pytest.mark.parametrize('graph', ['graph1']) @@ -46,11 +46,11 @@ def test_mincostflow_missing_flow(self, graph): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) - MinCostFlow.run(sources=TEST_DIR + 'input/' + graph + 
'/sources.txt', - targets=TEST_DIR + 'input/' + graph + '/targets.txt', - edges=TEST_DIR + 'input/' + graph + '/edges.txt', + MinCostFlow.run({"sources": TEST_DIR + 'input/' + graph + '/sources.txt', + "targets": TEST_DIR + 'input/' + graph + '/targets.txt', + "edges": TEST_DIR + 'input/' + graph + '/edges.txt'}, output_file=OUT_FILE, - capacity=1) + args=MinCostFlowParams(capacity=1)) assert out_path.exists() @pytest.mark.parametrize('graph', ['graph1']) @@ -59,24 +59,22 @@ def test_mincostflow_too_much_flow(self, graph): out_path.unlink(missing_ok=True) with pytest.raises(RuntimeError): - MinCostFlow.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt', - targets=TEST_DIR + 'input/' + graph + '/targets.txt', - edges=TEST_DIR + 'input/' + graph + '/edges.txt', + MinCostFlow.run({"sources": TEST_DIR + 'input/' + graph + '/sources.txt', + "targets": TEST_DIR + 'input/' + graph + '/targets.txt', + "edges": TEST_DIR + 'input/' + graph + '/edges.txt'}, output_file=OUT_FILE, - flow=50, - capacity=1) + args=MinCostFlowParams(flow=50, capacity=1)) @pytest.mark.parametrize('graph', ['graph1']) def test_mincostflow_no_flow(self, graph): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) - MinCostFlow.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt', - targets=TEST_DIR + 'input/' + graph + '/targets.txt', - edges=TEST_DIR + 'input/' + graph + '/edges.txt', + MinCostFlow.run({"sources": TEST_DIR + 'input/' + graph + '/sources.txt', + "targets": TEST_DIR + 'input/' + graph + '/targets.txt', + "edges": TEST_DIR + 'input/' + graph + '/edges.txt'}, output_file=OUT_FILE, - flow=0, - capacity=1) + args=MinCostFlowParams(flow=0, capacity=1)) assert out_path.exists() @pytest.mark.parametrize('graph', ['graph1']) @@ -84,20 +82,19 @@ def test_mincostflow_all_optional(self, graph): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Include all optional arguments - MinCostFlow.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt', - targets=TEST_DIR + 'input/' + graph + '/targets.txt', - edges=TEST_DIR + 'input/' + graph + '/edges.txt', + MinCostFlow.run({"sources": TEST_DIR + 'input/' + graph + '/sources.txt', + "targets": TEST_DIR + 'input/' + graph + '/targets.txt', + "edges": TEST_DIR + 'input/' + graph + '/edges.txt'}, output_file=OUT_FILE, - flow=1, - capacity=1) + args=MinCostFlowParams(flow=1, capacity=1)) assert out_path.exists() @pytest.mark.parametrize('graph', ['graph1']) def test_mincostflow_missing(self, graph): # Test the expected error is raised when required arguments are missing with pytest.raises(ValueError): - MinCostFlow.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt', - targets=TEST_DIR + 'input/' + graph + '/targets.txt', + MinCostFlow.run({"sources": TEST_DIR + 'input/' + graph + '/sources.txt', + "targets": TEST_DIR + 'input/' + graph + '/targets.txt'}, output_file=OUT_FILE) @pytest.mark.parametrize('graph', ['graph1']) @@ -106,12 +103,10 @@ def test_mincostflow_singularity(self, graph): out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Include all optional arguments - MinCostFlow.run(sources=TEST_DIR + 'input/' + graph + '/sources.txt', - targets=TEST_DIR + 'input/' + graph + '/targets.txt', - edges=TEST_DIR + 'input/' + graph + '/edges.txt', + MinCostFlow.run({"sources": TEST_DIR + 'input/' + graph + '/sources.txt', + "targets": TEST_DIR + 'input/' + graph + '/targets.txt', + "edges": TEST_DIR + 'input/' + graph + '/edges.txt'}, output_file=OUT_FILE, - flow=1, - capacity=1, + args=MinCostFlowParams(flow=1, capacity=1), 
container_framework="singularity") assert out_path.exists() - diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py index 0239d5e5f..172197efd 100644 --- a/test/OmicsIntegrator2/test_oi2.py +++ b/test/OmicsIntegrator2/test_oi2.py @@ -8,11 +8,10 @@ config.init_from_file("config/config.yaml") -TEST_DIR = 'test/OmicsIntegrator2/' -EDGE_FILE = TEST_DIR+'input/oi2-edges.txt' -PRIZE_FILE = TEST_DIR+'input/oi2-prizes.txt' -OUT_FILE = Path(TEST_DIR, 'output', 'test.tsv') - +TEST_DIR = Path('test', 'OmicsIntegrator2') +EDGE_FILE = TEST_DIR / 'input' / 'oi2-edges.txt' +PRIZE_FILE = TEST_DIR / 'input' / 'oi2-prizes.txt' +OUT_FILE = TEST_DIR / 'output' / 'test.tsv' class TestOmicsIntegrator2: """ diff --git a/test/PathLinker/test_pathlinker.py b/test/PathLinker/test_pathlinker.py index ed9f10670..67e4b598f 100644 --- a/test/PathLinker/test_pathlinker.py +++ b/test/PathLinker/test_pathlinker.py @@ -4,7 +4,7 @@ import pytest import spras.config.config as config -from spras.pathlinker import PathLinker +from spras.pathlinker import PathLinker, PathLinkerParams config.init_from_file("config/config.yaml") @@ -21,33 +21,28 @@ def test_pathlinker_required(self): out_path = Path(OUT_FILE_DEFAULT) out_path.unlink(missing_ok=True) # Only include required arguments - PathLinker.run( - nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', - network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_FILE_DEFAULT - ) + PathLinker.run({"nodetypes": TEST_DIR+'input/sample-in-nodetypes.txt', + "network": TEST_DIR+'input/sample-in-net.txt'}, + output_file=OUT_FILE_DEFAULT) assert out_path.exists() def test_pathlinker_optional(self): out_path = Path(OUT_FILE_100) out_path.unlink(missing_ok=True) # Include optional argument - PathLinker.run( - nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', - network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_FILE_100, - k=100 - ) + PathLinker.run({"nodetypes": TEST_DIR+'input/sample-in-nodetypes.txt', + "network": TEST_DIR+'input/sample-in-net.txt'}, + output_file=OUT_FILE_100, + args=PathLinkerParams(k=100)) assert out_path.exists() def test_pathlinker_missing(self): # Test the expected error is raised when required arguments are missing with pytest.raises(ValueError): # No nodetypes - PathLinker.run( - network=TEST_DIR + 'input/sample-in-net.txt', - output_file=OUT_FILE_100, - k=100) + PathLinker.run({"network": TEST_DIR + 'input/sample-in-net.txt'}, + output_file=OUT_FILE_100, + args=PathLinkerParams(k=100)) # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @@ -56,9 +51,8 @@ def test_pathlinker_singularity(self): out_path = Path(OUT_FILE_DEFAULT) out_path.unlink(missing_ok=True) # Only include required arguments and run with Singularity - PathLinker.run( - nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', - network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_FILE_DEFAULT, - container_framework="singularity") + PathLinker.run({"nodetypes": TEST_DIR+'input/sample-in-nodetypes.txt', + "network": TEST_DIR+'input/sample-in-net.txt'}, + output_file=OUT_FILE_DEFAULT, + container_framework="singularity") assert out_path.exists() diff --git a/test/RWR/test_RWR.py b/test/RWR/test_RWR.py index b0316ded0..70eb06845 100644 --- a/test/RWR/test_RWR.py +++ b/test/RWR/test_RWR.py @@ -5,7 +5,7 @@ import pytest import spras.config.config as config -from spras.rwr import RWR +from spras.rwr import RWR, RWRParams 
config.init_from_file("config/config.yaml") @@ -19,9 +19,9 @@ class TestRWR: """ def test_rwr(self): OUT_FILE.unlink(missing_ok=True) - RWR.run(network=Path(TEST_DIR, 'input', 'rwr-network.txt'), - nodes=Path(TEST_DIR, 'input','rwr-nodes.txt'), - alpha=0.85, + RWR.run({"network": Path(TEST_DIR, 'input', 'rwr-network.txt'), + "nodes": Path(TEST_DIR, 'input','rwr-nodes.txt')}, + args=RWRParams(alpha=0.85, threshold=200), output_file=OUT_FILE) assert OUT_FILE.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected_output', 'rwr-output.txt') @@ -32,9 +32,9 @@ def test_rwr(self): """ def test_missing_file(self): with pytest.raises(OSError): - RWR.run(network=Path(TEST_DIR, 'input', 'missing.txt'), - nodes=Path(TEST_DIR, 'input','rwr-nodes.txt'), - alpha=0.85, + RWR.run({"network": Path(TEST_DIR, 'input', 'missing.txt'), + "nodes": Path(TEST_DIR, 'input','rwr-nodes.txt')}, + args=RWRParams(alpha=0.85, threshold=200), output_file=OUT_FILE) """ @@ -42,9 +42,9 @@ def test_missing_file(self): """ def test_format_error(self): with pytest.raises(ValueError): - RWR.run(network=Path(TEST_DIR, 'input', 'rwr-bad-network.txt'), - nodes=Path(TEST_DIR, 'input','rwr-nodes.txt'), - alpha=0.85, + RWR.run({"network": Path(TEST_DIR, 'input', 'rwr-bad-network.txt'), + "nodes": Path(TEST_DIR, 'input','rwr-nodes.txt')}, + args=RWRParams(alpha=0.85, threshold=200), output_file=OUT_FILE) # Only run Singularity test if the binary is available on the system @@ -53,9 +53,9 @@ def test_format_error(self): def test_rwr_singularity(self): OUT_FILE.unlink(missing_ok=True) # Only include required arguments and run with Singularity - RWR.run(network=Path(TEST_DIR, 'input', 'rwr-network.txt'), - nodes=Path(TEST_DIR, 'input','rwr-nodes.txt'), - alpha=0.85, + RWR.run({"network": Path(TEST_DIR, 'input', 'rwr-network.txt'), + "nodes": Path(TEST_DIR, 'input','rwr-nodes.txt')}, + args=RWRParams(alpha=0.85, threshold=200), output_file=OUT_FILE, container_framework="singularity") assert OUT_FILE.exists() diff --git a/test/ST_RWR/test_STRWR.py b/test/ST_RWR/test_STRWR.py index 898b24055..ea0c2bda0 100644 --- a/test/ST_RWR/test_STRWR.py +++ b/test/ST_RWR/test_STRWR.py @@ -5,7 +5,7 @@ import pytest import spras.config.config as config -from spras.strwr import ST_RWR +from spras.strwr import ST_RWR, ST_RWRParams config.init_from_file("config/config.yaml") @@ -20,10 +20,10 @@ class TestSTRWR: """ def test_strwr(self): OUT_FILE.unlink(missing_ok=True) - ST_RWR.run(network=Path(TEST_DIR, 'input', 'strwr-network.txt'), - sources=Path(TEST_DIR, 'input', 'strwr-sources.txt'), - targets=Path(TEST_DIR, 'input','strwr-targets.txt'), - alpha=0.85, + ST_RWR.run({"network": Path(TEST_DIR, 'input', 'strwr-network.txt'), + "sources": Path(TEST_DIR, 'input', 'strwr-sources.txt'), + "targets": Path(TEST_DIR, 'input','strwr-targets.txt')}, + args=ST_RWRParams(alpha=0.85, threshold=200), output_file=OUT_FILE) assert OUT_FILE.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected_output', 'strwr-output.txt') @@ -34,10 +34,10 @@ def test_strwr(self): """ def test_missing_file(self): with pytest.raises(OSError): - ST_RWR.run(network=Path(TEST_DIR, 'input', 'missing.txt'), - sources=Path(TEST_DIR, 'input', 'strwr-sources.txt'), - targets=Path(TEST_DIR, 'input','strwr-targets.txt'), - alpha=0.85, + ST_RWR.run({"network": Path(TEST_DIR, 'input', 'missing.txt'), + "sources": Path(TEST_DIR, 'input', 'strwr-sources.txt'), + "targets": Path(TEST_DIR, 'input','strwr-targets.txt')}, + args=ST_RWRParams(alpha=0.85, 
threshold=200), output_file=OUT_FILE) """ @@ -45,10 +45,10 @@ def test_missing_file(self): """ def test_format_error(self): with pytest.raises(ValueError): - ST_RWR.run(network=Path(TEST_DIR, 'input', 'strwr-bad-network.txt'), - sources=Path(TEST_DIR, 'input', 'strwr-sources.txt'), - targets=Path(TEST_DIR, 'input','strwr-targets.txt'), - alpha=0.85, + ST_RWR.run({"network": Path(TEST_DIR, 'input', 'strwr-bad-network.txt'), + "sources": Path(TEST_DIR, 'input', 'strwr-sources.txt'), + "targets": Path(TEST_DIR, 'input','strwr-targets.txt')}, + args=ST_RWRParams(alpha=0.85, threshold=200), output_file=OUT_FILE) # Only run Singularity test if the binary is available on the system @@ -57,10 +57,10 @@ def test_format_error(self): def test_strwr_singularity(self): OUT_FILE.unlink(missing_ok=True) # Only include required arguments and run with Singularity - ST_RWR.run(network=Path(TEST_DIR, 'input', 'strwr-network.txt'), - sources=Path(TEST_DIR, 'input', 'strwr-sources.txt'), - targets=Path(TEST_DIR, 'input','strwr-targets.txt'), - alpha=0.85, + ST_RWR.run({"network": Path(TEST_DIR, 'input', 'strwr-network.txt'), + "sources": Path(TEST_DIR, 'input', 'strwr-sources.txt'), + "targets": Path(TEST_DIR, 'input','strwr-targets.txt')}, + args=ST_RWRParams(alpha=0.85, threshold=200), output_file=OUT_FILE, container_framework="singularity") assert OUT_FILE.exists() From da6771166f36fb9d1a1f9d8b651296de02546fd1 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 20:47:08 +0000 Subject: [PATCH 41/60] fix: specify default args out of run --- spras/allpairs.py | 2 +- spras/btb.py | 2 +- spras/domino.py | 7 +++++-- spras/meo.py | 5 ++++- spras/mincostflow.py | 5 ++++- spras/omicsintegrator1.py | 8 ++++---- spras/omicsintegrator2.py | 5 ++++- spras/pathlinker.py | 17 +++++++---------- 8 files changed, 30 insertions(+), 21 deletions(-) diff --git a/spras/allpairs.py b/spras/allpairs.py index 15a3b17f7..670d3f721 100644 --- a/spras/allpairs.py +++ b/spras/allpairs.py @@ -72,7 +72,7 @@ def generate_inputs(data: Dataset, filename_map): header=["#Interactor1", "Interactor2", "Weight"]) @staticmethod - def run(inputs, output_file, args=Empty(), container_framework="docker"): + def run(inputs, output_file, args=None, container_framework="docker"): """ Run All Pairs Shortest Paths with Docker @param nodetypes: input node types with sources and targets (required) diff --git a/spras/btb.py b/spras/btb.py index 6ad3afb69..81474bdb2 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -65,7 +65,7 @@ def generate_inputs(data, filename_map): # Skips parameter validation step @staticmethod - def run(inputs, output_file, args=Empty(), container_framework="docker"): + def run(inputs, output_file, args=None, container_framework="docker"): # Tests for pytest (docker container also runs this) # Testing out here avoids the trouble that container errors provide diff --git a/spras/domino.py b/spras/domino.py index 86f3c0563..16a70a788 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -76,7 +76,10 @@ def generate_inputs(data, filename_map): header=['ID_interactor_A', 'ppi', 'ID_interactor_B']) @staticmethod - def run(inputs, output_file, args=DominoParams(), container_framework="docker"): + def run(inputs, output_file, args=None, container_framework="docker"): + if not args: + args = DominoParams() + # Let visualization be always true, parallelization be always 1 thread, and use_cache be always false. 
if not inputs["network"] or not inputs["active_genes"]: raise ValueError('Required DOMINO arguments are missing') @@ -152,7 +155,7 @@ def run(inputs, output_file, args=DominoParams(), container_framework="docker"): # Clean up DOMINO intermediate and pickle files slices_file.unlink(missing_ok=True) Path(out_dir, 'network.slices.pkl').unlink(missing_ok=True) - Path(network + '.pkl').unlink(missing_ok=True) + Path(f"{inputs['network']}.pkl").unlink(missing_ok=True) @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file, params): diff --git a/spras/meo.py b/spras/meo.py index 30e81d87e..02edf07af 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -145,7 +145,7 @@ def generate_inputs(data, filename_map): # TODO add parameter validation # TODO document required arguments @staticmethod - def run(inputs, args=MEOParams(), output_file=None, container_framework="docker"): + def run(inputs, output_file=None, args=None, container_framework="docker"): """ Run Maximum Edge Orientation in the Docker image with the provided parameters. The properties file is generated from the provided arguments. @@ -154,6 +154,9 @@ def run(inputs, args=MEOParams(), output_file=None, container_framework="docker" Only the edge output file is retained. All other output files are deleted. """ + if not args: + args = MEOParams() + if inputs["edges"] is None or inputs["sources"] is None or inputs["targets"] is None: raise ValueError('Required Maximum Edge Orientation arguments are missing') diff --git a/spras/mincostflow.py b/spras/mincostflow.py index eab80c631..b2267f800 100644 --- a/spras/mincostflow.py +++ b/spras/mincostflow.py @@ -72,7 +72,10 @@ def generate_inputs(data, filename_map): header=False) @staticmethod - def run(inputs, output_file, args=MinCostFlowParams(), container_framework="docker"): + def run(inputs, output_file, args=None, container_framework="docker"): + if not args: + args = MinCostFlowParams() + # ensures that these parameters are required if not inputs["sources"] or not inputs["targets"] or not inputs["edges"]: raise ValueError('Required MinCostFlow arguments are missing') diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index d8226f735..9152e80a6 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -40,8 +40,8 @@ def write_conf(filename=Path('config.txt'), w=None, b=None, d=None, mu=None, noi class OmicsIntegrator1Params(BaseModel): dummy_mode: Optional[str] = None - mu_squared: Optional[bool] = None - exclude_terms: Optional[bool] = None + mu_squared: bool = False + exclude_terms: bool = False noisy_edges: Optional[int] = None "How many times you would like to add noise to the given edge values and re-run the algorithm." 
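A quick check of what the new boolean defaults buy (a sketch, assuming only the required w, b, and d are supplied); the hunk below then drops the corresponding None guards when building the command:

    from spras.omicsintegrator1 import OmicsIntegrator1Params

    # The toggles now default to False rather than None, so an
    # untouched model is always fully populated.
    params = OmicsIntegrator1Params(w=5, b=1.0, d=10)
    assert params.mu_squared is False and params.exclude_terms is False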
@@ -202,9 +202,9 @@ def run(inputs, output_file, args, container_framework="docker"): command.extend(['--dummyMode', args.dummy_mode]) # Add optional arguments - if args.mu_squared is not None and args.mu_squared: + if args.mu_squared: command.extend(['--musquared']) - if args.exclude_terms is not None and args.exclude_terms: + if args.exclude_terms: command.extend(['--excludeTerms']) if args.noisy_edges is not None: command.extend(['--noisyEdges', str(args.noisy_edges)]) diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 41aec9ee1..fb420de8e 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -103,7 +103,7 @@ def generate_inputs(data: Dataset, filename_map): # TODO add reasonable default values # TODO document required arguments @staticmethod - def run(inputs, output_file, args=OmicsIntegrator2Params(), container_framework="docker"): + def run(inputs, output_file, args=None, container_framework="docker"): """ Run Omics Integrator 2 in the Docker image with the provided parameters. Only the .tsv output file is retained and then renamed. @@ -111,6 +111,9 @@ def run(inputs, output_file, args=OmicsIntegrator2Params(), container_framework= @param output_file: the name of the output file, which will overwrite any existing file with this name @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) """ + if not args: + args = OmicsIntegrator2Params() + if inputs["edges"] is None or inputs["prizes"] is None: raise ValueError('Required Omics Integrator 2 arguments are missing') diff --git a/spras/pathlinker.py b/spras/pathlinker.py index 167403cef..d5ac385f4 100644 --- a/spras/pathlinker.py +++ b/spras/pathlinker.py @@ -16,8 +16,8 @@ __all__ = ['PathLinker', 'PathLinkerParams'] class PathLinkerParams(BaseModel): - k: Optional[int] = None - "path length (optional)" + k: int = 100 + "path length" model_config = ConfigDict(use_attribute_docstrings=True) @@ -76,7 +76,7 @@ def generate_inputs(data, filename_map): header=["#Interactor1","Interactor2","Weight"]) @staticmethod - def run(inputs, output_file, args=PathLinkerParams(), container_framework="docker"): + def run(inputs, output_file, args=None, container_framework="docker"): """ Run PathLinker with Docker @param nodetypes: input node types with sources and targets (required) @@ -85,10 +85,9 @@ def run(inputs, output_file, args=PathLinkerParams(), container_framework="docke @param k: @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) """ - # Add additional parameter validation - # Do not require k - # Use the PathLinker default - # Could consider setting the default here instead + if not args: + args = PathLinkerParams() + if not inputs["nodetypes"] or not inputs["network"]: raise ValueError('Required PathLinker arguments are missing') @@ -118,9 +117,7 @@ def run(inputs, output_file, args=PathLinkerParams(), container_framework="docke node_file, '--output', mapped_out_prefix] - # Add optional argument - if args.k is not None: - command.extend(['-k', str(args.k)]) + command.extend(['-k', str(args.k)]) container_suffix = "pathlinker:v2" run_container_and_log('PathLinker', From 45cfe87a46a6850a3f1ce6d890b25c3b47eff781 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Mon, 14 Jul 2025 21:03:20 +0000 Subject: [PATCH 42/60] fix: more defaults --- spras/domino.py | 2 +- spras/mincostflow.py | 2 +- spras/omicsintegrator1.py | 16 ++++++---------- spras/omicsintegrator2.py | 10 +++++----- spras/pathlinker.py | 8 -------- 5 files changed, 13 insertions(+), 25 deletions(-) diff --git a/spras/domino.py b/spras/domino.py index 16a70a788..110b11ab3 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -79,7 +79,7 @@ def generate_inputs(data, filename_map): def run(inputs, output_file, args=None, container_framework="docker"): if not args: args = DominoParams() - + # Let visualization be always true, parallelization be always 1 thread, and use_cache be always false. if not inputs["network"] or not inputs["active_genes"]: raise ValueError('Required DOMINO arguments are missing') diff --git a/spras/mincostflow.py b/spras/mincostflow.py index b2267f800..2673d91e2 100644 --- a/spras/mincostflow.py +++ b/spras/mincostflow.py @@ -75,7 +75,7 @@ def generate_inputs(data, filename_map): def run(inputs, output_file, args=None, container_framework="docker"): if not args: args = MinCostFlowParams() - + # ensures that these parameters are required if not inputs["sources"] or not inputs["targets"] or not inputs["edges"]: raise ValueError('Required MinCostFlow arguments are missing') diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index 9152e80a6..74f55bff7 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -43,13 +43,13 @@ class OmicsIntegrator1Params(BaseModel): mu_squared: bool = False exclude_terms: bool = False - noisy_edges: Optional[int] = None + noisy_edges: int = 0 "How many times you would like to add noise to the given edge values and re-run the algorithm." - shuffled_prizes: Optional[int] = None + shuffled_prizes: int = 0 "shuffled_prizes: How many times the algorithm should shuffle the prizes and re-run" - random_terminals: Optional[int] = None + random_terminals: int = 0 "How many times to apply the given prizes to random nodes in the interactome" seed: Optional[int] = None @@ -140,7 +140,6 @@ def generate_inputs(data, filename_map): # TODO add support for knockout argument # TODO add reasonable default values - # TODO document required arguments @staticmethod def run(inputs, output_file, args, container_framework="docker"): if inputs["edges"] is None or inputs["prizes"] is None or output_file is None: @@ -206,12 +205,9 @@ def run(inputs, output_file, args, container_framework="docker"): command.extend(['--musquared']) if args.exclude_terms: command.extend(['--excludeTerms']) - if args.noisy_edges is not None: - command.extend(['--noisyEdges', str(args.noisy_edges)]) - if args.shuffled_prizes is not None: - command.extend(['--shuffledPrizes', str(args.shuffled_prizes)]) - if args.random_terminals is not None: - command.extend(['--randomTerminals', str(args.random_terminals)]) + command.extend(['--noisyEdges', str(args.noisy_edges)]) + command.extend(['--shuffledPrizes', str(args.shuffled_prizes)]) + command.extend(['--randomTerminals', str(args.random_terminals)]) if args.seed is not None: command.extend(['--seed', str(args.seed)]) diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index fb420de8e..f0a2d9c52 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -1,8 +1,9 @@ +import time from pathlib import Path from typing import Optional import pandas as pd -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field from spras.containers import 
prepare_volume, run_container_and_log from spras.dataset import Dataset @@ -39,8 +40,8 @@ class OmicsIntegrator2Params(BaseModel): "all" = connect to all nodes in the interactome. """ - seed: Optional[int] = None - "The random seed to use for this run." + seed: int = Field(default_factory=lambda _: int(time.time() * 1000)) + "The random seed to use for this run. Defaults to the current UNIX timestamp." model_config = ConfigDict(use_attribute_docstrings=True) @@ -153,8 +154,7 @@ def run(inputs, output_file, args=None, container_framework="docker"): if args.dummy_mode is not None: # This argument does not follow the other naming conventions command.extend(['--dummyMode', str(args.dummy_mode)]) - if args.seed is not None: - command.extend(['--seed', str(args.seed)]) + command.extend(['--seed', str(args.seed)]) container_suffix = "omics-integrator-2:v2" run_container_and_log('Omics Integrator 2', diff --git a/spras/pathlinker.py index d5ac385f4..9b6fe964c 100644 --- a/spras/pathlinker.py +++ b/spras/pathlinker.py @@ -77,14 +77,6 @@ def generate_inputs(data, filename_map): @staticmethod def run(inputs, output_file, args=None, container_framework="docker"): - """ - Run PathLinker with Docker - @param nodetypes: input node types with sources and targets (required) - @param network: input network file (required) - @param output_file: path to the output pathway file (required) - @param k: - @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) - """ if not args: args = PathLinkerParams() From e0808570331316b5dbfd5af5bc4d2f4702635bb8 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 22:13:13 +0000 Subject: [PATCH 43/60] feat: begin algorithm parsing --- config/config.yaml | 4 +-- spras/config/algorithms.py | 63 ++++++++++++++++++++++++++++++++++++++ spras/config/schema.py | 21 ++----------- spras/runner.py | 43 ++++++++++++++------------ 4 files changed, 91 insertions(+), 40 deletions(-) create mode 100644 spras/config/algorithms.py diff --git a/config/config.yaml index 8092b9eb9..5d23946d4 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -81,8 +81,8 @@ algorithms: rand_restarts: 10 - name: "mincostflow" - params: - include: true + include: true + runs: run1: flow: 1 # The flow must be an int capacity: 1 diff --git a/spras/config/algorithms.py new file mode 100644 index 000000000..9e78788f4 --- /dev/null +++ b/spras/config/algorithms.py @@ -0,0 +1,63 @@ +""" +Dynamic construction of algorithm parameters with runtime type information for +parameter combinations. This has been isolated from schema.py as it is not declarative, +and rather mainly contains validators and lower-level pydantic code. +""" +from typing import Any, cast, Union + +from spras.runner import algorithms +from pydantic import BaseModel, create_model + +__all__ = ['AlgorithmUnion'] + +def construct_algorithm_model(name: str, model: type[BaseModel]) -> type[BaseModel]: + """ + Dynamically constructs a parameter-combination model based on the original args model. + This is the most 'hacky' part of this code, but, thanks to pydantic, we almost* + avoid reflection and preserve rich type information. + """ + # First, we need to take our 'model' and coerce it to permit parameter combinations. + # This assumes that all of the keys are flattened, so we only get a structure like so: + # class AlgorithmParams(BaseModel): + # key1: int + # key2: list[str] + # ...
+ # and we want to transform this to: + # class AlgorithmParamsCombination(BaseModel): + # key1: list[int] + # key2: list[list[str]] + # This function does not worry about getting the cartesian product of this. + + # Map our fields to a list (assuming we have no nested keys) + mapped_list_field: dict[str, type[list[Any]]] = {name: list[field.annotation] for name, field in model.model_fields.items()} + + # Runtime assertion check: mapped_list_field does not contain any `__-prefixed` fields + for key in mapped_list_field.keys(): + assert not key.startswith("__"), f"A private key has been passed from {name}'s argument schema." + \ + "This should have been caught by the Snakemake CI step." + + # Pass this as kwargs to create_model, which usually takes in parameters field_name=type. + # This is the asterisk (*) from the docstring: we do need to cast create_model, since otherwise + # the type-checker complains that we may have had a key that starts with __ in mapped_list_fields. + # The above assertion prevents this. + run_model = (cast(Any, create_model))( + f'{name}RunModel', + **mapped_list_field + ) + + # Here is an example of how this would look like inside config.yaml + # name: pathlinker + # include: true + # runs: + # run1: + # (from run_model) + # ... + return create_model( + f'{name}Model', + name=name, + include=bool, + runs=dict[str, run_model] + ) + +algorithm_models: list[type[BaseModel]] = [construct_algorithm_model(name, model) for name, (_, model) in algorithms.items()] +AlgorithmUnion = Union[tuple(algorithm_models)] diff --git a/spras/config/schema.py b/spras/config/schema.py index 623c9dd9b..7a42673d6 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -11,13 +11,13 @@ """ import re -from typing import Annotated, Optional +from typing import Annotated from pydantic import AfterValidator, BaseModel, ConfigDict, Field +from spras.config.algorithms import AlgorithmUnion from spras.config.util import CaseInsensitiveEnum - class SummaryAnalysis(BaseModel): include: bool @@ -87,21 +87,6 @@ class ContainerRegistry(BaseModel): model_config = ConfigDict(extra='forbid') -class AlgorithmParams(BaseModel): - include: bool - directed: Optional[bool] = None - - # TODO: use array of runs instead. We currently rely on the - # extra parameters here to extract the algorithm parameter information, - # which is why this deviates from the usual ConfigDict(extra='forbid'). - model_config = ConfigDict(extra='allow') - -class Algorithm(BaseModel): - name: str - params: AlgorithmParams - - model_config = ConfigDict(extra='forbid') - class Dataset(BaseModel): label: Annotated[str, AfterValidator(label_validator("Dataset"))] node_files: list[str] @@ -139,7 +124,7 @@ class RawConfig(BaseModel): description="The length of the hash used to identify a parameter combination", default=DEFAULT_HASH_LENGTH) - algorithms: list[Algorithm] + algorithms: list[AlgorithmUnion] # type: ignore - pydantic allows this. 
datasets: list[Dataset] gold_standards: list[GoldStandard] = [] analysis: Analysis = Analysis() reconstruction_settings: ReconstructionSettings model_config = ConfigDict(extra='forbid') diff --git a/spras/runner.py index a023a9606..843b3cf46 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -1,35 +1,38 @@ from typing import Any +from pydantic import BaseModel + # supported algorithm imports from spras.allpairs import AllPairs from spras.btb import BowTieBuilder from spras.dataset import Dataset -from spras.domino import DOMINO -from spras.meo import MEO -from spras.mincostflow import MinCostFlow -from spras.omicsintegrator1 import OmicsIntegrator1 -from spras.omicsintegrator2 import OmicsIntegrator2 -from spras.pathlinker import PathLinker +from spras.config.util import Empty +from spras.domino import DOMINO, DominoParams +from spras.meo import MEO, MEOParams +from spras.mincostflow import MinCostFlow, MinCostFlowParams +from spras.omicsintegrator1 import OmicsIntegrator1, OmicsIntegrator1Params +from spras.omicsintegrator2 import OmicsIntegrator2, OmicsIntegrator2Params +from spras.pathlinker import PathLinker, PathLinkerParams from spras.prm import PRM -from spras.rwr import RWR -from spras.strwr import ST_RWR +from spras.rwr import RWR, RWRParams +from spras.strwr import ST_RWR, ST_RWRParams -algorithms: dict[str, type[PRM]] = { - "allpairs": AllPairs, - "bowtiebuilder": BowTieBuilder, - "domino": DOMINO, - "meo": MEO, - "mincostflow": MinCostFlow, - "omicsintegrator1": OmicsIntegrator1, - "omicsintegrator2": OmicsIntegrator2, - "pathlinker": PathLinker, - "rwr": RWR, - "strwr": ST_RWR, +algorithms: dict[str, tuple[type[PRM], type[BaseModel]]] = { + "allpairs": (AllPairs, Empty), + "bowtiebuilder": (BowTieBuilder, Empty), + "domino": (DOMINO, DominoParams), + "meo": (MEO, MEOParams), + "mincostflow": (MinCostFlow, MinCostFlowParams), + "omicsintegrator1": (OmicsIntegrator1, OmicsIntegrator1Params), + "omicsintegrator2": (OmicsIntegrator2, OmicsIntegrator2Params), + "pathlinker": (PathLinker, PathLinkerParams), + "rwr": (RWR, RWRParams), + "strwr": (ST_RWR, ST_RWRParams), } def get_algorithm(algorithm: str) -> type[PRM]: try: - return algorithms[algorithm.lower()] + return algorithms[algorithm.lower()][0] except KeyError as exc: raise NotImplementedError(f'{algorithm} is not currently supported.') from exc From 53f55e27a7bc1040c77c8941746156f497c214e4 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 22:59:58 +0000 Subject: [PATCH 44/60] fix: clean up type errors, begin nondeterminism --- spras/config/algorithms.py | 13 ++++++------- spras/config/schema.py | 4 ++++ spras/config/util.py | 30 ++++++++++++++++++++++++++++-- spras/domino.py | 5 +++-- spras/omicsintegrator1.py | 7 ++++--- 5 files changed, 45 insertions(+), 14 deletions(-) diff --git a/spras/config/algorithms.py index 9e78788f4..bc7b896fc 100644 --- a/spras/config/algorithms.py +++ b/spras/config/algorithms.py @@ -3,7 +3,7 @@ parameter combinations. This has been isolated from schema.py as it is not declarative, and rather mainly contains validators and lower-level pydantic code. """ -from typing import Any, cast, Union +from typing import Any, cast, Union, Literal from spras.runner import algorithms from pydantic import BaseModel, create_model @@ -13,8 +13,8 @@ def construct_algorithm_model(name: str, model: type[BaseModel]) -> type[BaseModel]: """ Dynamically constructs a parameter-combination model based on the original args model.
- This is the most 'hacky' part of this code, but, thanks to pydantic, we almost* - avoid reflection and preserve rich type information. + This is the most 'hacky' part of this code, but, thanks to pydantic, we avoid reflection + and preserve rich type information at runtime. """ # First, we need to take our 'model' and coerce it to permit parameter combinations. # This assumes that all of the keys are flattened, so we only get a structure like so: @@ -37,9 +37,8 @@ def construct_algorithm_model(name: str, model: type[BaseMod "This should have been caught by the Snakemake CI step." # Pass this as kwargs to create_model, which usually takes in parameters field_name=type. - # This is the asterisk (*) from the docstring: we do need to cast create_model, since otherwise - # the type-checker complains that we may have had a key that starts with __ in mapped_list_fields. - # The above assertion prevents this. + # We do need to cast create_model, since otherwise the type-checker complains that we may + # have had a key that starts with __ in mapped_list_field. The above assertion prevents this. run_model = (cast(Any, create_model))( f'{name}RunModel', **mapped_list_field @@ -54,7 +53,7 @@ def construct_algorithm_model(name: str, model: type[BaseMod # ... return create_model( f'{name}Model', - name=name, + name=Literal[name], include=bool, runs=dict[str, run_model] ) diff --git a/spras/config/schema.py index 7a42673d6..76404b387 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -124,6 +124,7 @@ class RawConfig(BaseModel): description="The length of the hash used to identify a parameter combination", default=DEFAULT_HASH_LENGTH) + # See algorithms.py for more information about AlgorithmUnion algorithms: list[AlgorithmUnion] # type: ignore - pydantic allows this. @@ -132,3 +133,6 @@ class RawConfig(BaseModel): reconstruction_settings: ReconstructionSettings model_config = ConfigDict(extra='forbid') + +# AlgorithmUnion is dynamically constructed. +RawConfig.model_rebuild() diff --git a/spras/config/util.py index 32f19076f..0ed99a26e 100644 --- a/spras/config/util.py +++ b/spras/config/util.py @@ -1,7 +1,14 @@ +""" +General config utilities. This is the only config file +that should be imported by algorithms, and algorithms should +only import this config file. +""" + from enum import Enum +import time from typing import Any -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field # https://stackoverflow.com/a/76883868/7589775 @@ -23,6 +30,25 @@ def _missing_(cls, value: Any): class Empty(BaseModel): """ - The empty base model. Used for specifying that an algorithm takes no parameters. + The empty base model. Used for specifying that an algorithm takes no parameters, + yet is deterministic. """ model_config = ConfigDict(extra="forbid") + +class NondeterministicModel(BaseModel): + """ + A nondeterministic model. Any seedless nondeterministic algorithm should extend this. + Internally, this inserts a _time parameter that can be serialized but not + deserialized, and will affect the hash. + """ + + # We don't make this a PrivateAttr for reasons explained in the doc comment. + time: float = Field(default_factory=time.time, alias="_time") + """ + The internal _time parameter. This is a parameter only given to nondeterministic + algorithms that provide no randomness seed.
diff --git a/spras/domino.py b/spras/domino.py
index 30ccc8a84..a9ce7a43b 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -3,9 +3,10 @@
 from typing import Optional

 import pandas as pd
-from pydantic import BaseModel, ConfigDict
+from pydantic import ConfigDict

 from spras.containers import prepare_volume, run_container_and_log
+from spras.config.util import NondeterministicModel
 from spras.interactome import (
     add_constant,
     reinsert_direction_col_undirected,
@@ -18,7 +19,7 @@
 ID_PREFIX = 'ENSG0'
 ID_PREFIX_LEN = len(ID_PREFIX)

-class DominoParams(BaseModel):
+class DominoParams(NondeterministicModel):
     module_threshold: Optional[float] = None
     "the p-value threshold for considering a slice as relevant (optional)"

diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
index 74f55bff7..ddb934bb5 100644
--- a/spras/omicsintegrator1.py
+++ b/spras/omicsintegrator1.py
@@ -1,7 +1,8 @@
 from pathlib import Path
+import time
 from typing import Optional

-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field

 from spras.containers import prepare_volume, run_container_and_log
 from spras.interactome import reinsert_direction_col_mixed
@@ -52,8 +53,8 @@ class OmicsIntegrator1Params(BaseModel):
     random_terminals: int = 0
     "How many times to apply the given prizes to random nodes in the interactome"

-    seed: Optional[int] = None
-    "the randomness seed to use"
+    seed: int = Field(default_factory=lambda: int(time.time() * 1000))
+    "The random seed to use for this run. Defaults to the current UNIX timestamp in milliseconds."

     w: int
     "the number of trees"

From 2c938ed09708359ebaaf82733646868998df759f Mon Sep 17 00:00:00 2001
From: "Tristan F.-R."
Date: Mon, 14 Jul 2025 16:05:36 -0700
Subject: [PATCH 45/60] fix: add spras.config to pyproject

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index bedbe1628..b18ef12c5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,4 +74,4 @@ select = [
 # py-modules tells setuptools which directory is our actual module
 py-modules = ["spras"]
 # packages tells setuptools what the exported package is called (ie allows import spras)
-packages = ["spras", "spras.analysis"]
+packages = ["spras", "spras.analysis", "spras.config"]

From a4e265d2a2930b3b7b2e44a2907d64be9a08f09c Mon Sep 17 00:00:00 2001
From: "Tristan F.-R."
Date: Mon, 14 Jul 2025 23:15:07 +0000
Subject: [PATCH 46/60] chore: begin little utility

---
 config/config.yaml    |  4 ++--
 util/update_schema.py | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 util/update_schema.py

diff --git a/config/config.yaml b/config/config.yaml
index 5d23946d4..7bb58dcdf 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -73,8 +73,8 @@ algorithms:
       g: 3

 - name: "meo"
-  params:
-    include: true
+  include: true
+  runs:
     run1:
       max_path_length: 3
       local_search: "Yes"
diff --git a/util/update_schema.py b/util/update_schema.py
new file mode 100644
index 000000000..c6a7bedca
--- /dev/null
+++ b/util/update_schema.py
@@ -0,0 +1,13 @@
+"""
+Updates config/schema.json.
+This should be done whenever a new algorithm is introduced,
+or the config is otherwise directly changed.
+""" + +import json +from pathlib import Path + +from spras.config.schema import RawConfig + +config_schema = RawConfig.model_json_schema() +Path('config/schema.json').write_text(json.dumps(config_schema, indent=2)) From 145b2ec9c16b736a7e2939cd257f69f0abc456cf Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 14 Jul 2025 16:46:12 -0700 Subject: [PATCH 47/60] chore: mv container schema changes over --- config/config.yaml | 50 +++++++++++++------------ spras/config/config.py | 23 ++---------- spras/config/container_schema.py | 64 ++++++++++++++++++++++++++++++++ spras/config/schema.py | 6 +-- spras/containers.py | 20 +++++----- 5 files changed, 105 insertions(+), 58 deletions(-) create mode 100644 spras/config/container_schema.py diff --git a/config/config.yaml b/config/config.yaml index 7bb58dcdf..a834131e6 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -3,30 +3,32 @@ # The length of the hash used to identify a parameter combination hash_length: 7 -# Specify the container framework used by each PRM wrapper. Valid options include: -# - docker (default if not specified) -# - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed -# - dsub -- experimental with limited support, used for running on Google Cloud with the All of Us cloud environment. -# - There is no support for other environments at the moment. -container_framework: docker - -# Only used if container_framework is set to singularity, this will unpack the singularity containers -# to the local filesystem. This is useful when PRM containers need to run inside another container, -# such as would be the case in an HTCondor/OSPool environment. -# NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way -# that persists after the workflow is complete. To clean up the unpacked containers, the user must -# manually delete them. For convenience, these unpacked files will exist in the current working directory -# under `unpacked`. -unpack_singularity: false - -# Allow the user to configure which container registry containers should be pulled from -# Note that this assumes container names are consistent across registries, and that the -# registry being passed doesn't require authentication for pull actions -container_registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio +# Collection of container options +containers: + # Specify the container framework used by each PRM wrapper. Valid options include: + # - docker (default if not specified) + # - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed + # - dsub -- experimental with limited support, used for running on Google Cloud with the All of Us cloud environment. + # - There is no support for other environments at the moment. + framework: docker + + # Only used if container_framework is set to singularity, this will unpack the singularity containers + # to the local filesystem. This is useful when PRM containers need to run inside another container, + # such as would be the case in an HTCondor/OSPool environment. + # NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way + # that persists after the workflow is complete. To clean up the unpacked containers, the user must + # manually delete them. 
For convenience, these unpacked files will exist in the current working directory + # under `unpacked`. + unpack_singularity: false + + # Allow the user to configure which container registry containers should be pulled from + # Note that this assumes container names are consistent across registries, and that the + # registry being passed doesn't require authentication for pull actions + registry: + base_url: docker.io + # The owner or project of the registry + # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs + owner: reedcompbio # This list of algorithms should be generated by a script which checks the filesystem for installs. # It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm diff --git a/spras/config/config.py b/spras/config/config.py index c6ac8f8e0..6d6ee4b7e 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -23,13 +23,12 @@ import numpy as np import yaml -from spras.config.schema import Analysis, ContainerFramework, RawConfig +from spras.config.container_schema import ProcessedContainerOptions +from spras.config.schema import Analysis, RawConfig from spras.util import NpHashEncoder, hash_params_sha1_base32 config = None -DEFAULT_CONTAINER_PREFIX = "docker.io/reedcompbio" - # This will get called in the Snakefile, instantiating the singleton with the raw config def init_global(config_dict): global config @@ -67,9 +66,7 @@ def __init__(self, raw_config: dict[str, Any]): # Directory used for storing output self.out_dir = parsed_raw_config.reconstruction_settings.locations.reconstruction_dir # Container framework used by PRMs. Valid options are "docker", "dsub", and "singularity" - self.container_framework = None - # The container prefix (host and organization) to use for images. Default is "docker.io/reedcompbio" - self.container_prefix: str = DEFAULT_CONTAINER_PREFIX + self.container_settings = ProcessedContainerOptions.from_container_settings(parsed_raw_config.containers, parsed_raw_config.hash_length) # A Boolean specifying whether to unpack singularity containers. Default is False self.unpack_singularity = False # A dictionary to store configured datasets against which SPRAS will be run @@ -275,22 +272,8 @@ def process_analysis(self, raw_config: RawConfig): self.analysis_include_evaluation_aggregate_algo = False def process_config(self, raw_config: RawConfig): - # Set up a few top-level config variables self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir - if raw_config.container_framework == ContainerFramework.dsub: - warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2) - self.container_framework = raw_config.container_framework - - # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container. - if raw_config.unpack_singularity and self.container_framework != "singularity": - warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. 
This setting will have no effect.", stacklevel=2) - self.unpack_singularity = raw_config.unpack_singularity - - # Grab registry from the config, and if none is provided default to docker - if raw_config.container_registry and raw_config.container_registry.base_url != "" and raw_config.container_registry.owner != "": - self.container_prefix = raw_config.container_registry.base_url + "/" + raw_config.container_registry.owner - self.process_datasets(raw_config) self.process_algorithms(raw_config) self.process_analysis(raw_config) diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py new file mode 100644 index 000000000..d515e0de0 --- /dev/null +++ b/spras/config/container_schema.py @@ -0,0 +1,64 @@ +""" +The separate container schema specification file. +For information about pydantic, see schema.py. + +We move this to a separate file to allow `containers.py` to explicitly take in +this subsection of the configuration. +""" + +from dataclasses import dataclass +from pydantic import BaseModel, ConfigDict, Field +from typing import Optional +import warnings + +from spras.config.util import CaseInsensitiveEnum + +DEFAULT_CONTAINER_PREFIX = "docker.io/reedcompbio" + +class ContainerFramework(CaseInsensitiveEnum): + docker = 'docker' + # TODO: add apptainer variant once #260 gets merged + singularity = 'singularity' + dsub = 'dsub' + +class ContainerRegistry(BaseModel): + base_url: str + owner: str = Field(description="The owner or project of the registry") + + model_config = ConfigDict(extra='forbid') + +class ContainerSettings(BaseModel): + framework: ContainerFramework = ContainerFramework.docker + unpack_singularity: bool = False + registry: ContainerRegistry + hash_length: Optional[int] = None + +@dataclass +class ProcessedContainerOptions: + container_framework: ContainerFramework + unpack_singularity: bool + container_prefix: str + hash_length: int + + @staticmethod + def from_container_settings(settings: ContainerSettings, default_hash_length: int) -> "ProcessedContainerOptions": + if settings.framework == ContainerFramework.dsub: + warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2) + container_framework = settings.framework + + # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container. + if settings.unpack_singularity and container_framework != "singularity": + warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. 
This setting will have no effect.", stacklevel=2) + unpack_singularity = settings.unpack_singularity + + # Grab registry from the config, and if none is provided default to docker + container_prefix = DEFAULT_CONTAINER_PREFIX + if settings.registry and settings.registry.base_url != "" and settings.registry.owner != "": + container_prefix = settings.registry.base_url + "/" + settings.registry.owner + + return ProcessedContainerOptions( + container_framework=container_framework, + unpack_singularity=unpack_singularity, + container_prefix=container_prefix, + hash_length=settings.hash_length or default_hash_length + ) diff --git a/spras/config/schema.py b/spras/config/schema.py index 76404b387..7657a41a0 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -16,6 +16,7 @@ from pydantic import AfterValidator, BaseModel, ConfigDict, Field from spras.config.algorithms import AlgorithmUnion +from spras.config.container_schema import ContainerSettings from spras.config.util import CaseInsensitiveEnum class SummaryAnalysis(BaseModel): @@ -115,10 +116,7 @@ class ReconstructionSettings(BaseModel): model_config = ConfigDict(extra='forbid') class RawConfig(BaseModel): - # TODO: move these container values to a nested container key - container_framework: ContainerFramework = ContainerFramework.docker - unpack_singularity: bool = False - container_registry: ContainerRegistry + containers: ContainerSettings hash_length: int = Field( description="The length of the hash used to identify a parameter combination", diff --git a/spras/containers.py b/spras/containers.py index 314d4bb45..e41d4737e 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -8,7 +8,7 @@ import docker import docker.errors -import spras.config.config as config +from spras.config.container_schema import ProcessedContainerOptions from spras.logging import indent from spras.util import hash_filename @@ -131,7 +131,7 @@ def env_to_items(environment: dict[str, str]) -> Iterator[str]: # TODO consider a better default environment variable # Follow docker-py's naming conventions (https://docker-py.readthedocs.io/en/stable/containers.html) # Technically the argument is an image, not a container, but we use container here. 
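To make the new explicit dependency concrete before the run_container signature change below, here is a minimal hedged sketch of a call site under this patch's API; the container name, command, and settings values are illustrative assumptions, and from_container_settings would normally build the options from the parsed config rather than by hand:

from spras.config.container_schema import ProcessedContainerOptions
from spras.containers import run_container_and_log

# Illustrative settings; actually executing this call requires a working
# Docker installation, since it pulls and runs the named image.
settings = ProcessedContainerOptions(
    container_framework="docker",
    unpack_singularity=False,
    container_prefix="docker.io/reedcompbio",
    hash_length=7,
)

run_container_and_log(
    name="Example PRM",
    framework="docker",
    container_suffix="allpairs:v4",
    command=["python", "--version"],
    volumes=[],          # (src, dest) PurePath tuples from prepare_volume
    working_dir="/spras",
    config=settings,     # passed explicitly instead of read from the global config
)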
-def run_container(framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: Optional[dict[str, str]] = None): +def run_container(framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, config: ProcessedContainerOptions, environment: Optional[dict[str, str]] = None): """ Runs a command in the container using Singularity or Docker @param framework: singularity or docker @@ -144,17 +144,17 @@ def run_container(framework: str, container_suffix: str, command: List[str], vol """ normalized_framework = framework.casefold() - container = config.config.container_prefix + "/" + container_suffix + container = config.container_prefix + "/" + container_suffix if normalized_framework == 'docker': return run_container_docker(container, command, volumes, working_dir, environment) elif normalized_framework == 'singularity': - return run_container_singularity(container, command, volumes, working_dir, environment) + return run_container_singularity(container, command, volumes, working_dir, config, environment) elif normalized_framework == 'dsub': return run_container_dsub(container, command, volumes, working_dir, environment) else: raise ValueError(f'{framework} is not a recognized container framework. Choose "docker", "dsub", or "singularity".') -def run_container_and_log(name: str, framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: Optional[dict[str, str]] = None): +def run_container_and_log(name: str, framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, config: ProcessedContainerOptions, environment: Optional[dict[str, str]] = None): """ Runs a command in the container using Singularity or Docker with associated pretty printed messages. @param name: the display name of the running container for logging purposes @@ -171,7 +171,7 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm print('Running {} on container framework "{}" on env {} with command: {}'.format(name, framework, list(env_to_items(environment)), ' '.join(command)), flush=True) try: - out = run_container(framework=framework, container_suffix=container_suffix, command=command, volumes=volumes, working_dir=working_dir, environment=environment) + out = run_container(framework=framework, container_suffix=container_suffix, command=command, volumes=volumes, working_dir=working_dir, config=config, environment=environment) if out is not None: if isinstance(out, list): out = ''.join(out) @@ -290,7 +290,7 @@ def run_container_docker(container: str, command: List[str], volumes: List[Tuple return out -def run_container_singularity(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: Optional[dict[str, str]] = None): +def run_container_singularity(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, config: ProcessedContainerOptions, environment: Optional[dict[str, str]] = None): """ Runs a command in the container using Singularity. Only available on Linux. @@ -329,7 +329,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ singularity_options.extend(['--env', ",".join(env_to_items(environment))]) # Handle unpacking singularity image if needed. 
Potentially needed for running nested unprivileged containers - if config.config.unpack_singularity: + if config.unpack_singularity: # Split the string by "/" path_elements = container.split("/") @@ -369,7 +369,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ # Because this is called independently for each file, the same local path can be mounted to multiple volumes -def prepare_volume(filename: Union[str, os.PathLike], volume_base: Union[str, PurePath]) -> Tuple[Tuple[PurePath, PurePath], str]: +def prepare_volume(filename: Union[str, os.PathLike], volume_base: Union[str, PurePath], config: ProcessedContainerOptions) -> Tuple[Tuple[PurePath, PurePath], str]: """ Makes a file on the local file system accessible within a container by mapping the local (source) path to a new container (destination) path and renaming the file to be relative to the destination path. @@ -388,7 +388,7 @@ def prepare_volume(filename: Union[str, os.PathLike], volume_base: Union[str, Pu if isinstance(filename, os.PathLike): filename = str(filename) - filename_hash = hash_filename(filename, config.config.hash_length) + filename_hash = hash_filename(filename, config.hash_length) dest = PurePosixPath(base_path, filename_hash) abs_filename = Path(filename).resolve() From 5effe6980aebcb57dfd61b588ab39c93dcc33cbd Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 15 Jul 2025 00:00:07 +0000 Subject: [PATCH 48/60] feat: initial schema --- config/schema.json | 1153 ++++++++++++++++++++++++++++++ spras/allpairs.py | 20 +- spras/btb.py | 15 +- spras/config/config.py | 4 +- spras/config/container_schema.py | 20 +- spras/containers.py | 26 +- spras/prm.py | 5 +- 7 files changed, 1196 insertions(+), 47 deletions(-) create mode 100644 config/schema.json diff --git a/config/schema.json b/config/schema.json new file mode 100644 index 000000000..f99541d51 --- /dev/null +++ b/config/schema.json @@ -0,0 +1,1153 @@ +{ + "$defs": { + "Analysis": { + "additionalProperties": false, + "properties": { + "summary": { + "$ref": "#/$defs/SummaryAnalysis", + "default": { + "include": false + } + }, + "cytoscape": { + "$ref": "#/$defs/CytoscapeAnalysis", + "default": { + "include": false + } + }, + "ml": { + "$ref": "#/$defs/MlAnalysis", + "default": { + "include": false, + "aggregate_per_algorithm": false, + "components": 2, + "labels": true, + "linkage": "ward", + "metric": "euclidean" + } + }, + "evaluation": { + "$ref": "#/$defs/EvaluationAnalysis", + "default": { + "include": false, + "aggregate_per_algorithm": false + } + } + }, + "title": "Analysis", + "type": "object" + }, + "ContainerFramework": { + "enum": [ + "docker", + "singularity", + "dsub" + ], + "title": "ContainerFramework", + "type": "string" + }, + "ContainerRegistry": { + "additionalProperties": false, + "properties": { + "base_url": { + "title": "Base Url", + "type": "string" + }, + "owner": { + "description": "The owner or project of the registry", + "title": "Owner", + "type": "string" + } + }, + "required": [ + "base_url", + "owner" + ], + "title": "ContainerRegistry", + "type": "object" + }, + "ContainerSettings": { + "properties": { + "framework": { + "$ref": "#/$defs/ContainerFramework", + "default": "docker" + }, + "unpack_singularity": { + "default": false, + "title": "Unpack Singularity", + "type": "boolean" + }, + "registry": { + "$ref": "#/$defs/ContainerRegistry" + }, + "hash_length": { + "default": 7, + "title": "Hash Length", + "type": "integer" + } + }, + "required": [ + "registry" + ], + "title": 
"ContainerSettings", + "type": "object" + }, + "CytoscapeAnalysis": { + "additionalProperties": false, + "properties": { + "include": { + "title": "Include", + "type": "boolean" + } + }, + "required": [ + "include" + ], + "title": "CytoscapeAnalysis", + "type": "object" + }, + "Dataset": { + "additionalProperties": false, + "properties": { + "label": { + "title": "Label", + "type": "string" + }, + "node_files": { + "items": { + "type": "string" + }, + "title": "Node Files", + "type": "array" + }, + "edge_files": { + "items": { + "type": "string" + }, + "title": "Edge Files", + "type": "array" + }, + "other_files": { + "items": { + "type": "string" + }, + "title": "Other Files", + "type": "array" + }, + "data_dir": { + "title": "Data Dir", + "type": "string" + } + }, + "required": [ + "label", + "node_files", + "edge_files", + "other_files", + "data_dir" + ], + "title": "Dataset", + "type": "object" + }, + "EvaluationAnalysis": { + "additionalProperties": false, + "properties": { + "include": { + "title": "Include", + "type": "boolean" + }, + "aggregate_per_algorithm": { + "default": false, + "title": "Aggregate Per Algorithm", + "type": "boolean" + } + }, + "required": [ + "include" + ], + "title": "EvaluationAnalysis", + "type": "object" + }, + "GoldStandard": { + "additionalProperties": false, + "properties": { + "label": { + "title": "Label", + "type": "string" + }, + "node_files": { + "items": { + "type": "string" + }, + "title": "Node Files", + "type": "array" + }, + "data_dir": { + "title": "Data Dir", + "type": "string" + }, + "dataset_labels": { + "items": { + "type": "string" + }, + "title": "Dataset Labels", + "type": "array" + } + }, + "required": [ + "label", + "node_files", + "data_dir", + "dataset_labels" + ], + "title": "GoldStandard", + "type": "object" + }, + "Locations": { + "additionalProperties": false, + "properties": { + "reconstruction_dir": { + "title": "Reconstruction Dir", + "type": "string" + } + }, + "required": [ + "reconstruction_dir" + ], + "title": "Locations", + "type": "object" + }, + "MlAnalysis": { + "additionalProperties": false, + "properties": { + "include": { + "title": "Include", + "type": "boolean" + }, + "aggregate_per_algorithm": { + "default": false, + "title": "Aggregate Per Algorithm", + "type": "boolean" + }, + "components": { + "default": 2, + "title": "Components", + "type": "integer" + }, + "labels": { + "default": true, + "title": "Labels", + "type": "boolean" + }, + "linkage": { + "$ref": "#/$defs/MlLinkage", + "default": "ward" + }, + "metric": { + "$ref": "#/$defs/MlMetric", + "default": "euclidean" + } + }, + "required": [ + "include" + ], + "title": "MlAnalysis", + "type": "object" + }, + "MlLinkage": { + "enum": [ + "ward", + "complete", + "average", + "single" + ], + "title": "MlLinkage", + "type": "string" + }, + "MlMetric": { + "enum": [ + "euclidean", + "manhattan", + "cosine" + ], + "title": "MlMetric", + "type": "string" + }, + "ReconstructionSettings": { + "additionalProperties": false, + "properties": { + "locations": { + "$ref": "#/$defs/Locations" + } + }, + "required": [ + "locations" + ], + "title": "ReconstructionSettings", + "type": "object" + }, + "SummaryAnalysis": { + "additionalProperties": false, + "properties": { + "include": { + "title": "Include", + "type": "boolean" + } + }, + "required": [ + "include" + ], + "title": "SummaryAnalysis", + "type": "object" + }, + "allpairsModel": { + "properties": { + "name": { + "const": "allpairs", + "title": "Name", + "type": "string" + }, + "include": { + "title": 
"Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/allpairsRunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "allpairsModel", + "type": "object" + }, + "allpairsRunModel": { + "properties": {}, + "title": "allpairsRunModel", + "type": "object" + }, + "bowtiebuilderModel": { + "properties": { + "name": { + "const": "bowtiebuilder", + "title": "Name", + "type": "string" + }, + "include": { + "title": "Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/bowtiebuilderRunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "bowtiebuilderModel", + "type": "object" + }, + "bowtiebuilderRunModel": { + "properties": {}, + "title": "bowtiebuilderRunModel", + "type": "object" + }, + "dominoModel": { + "properties": { + "name": { + "const": "domino", + "title": "Name", + "type": "string" + }, + "include": { + "title": "Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/dominoRunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "dominoModel", + "type": "object" + }, + "dominoRunModel": { + "properties": { + "time": { + "items": { + "type": "number" + }, + "title": "Time", + "type": "array" + }, + "module_threshold": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "Module Threshold", + "type": "array" + }, + "slice_threshold": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "Slice Threshold", + "type": "array" + } + }, + "required": [ + "time", + "module_threshold", + "slice_threshold" + ], + "title": "dominoRunModel", + "type": "object" + }, + "meoModel": { + "properties": { + "name": { + "const": "meo", + "title": "Name", + "type": "string" + }, + "include": { + "title": "Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/meoRunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "meoModel", + "type": "object" + }, + "meoRunModel": { + "properties": { + "max_path_length": { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "title": "Max Path Length", + "type": "array" + }, + "local_search": { + "items": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "title": "Local Search", + "type": "array" + }, + "rand_restarts": { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "title": "Rand Restarts", + "type": "array" + } + }, + "required": [ + "max_path_length", + "local_search", + "rand_restarts" + ], + "title": "meoRunModel", + "type": "object" + }, + "mincostflowModel": { + "properties": { + "name": { + "const": "mincostflow", + "title": "Name", + "type": "string" + }, + "include": { + "title": "Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/mincostflowRunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "mincostflowModel", + "type": "object" + }, + "mincostflowRunModel": { + "properties": { + "flow": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "Flow", + "type": 
"array" + }, + "capacity": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "Capacity", + "type": "array" + } + }, + "required": [ + "flow", + "capacity" + ], + "title": "mincostflowRunModel", + "type": "object" + }, + "omicsintegrator1Model": { + "properties": { + "name": { + "const": "omicsintegrator1", + "title": "Name", + "type": "string" + }, + "include": { + "title": "Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/omicsintegrator1RunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "omicsintegrator1Model", + "type": "object" + }, + "omicsintegrator1RunModel": { + "properties": { + "dummy_mode": { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "title": "Dummy Mode", + "type": "array" + }, + "mu_squared": { + "items": { + "type": "boolean" + }, + "title": "Mu Squared", + "type": "array" + }, + "exclude_terms": { + "items": { + "type": "boolean" + }, + "title": "Exclude Terms", + "type": "array" + }, + "noisy_edges": { + "items": { + "type": "integer" + }, + "title": "Noisy Edges", + "type": "array" + }, + "shuffled_prizes": { + "items": { + "type": "integer" + }, + "title": "Shuffled Prizes", + "type": "array" + }, + "random_terminals": { + "items": { + "type": "integer" + }, + "title": "Random Terminals", + "type": "array" + }, + "seed": { + "items": { + "type": "integer" + }, + "title": "Seed", + "type": "array" + }, + "w": { + "items": { + "type": "integer" + }, + "title": "W", + "type": "array" + }, + "b": { + "items": { + "type": "number" + }, + "title": "B", + "type": "array" + }, + "d": { + "items": { + "type": "integer" + }, + "title": "D", + "type": "array" + }, + "mu": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "Mu", + "type": "array" + }, + "noise": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "Noise", + "type": "array" + }, + "g": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "G", + "type": "array" + }, + "r": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "R", + "type": "array" + } + }, + "required": [ + "dummy_mode", + "mu_squared", + "exclude_terms", + "noisy_edges", + "shuffled_prizes", + "random_terminals", + "seed", + "w", + "b", + "d", + "mu", + "noise", + "g", + "r" + ], + "title": "omicsintegrator1RunModel", + "type": "object" + }, + "omicsintegrator2Model": { + "properties": { + "name": { + "const": "omicsintegrator2", + "title": "Name", + "type": "string" + }, + "include": { + "title": "Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/omicsintegrator2RunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "omicsintegrator2Model", + "type": "object" + }, + "omicsintegrator2RunModel": { + "properties": { + "w": { + "items": { + "type": "number" + }, + "title": "W", + "type": "array" + }, + "b": { + "items": { + "type": "number" + }, + "title": "B", + "type": "array" + }, + "g": { + "items": { + "type": "number" + }, + "title": "G", + "type": "array" + }, + "noise": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "Noise", + "type": "array" + }, + "noisy_edges": { + 
"items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "title": "Noisy Edges", + "type": "array" + }, + "random_terminals": { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "title": "Random Terminals", + "type": "array" + }, + "dummy_mode": { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "title": "Dummy Mode", + "type": "array" + }, + "seed": { + "items": { + "type": "integer" + }, + "title": "Seed", + "type": "array" + } + }, + "required": [ + "w", + "b", + "g", + "noise", + "noisy_edges", + "random_terminals", + "dummy_mode", + "seed" + ], + "title": "omicsintegrator2RunModel", + "type": "object" + }, + "pathlinkerModel": { + "properties": { + "name": { + "const": "pathlinker", + "title": "Name", + "type": "string" + }, + "include": { + "title": "Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/pathlinkerRunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "pathlinkerModel", + "type": "object" + }, + "pathlinkerRunModel": { + "properties": { + "k": { + "items": { + "type": "integer" + }, + "title": "K", + "type": "array" + } + }, + "required": [ + "k" + ], + "title": "pathlinkerRunModel", + "type": "object" + }, + "rwrModel": { + "properties": { + "name": { + "const": "rwr", + "title": "Name", + "type": "string" + }, + "include": { + "title": "Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/rwrRunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "rwrModel", + "type": "object" + }, + "rwrRunModel": { + "properties": { + "threshold": { + "items": { + "type": "integer" + }, + "title": "Threshold", + "type": "array" + }, + "alpha": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "Alpha", + "type": "array" + } + }, + "required": [ + "threshold", + "alpha" + ], + "title": "rwrRunModel", + "type": "object" + }, + "strwrModel": { + "properties": { + "name": { + "const": "strwr", + "title": "Name", + "type": "string" + }, + "include": { + "title": "Include", + "type": "boolean" + }, + "runs": { + "additionalProperties": { + "$ref": "#/$defs/strwrRunModel" + }, + "title": "Runs", + "type": "object" + } + }, + "required": [ + "name", + "include", + "runs" + ], + "title": "strwrModel", + "type": "object" + }, + "strwrRunModel": { + "properties": { + "threshold": { + "items": { + "type": "integer" + }, + "title": "Threshold", + "type": "array" + }, + "alpha": { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] + }, + "title": "Alpha", + "type": "array" + } + }, + "required": [ + "threshold", + "alpha" + ], + "title": "strwrRunModel", + "type": "object" + } + }, + "additionalProperties": false, + "properties": { + "containers": { + "$ref": "#/$defs/ContainerSettings" + }, + "hash_length": { + "default": 7, + "description": "The length of the hash used to identify a parameter combination", + "title": "Hash Length", + "type": "integer" + }, + "algorithms": { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/allpairsModel" + }, + { + "$ref": "#/$defs/bowtiebuilderModel" + }, + { + "$ref": "#/$defs/dominoModel" + }, + { + "$ref": "#/$defs/meoModel" + }, + { + "$ref": "#/$defs/mincostflowModel" + }, + { + "$ref": "#/$defs/omicsintegrator1Model" + }, + { + "$ref": 
"#/$defs/omicsintegrator2Model" + }, + { + "$ref": "#/$defs/pathlinkerModel" + }, + { + "$ref": "#/$defs/rwrModel" + }, + { + "$ref": "#/$defs/strwrModel" + } + ] + }, + "title": "Algorithms", + "type": "array" + }, + "datasets": { + "items": { + "$ref": "#/$defs/Dataset" + }, + "title": "Datasets", + "type": "array" + }, + "gold_standards": { + "default": [], + "items": { + "$ref": "#/$defs/GoldStandard" + }, + "title": "Gold Standards", + "type": "array" + }, + "analysis": { + "$ref": "#/$defs/Analysis", + "default": { + "summary": { + "include": false + }, + "cytoscape": { + "include": false + }, + "ml": { + "aggregate_per_algorithm": false, + "components": 2, + "include": false, + "labels": true, + "linkage": "ward", + "metric": "euclidean" + }, + "evaluation": { + "aggregate_per_algorithm": false, + "include": false + } + } + }, + "reconstruction_settings": { + "$ref": "#/$defs/ReconstructionSettings" + } + }, + "required": [ + "containers", + "algorithms", + "datasets", + "reconstruction_settings" + ], + "title": "RawConfig", + "type": "object" +} \ No newline at end of file diff --git a/spras/allpairs.py b/spras/allpairs.py index 670d3f721..bba5df467 100644 --- a/spras/allpairs.py +++ b/spras/allpairs.py @@ -1,6 +1,7 @@ import warnings from pathlib import Path +from spras.config.container_schema import ProcessedContainerSettings from spras.config.util import Empty from spras.containers import prepare_volume, run_container_and_log from spras.dataset import Dataset @@ -72,14 +73,7 @@ def generate_inputs(data: Dataset, filename_map): header=["#Interactor1", "Interactor2", "Weight"]) @staticmethod - def run(inputs, output_file, args=None, container_framework="docker"): - """ - Run All Pairs Shortest Paths with Docker - @param nodetypes: input node types with sources and targets (required) - @param network: input network file (required) - @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) - @param output_file: path to the output pathway file (required) - """ + def run(inputs, output_file, args=None, container_settings=ProcessedContainerSettings()): if not inputs["nodetypes"] or not inputs["network"] or not inputs["directed_flag"]: raise ValueError('Required All Pairs Shortest Paths arguments are missing') @@ -88,15 +82,15 @@ def run(inputs, output_file, args=None, container_framework="docker"): # Each volume is a tuple (src, dest) volumes = list() - bind_path, node_file = prepare_volume(inputs["nodetypes"], work_dir) + bind_path, node_file = prepare_volume(inputs["nodetypes"], work_dir, container_settings) volumes.append(bind_path) - bind_path, network_file = prepare_volume(inputs["network"], work_dir) + bind_path, network_file = prepare_volume(inputs["network"], work_dir, container_settings) volumes.append(bind_path) # Create the parent directories for the output file if needed Path(output_file).parent.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_file = prepare_volume(output_file, work_dir) + bind_path, mapped_out_file = prepare_volume(output_file, work_dir, container_settings) volumes.append(bind_path) command = ['python', @@ -110,11 +104,11 @@ def run(inputs, output_file, args=None, container_framework="docker"): container_suffix = "allpairs:v4" run_container_and_log( 'All Pairs Shortest Paths', - container_framework, container_suffix, command, volumes, - work_dir) + work_dir, + container_settings) @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file, params): diff --git 
a/spras/btb.py b/spras/btb.py index 81474bdb2..7f7a1b944 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -1,5 +1,6 @@ from pathlib import Path +from spras.config.container_schema import ProcessedContainerSettings from spras.config.util import Empty from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( @@ -65,7 +66,7 @@ def generate_inputs(data, filename_map): # Skips parameter validation step @staticmethod - def run(inputs, output_file, args=None, container_framework="docker"): + def run(inputs, output_file, args=None, container_settings=ProcessedContainerSettings()): # Tests for pytest (docker container also runs this) # Testing out here avoids the trouble that container errors provide @@ -93,19 +94,19 @@ def run(inputs, output_file, args=None, container_framework="docker"): # Each volume is a tuple (src, dest) volumes = list() - bind_path, source_file = prepare_volume(inputs["sources"], work_dir) + bind_path, source_file = prepare_volume(inputs["sources"], work_dir, container_settings) volumes.append(bind_path) - bind_path, target_file = prepare_volume(inputs["targets"], work_dir) + bind_path, target_file = prepare_volume(inputs["targets"], work_dir, container_settings) volumes.append(bind_path) - bind_path, edges_file = prepare_volume(inputs["edges"], work_dir) + bind_path, edges_file = prepare_volume(inputs["edges"], work_dir, container_settings) volumes.append(bind_path) # Use its --output argument to set the output file prefix to specify an absolute path and prefix out_dir = Path(output_file).parent out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir, container_settings) volumes.append(bind_path) mapped_out_prefix = mapped_out_dir + '/raw-pathway.txt' # Use posix path inside the container @@ -122,11 +123,11 @@ def run(inputs, output_file, args=None, container_framework="docker"): container_suffix = "bowtiebuilder:v2" run_container_and_log('BowTieBuilder', - container_framework, container_suffix, command, volumes, - work_dir) + work_dir, + container_settings) # Output is already written to raw-pathway.txt file diff --git a/spras/config/config.py b/spras/config/config.py index 6d6ee4b7e..252d6ccf5 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -23,7 +23,7 @@ import numpy as np import yaml -from spras.config.container_schema import ProcessedContainerOptions +from spras.config.container_schema import ProcessedContainerSettings from spras.config.schema import Analysis, RawConfig from spras.util import NpHashEncoder, hash_params_sha1_base32 @@ -66,7 +66,7 @@ def __init__(self, raw_config: dict[str, Any]): # Directory used for storing output self.out_dir = parsed_raw_config.reconstruction_settings.locations.reconstruction_dir # Container framework used by PRMs. Valid options are "docker", "dsub", and "singularity" - self.container_settings = ProcessedContainerOptions.from_container_settings(parsed_raw_config.containers, parsed_raw_config.hash_length) + self.container_settings = ProcessedContainerSettings.from_container_settings(parsed_raw_config.containers, parsed_raw_config.hash_length) # A Boolean specifying whether to unpack singularity containers. 
Default is False self.unpack_singularity = False # A dictionary to store configured datasets against which SPRAS will be run diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py index d515e0de0..9688a9b51 100644 --- a/spras/config/container_schema.py +++ b/spras/config/container_schema.py @@ -31,17 +31,17 @@ class ContainerSettings(BaseModel): framework: ContainerFramework = ContainerFramework.docker unpack_singularity: bool = False registry: ContainerRegistry - hash_length: Optional[int] = None + hash_length: int = 7 @dataclass -class ProcessedContainerOptions: - container_framework: ContainerFramework - unpack_singularity: bool - container_prefix: str - hash_length: int +class ProcessedContainerSettings: + framework: ContainerFramework = ContainerFramework.docker + unpack_singularity: bool = False + prefix: str = DEFAULT_CONTAINER_PREFIX + hash_length: int = 7 @staticmethod - def from_container_settings(settings: ContainerSettings, default_hash_length: int) -> "ProcessedContainerOptions": + def from_container_settings(settings: ContainerSettings, default_hash_length: int) -> "ProcessedContainerSettings": if settings.framework == ContainerFramework.dsub: warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2) container_framework = settings.framework @@ -56,9 +56,9 @@ def from_container_settings(settings: ContainerSettings, default_hash_length: in if settings.registry and settings.registry.base_url != "" and settings.registry.owner != "": container_prefix = settings.registry.base_url + "/" + settings.registry.owner - return ProcessedContainerOptions( - container_framework=container_framework, + return ProcessedContainerSettings( + framework=container_framework, unpack_singularity=unpack_singularity, - container_prefix=container_prefix, + prefix=container_prefix, hash_length=settings.hash_length or default_hash_length ) diff --git a/spras/containers.py b/spras/containers.py index e41d4737e..d065b2ea8 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -8,7 +8,7 @@ import docker import docker.errors -from spras.config.container_schema import ProcessedContainerOptions +from spras.config.container_schema import ProcessedContainerSettings from spras.logging import indent from spras.util import hash_filename @@ -131,47 +131,47 @@ def env_to_items(environment: dict[str, str]) -> Iterator[str]: # TODO consider a better default environment variable # Follow docker-py's naming conventions (https://docker-py.readthedocs.io/en/stable/containers.html) # Technically the argument is an image, not a container, but we use container here. 
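Before the corresponding containers.py signature updates below, a hedged sketch of how the renamed helper is meant to be used; the registry values mirror the shipped defaults and are illustrative only:

from spras.config.container_schema import (
    ContainerFramework,
    ContainerRegistry,
    ContainerSettings,
    ProcessedContainerSettings,
)

raw = ContainerSettings(
    registry=ContainerRegistry(base_url="docker.io", owner="reedcompbio"),
)

# hash_length falls back to the top-level default when the container
# section does not override it.
processed = ProcessedContainerSettings.from_container_settings(raw, default_hash_length=7)
assert processed.prefix == "docker.io/reedcompbio"
assert processed.framework == ContainerFramework.docker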
-def run_container(framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, config: ProcessedContainerOptions, environment: Optional[dict[str, str]] = None): +def run_container(container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, container_settings: ProcessedContainerSettings, environment: Optional[dict[str, str]] = None): """ Runs a command in the container using Singularity or Docker - @param framework: singularity or docker @param container_suffix: name of the DockerHub container without the 'docker://' prefix @param command: command to run in the container @param volumes: a list of volumes to mount where each item is a (source, destination) tuple @param working_dir: the working directory in the container + @param container_settings: the settings to use to run the container @param environment: environment variables to set in the container @return: output from Singularity execute or Docker run """ - normalized_framework = framework.casefold() + normalized_framework = container_settings.framework.casefold() - container = config.container_prefix + "/" + container_suffix + container = container_settings.prefix + "/" + container_suffix if normalized_framework == 'docker': return run_container_docker(container, command, volumes, working_dir, environment) elif normalized_framework == 'singularity': - return run_container_singularity(container, command, volumes, working_dir, config, environment) + return run_container_singularity(container, command, volumes, working_dir, container_settings, environment) elif normalized_framework == 'dsub': return run_container_dsub(container, command, volumes, working_dir, environment) else: - raise ValueError(f'{framework} is not a recognized container framework. Choose "docker", "dsub", or "singularity".') + raise ValueError(f'{container_settings.framework} is not a recognized container framework. Choose "docker", "dsub", or "singularity".') -def run_container_and_log(name: str, framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, config: ProcessedContainerOptions, environment: Optional[dict[str, str]] = None): +def run_container_and_log(name: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, container_settings: ProcessedContainerSettings, environment: Optional[dict[str, str]] = None): """ Runs a command in the container using Singularity or Docker with associated pretty printed messages. 
@param name: the display name of the running container for logging purposes - @param framework: singularity or docker @param container_suffix: name of the DockerHub container without the 'docker://' prefix @param command: command to run in the container @param volumes: a list of volumes to mount where each item is a (source, destination) tuple @param working_dir: the working directory in the container + @param container_settings: the container settings to use @param environment: environment variables to set in the container @return: output from Singularity execute or Docker run """ if not environment: environment = {'SPRAS': 'True'} - print('Running {} on container framework "{}" on env {} with command: {}'.format(name, framework, list(env_to_items(environment)), ' '.join(command)), flush=True) + print('Running {} on container framework "{}" on env {} with command: {}'.format(name, container_settings.framework, list(env_to_items(environment)), ' '.join(command)), flush=True) try: - out = run_container(framework=framework, container_suffix=container_suffix, command=command, volumes=volumes, working_dir=working_dir, config=config, environment=environment) + out = run_container(container_suffix=container_suffix, command=command, volumes=volumes, working_dir=working_dir, container_settings=container_settings, environment=environment) if out is not None: if isinstance(out, list): out = ''.join(out) @@ -290,7 +290,7 @@ def run_container_docker(container: str, command: List[str], volumes: List[Tuple return out -def run_container_singularity(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, config: ProcessedContainerOptions, environment: Optional[dict[str, str]] = None): +def run_container_singularity(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, config: ProcessedContainerSettings, environment: Optional[dict[str, str]] = None): """ Runs a command in the container using Singularity. Only available on Linux. @@ -369,7 +369,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ # Because this is called independently for each file, the same local path can be mounted to multiple volumes -def prepare_volume(filename: Union[str, os.PathLike], volume_base: Union[str, PurePath], config: ProcessedContainerOptions) -> Tuple[Tuple[PurePath, PurePath], str]: +def prepare_volume(filename: Union[str, os.PathLike], volume_base: Union[str, PurePath], config: ProcessedContainerSettings) -> Tuple[Tuple[PurePath, PurePath], str]: """ Makes a file on the local file system accessible within a container by mapping the local (source) path to a new container (destination) path and renaming the file to be relative to the destination path. 
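Since prepare_volume now takes the processed settings as well, a wrapper's call site changes roughly as in this sketch; the file path and working directory are hypothetical:

from spras.config.container_schema import ProcessedContainerSettings
from spras.containers import prepare_volume

# After this patch every field has a default, so wrappers can fall back to
# ProcessedContainerSettings() when no explicit settings are supplied.
settings = ProcessedContainerSettings()

work_dir = '/spras'
# Returns the (src, dest) volume pair to mount plus the container-side path,
# where the filename is hashed using settings.hash_length.
bind_path, network_file = prepare_volume('input/network.txt', work_dir, settings)
volumes = [bind_path]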
diff --git a/spras/prm.py b/spras/prm.py index 73c94454a..d52214083 100644 --- a/spras/prm.py +++ b/spras/prm.py @@ -4,6 +4,7 @@ from pydantic import BaseModel +from spras.config.container_schema import ProcessedContainerSettings from spras.dataset import Dataset T = TypeVar('T', bound=BaseModel) @@ -42,10 +43,10 @@ def generate_inputs(data: Dataset, filename_map: dict[str, str]): @staticmethod @abstractmethod - def run(inputs: dict[str, str | os.PathLike], output_file: str | os.PathLike, args: T, container_framework="docker"): + def run(inputs: dict[str, str | os.PathLike], output_file: str | os.PathLike, args: T, container_settings: ProcessedContainerSettings): """ Runs an algorithm with the specified inputs, algorithm params (T), - the designated output_file, and the desired container_framework. + the designated output_file, and the desired container_settings. """ raise NotImplementedError From 398350e68a3f1776e829c5f7f4823560cd73f7b8 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Tue, 15 Jul 2025 08:22:58 -0700 Subject: [PATCH 49/60] feat: more algs schema handling --- config/config.yaml | 30 +- config/schema.json | 1017 ++++++++++++++++++++++++------------ spras/config/algorithms.py | 57 +- 3 files changed, 756 insertions(+), 348 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index a834131e6..3e2127d53 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,3 +1,5 @@ +# yaml-language-server: $schema=./schema.json + # Global workflow control # The length of the hash used to identify a parameter combination @@ -50,14 +52,14 @@ containers: algorithms: - name: "pathlinker" - params: - include: true + include: true + runs: run1: k: range(100,201,100) - name: "omicsintegrator1" - params: - include: true + include: true + runs: run1: b: [5, 6] w: np.linspace(0,5,2) @@ -65,8 +67,8 @@ algorithms: dummy_mode: "file" # Or "terminals", "all", "others" - name: "omicsintegrator2" - params: - include: true + include: true + runs: run1: b: 4 g: 0 @@ -79,7 +81,7 @@ algorithms: runs: run1: max_path_length: 3 - local_search: "Yes" + local_search: true rand_restarts: 10 - name: "mincostflow" @@ -90,8 +92,7 @@ algorithms: capacity: 1 - name: "allpairs" - params: - include: true + include: true - name: "domino" params: @@ -101,22 +102,21 @@ algorithms: module_threshold: 0.05 - name: "strwr" - params: - include: true + include: true + runs: run1: alpha: [0.85] threshold: [100, 200] - name: "rwr" - params: - include: true + include: true + runs: run1: alpha: [0.85] threshold: [100, 200] - name: "bowtiebuilder" - params: - include: true + include: true # Here we specify which pathways to run and other file location information. # DataLoader.py can currently only load a single dataset diff --git a/config/schema.json b/config/schema.json index f99541d51..01494a4ea 100644 --- a/config/schema.json +++ b/config/schema.json @@ -393,47 +393,77 @@ }, "dominoRunModel": { "properties": { - "time": { - "items": { - "type": "number" - }, - "title": "Time", - "type": "array" + "_time": { + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "type": "number" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "description": "The internal _time parameter. This is a parameter only given to nondeterminsitic\nalgorithms that provide no randomness seed. 
While this should be unset,\nwe allow specifying `_time` for users that want to re-use outputs of runs,\nthough this explicitly breaks the 'immutability' promise of runs.", + "title": "Time" }, "module_threshold": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Module Threshold", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "the p-value threshold for considering a slice as relevant (optional)", + "title": "Module Threshold" }, "slice_threshold": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Slice Threshold", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "the p-value threshold for considering a putative module as final module (optional)", + "title": "Slice Threshold" } }, - "required": [ - "time", - "module_threshold", - "slice_threshold" - ], "title": "dominoRunModel", "type": "object" }, @@ -467,53 +497,84 @@ "meoRunModel": { "properties": { "max_path_length": { - "items": { - "anyOf": [ - { - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Max Path Length", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "the maximal length of a path from sources and targets to orient.", + "title": "Max Path Length" }, "local_search": { - "items": { - "anyOf": [ - { - "type": "boolean" + "anyOf": [ + { + "type": "boolean" + }, + { + "items": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Local Search", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "a boolean parameter that enables MEO's local search functionality.\nSee \"Improving approximations with local search\" in the associated paper\nfor more information.", + "title": "Local Search" }, "rand_restarts": { - "items": { - "anyOf": [ - { - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Rand Restarts", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The number of random restarts to do.", + "title": "Rand Restarts" } }, - "required": [ - "max_path_length", - "local_search", - "rand_restarts" - ], "title": "meoRunModel", "type": "object" }, @@ -547,38 +608,58 @@ "mincostflowRunModel": { "properties": { "flow": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Flow", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "amount of flow going through the graph", + "title": "Flow" }, "capacity": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": 
[ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Capacity", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "amount of capacity allowed on each edge", + "title": "Capacity" } }, - "required": [ - "flow", - "capacity" - ], "title": "mincostflowRunModel", "type": "object" }, @@ -612,154 +693,298 @@ "omicsintegrator1RunModel": { "properties": { "dummy_mode": { - "items": { - "anyOf": [ - { - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Dummy Mode", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Dummy Mode" }, "mu_squared": { - "items": { - "type": "boolean" - }, - "title": "Mu Squared", - "type": "array" + "anyOf": [ + { + "type": "boolean" + }, + { + "items": { + "type": "boolean" + }, + "type": "array" + } + ], + "default": false, + "title": "Mu Squared" }, "exclude_terms": { - "items": { - "type": "boolean" - }, - "title": "Exclude Terms", - "type": "array" + "anyOf": [ + { + "type": "boolean" + }, + { + "items": { + "type": "boolean" + }, + "type": "array" + } + ], + "default": false, + "title": "Exclude Terms" }, "noisy_edges": { - "items": { - "type": "integer" - }, - "title": "Noisy Edges", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "default": 0, + "description": "How many times you would like to add noise to the given edge values and re-run the algorithm.", + "title": "Noisy Edges" }, "shuffled_prizes": { - "items": { - "type": "integer" - }, - "title": "Shuffled Prizes", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "default": 0, + "description": "shuffled_prizes: How many times the algorithm should shuffle the prizes and re-run", + "title": "Shuffled Prizes" }, "random_terminals": { - "items": { - "type": "integer" - }, - "title": "Random Terminals", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "default": 0, + "description": "How many times to apply the given prizes to random nodes in the interactome", + "title": "Random Terminals" }, "seed": { - "items": { - "type": "integer" - }, - "title": "Seed", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "description": "The random seed to use for this run. 
Defaults to the current UNIX timestamp.", + "title": "Seed" }, "w": { - "items": { - "type": "integer" - }, - "title": "W", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "description": "the number of trees", + "title": "W" }, "b": { - "items": { - "type": "number" - }, - "title": "B", - "type": "array" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "type": "number" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "description": "the trade-off between including more terminals and using less reliable edges", + "title": "B" }, "d": { - "items": { - "type": "integer" - }, - "title": "D", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "description": "controls the maximum path-length from v0 to terminal nodes", + "title": "D" }, "mu": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Mu", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "controls the degree-based negative prizes (defualt 0.0)", + "title": "Mu" }, "noise": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Noise", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Standard Deviation of the gaussian noise added to edges in Noisy Edges Randomizations", + "title": "Noise" }, "g": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "G", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "(Gamma) multiplicative edge penalty from degree of endpoints", + "title": "G" }, "r": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "R", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "msgsteiner parameter that adds random noise to edges, which is rarely needed because the Forest --noisyEdges option is recommended instead (default 0)", + "title": "R" } }, "required": [ - "dummy_mode", - "mu_squared", - "exclude_terms", - "noisy_edges", - "shuffled_prizes", - "random_terminals", - "seed", "w", "b", - "d", - "mu", - "noise", - "g", - "r" + "d" ], "title": "omicsintegrator1RunModel", "type": "object" @@ -794,100 +1019,185 @@ "omicsintegrator2RunModel": { "properties": { "w": { - "items": { - "type": "number" - }, - "title": "W", - "type": "array" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "type": "number" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "default": 6, + "description": "Omega: the weight of the edges connecting the dummy node to the nodes selected by dummyMode", + "title": "W" }, "b": { - "items": { - "type": "number" - }, - "title": "B", 
- "type": "array" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "type": "number" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "default": 1, + "description": "Beta: scaling factor of prizes", + "title": "B" }, "g": { - "items": { - "type": "number" - }, - "title": "G", - "type": "array" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "type": "number" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "default": 20, + "description": "Gamma: multiplicative edge penalty from degree of endpoints", + "title": "G" }, "noise": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Noise", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Standard Deviation of the gaussian noise added to edges in Noisy Edges Randomizations.", + "title": "Noise" }, "noisy_edges": { - "items": { - "anyOf": [ - { - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Noisy Edges", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "An integer specifying how many times to add noise to the given edge values and re-run.", + "title": "Noisy Edges" }, "random_terminals": { - "items": { - "anyOf": [ - { - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Random Terminals", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "An integer specifying how many times to apply your given prizes to random nodes in the interactome and re-run", + "title": "Random Terminals" }, "dummy_mode": { - "items": { - "anyOf": [ - { - "type": "string" + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Dummy Mode", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Tells the program which nodes in the interactome to connect the dummy node to. (default: terminals)\n \"terminals\" = connect to all terminals\n \"others\" = connect to all nodes except for terminals\n \"all\" = connect to all nodes in the interactome.", + "title": "Dummy Mode" }, "seed": { - "items": { - "type": "integer" - }, - "title": "Seed", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "description": "The random seed to use for this run. 
Defaults to the current UNIX timestamp.", + "title": "Seed" } }, - "required": [ - "w", - "b", - "g", - "noise", - "noisy_edges", - "random_terminals", - "dummy_mode", - "seed" - ], "title": "omicsintegrator2RunModel", "type": "object" }, @@ -921,16 +1231,25 @@ "pathlinkerRunModel": { "properties": { "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "default": 100, + "description": "path length", + "title": "K" } }, - "required": [ - "k" - ], "title": "pathlinkerRunModel", "type": "object" }, @@ -964,30 +1283,52 @@ "rwrRunModel": { "properties": { "threshold": { - "items": { - "type": "integer" - }, - "title": "Threshold", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "description": "The number of nodes to return", + "title": "Threshold" }, "alpha": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Alpha", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The chance of a restart during the random walk", + "title": "Alpha" } }, "required": [ - "threshold", - "alpha" + "threshold" ], "title": "rwrRunModel", "type": "object" @@ -1022,30 +1363,52 @@ "strwrRunModel": { "properties": { "threshold": { - "items": { - "type": "integer" - }, - "title": "Threshold", - "type": "array" + "anyOf": [ + { + "type": "integer" + }, + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "string" + } + ], + "description": "The number of nodes to return", + "title": "Threshold" }, "alpha": { - "items": { - "anyOf": [ - { - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ] }, - { - "type": "null" - } - ] - }, - "title": "Alpha", - "type": "array" + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The chance of a restart during the random walk", + "title": "Alpha" } }, "required": [ - "threshold", - "alpha" + "threshold" ], "title": "strwrRunModel", "type": "object" diff --git a/spras/config/algorithms.py b/spras/config/algorithms.py index bc7b896fc..bf0f13750 100644 --- a/spras/config/algorithms.py +++ b/spras/config/algorithms.py @@ -3,13 +3,40 @@ parameter combinations. This has been isolated from schema.py as it is not declarative, and rather mainly contains validators and lower-level pydantic code. """ -from typing import Any, cast, Union, Literal +from typing import Annotated, Any, Callable, cast, Union, Literal from spras.runner import algorithms -from pydantic import BaseModel, create_model +from pydantic import BaseModel, BeforeValidator, create_model __all__ = ['AlgorithmUnion'] +def is_numpy_friendly(type: type[Any] | None) -> bool: + """ + Whether the passed in type can have any numpy helpers. + This is mainly used to provide hints in the JSON schema. 
+ """ + return type in (int, float) + +def python_evalish_coerce(type: type[Any] | None) -> Callable[[Any], Any]: + """ + Allows for using numpy and python calls + """ + + def numpy_coerce_validator(value: Any) -> Any: + raise NotImplementedError + + return numpy_coerce_validator + + +def list_coerce(value: Any) -> Any: + """ + Coerces to a value to a list if it isn't already. + Used as a BeforeValidator. + """ + if not isinstance(value, list): + return [value] + return value + def construct_algorithm_model(name: str, model: type[BaseModel]) -> type[BaseModel]: """ Dynamically constructs a parameter-combination model based on the original args model. @@ -26,14 +53,32 @@ def construct_algorithm_model(name: str, model: type[BaseModel]) -> type[BaseMod # class AlgorithmParamsCombination(BaseModel): # key1: list[int] # key2: list[list[str]] - # This function does not worry about getting the cartesian product of this. + # However, we want to preserve certain conveniences (singleton values, fake python evaluation), + # so we also make use of BeforeValidators to do so, and we pass over their preferences into the JSON schema. + # (Note: This function does not worry about getting the cartesian product of this.) - # Map our fields to a list (assuming we have no nested keys) - mapped_list_field: dict[str, type[list[Any]]] = {name: list[field.annotation] for name, field in model.model_fields.items()} + # Map our fields to a list (assuming we have no nested keys), + # and specify our user convenience validators + mapped_list_field: dict[str, Annotated] = { + name: (Annotated[ + list[field.annotation], + # This order isn't arbitrary. + # https://docs.pydantic.dev/latest/concepts/validators/#ordering-of-validators + # This runs second. This coerces any singletons to lists. + BeforeValidator(list_coerce), + # This runs first. This evaluates numpy utils for integer/float lists + BeforeValidator( + python_evalish_coerce(field.annotation), + # json_schema_input_type (sensibly) overwrites, so we only specify it here. + json_schema_input_type=Union[field.annotation, list[field.annotation], str] if is_numpy_friendly(field.annotation) else \ + Union[field.annotation, list[field.annotation]] + ) + ], field) for name, field in model.model_fields.items() + } # Runtime assertion check: mapped_list_field does not contain any `__-prefixed` fields for key in mapped_list_field.keys(): - assert not key.startswith("__"), f"A private key has been passed from {name}'s argument schema." + \ + assert not key.startswith("__"), f"A private key has been passed from {name}'s argument schema. " + \ "This should have been caught by the Snakemake CI step." # Pass this as kwargs to create_model, which usually takes in parameters field_name=type. From 72c4cbd4dbbf9cedf607010f659e5811226ab830 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Tue, 15 Jul 2025 15:56:13 +0000 Subject: [PATCH 50/60] feat: default runs for default algorithms --- config/config.yaml | 1 + config/schema.json | 64 +++++++++++++++++++++++++++++--------- spras/config/algorithms.py | 14 ++++++--- spras/runner.py | 26 +++++++++------- 4 files changed, 75 insertions(+), 30 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 3e2127d53..49ae31f4f 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -95,6 +95,7 @@ algorithms: include: true - name: "domino" + include: true params: include: true run1: diff --git a/config/schema.json b/config/schema.json index 01494a4ea..649b815c6 100644 --- a/config/schema.json +++ b/config/schema.json @@ -315,14 +315,16 @@ "additionalProperties": { "$ref": "#/$defs/allpairsRunModel" }, + "default": { + "default": {} + }, "title": "Runs", "type": "object" } }, "required": [ "name", - "include", - "runs" + "include" ], "title": "allpairsModel", "type": "object" @@ -347,14 +349,16 @@ "additionalProperties": { "$ref": "#/$defs/bowtiebuilderRunModel" }, + "default": { + "default": {} + }, "title": "Runs", "type": "object" } }, "required": [ "name", - "include", - "runs" + "include" ], "title": "bowtiebuilderModel", "type": "object" @@ -379,14 +383,20 @@ "additionalProperties": { "$ref": "#/$defs/dominoRunModel" }, + "default": { + "default": { + "_time": 1752594898.608572, + "module_threshold": null, + "slice_threshold": null + } + }, "title": "Runs", "type": "object" } }, "required": [ "name", - "include", - "runs" + "include" ], "title": "dominoModel", "type": "object" @@ -482,14 +492,20 @@ "additionalProperties": { "$ref": "#/$defs/meoRunModel" }, + "default": { + "default": { + "max_path_length": null, + "local_search": null, + "rand_restarts": null + } + }, "title": "Runs", "type": "object" } }, "required": [ "name", - "include", - "runs" + "include" ], "title": "meoModel", "type": "object" @@ -593,14 +609,19 @@ "additionalProperties": { "$ref": "#/$defs/mincostflowRunModel" }, + "default": { + "default": { + "flow": null, + "capacity": null + } + }, "title": "Runs", "type": "object" } }, "required": [ "name", - "include", - "runs" + "include" ], "title": "mincostflowModel", "type": "object" @@ -1004,14 +1025,25 @@ "additionalProperties": { "$ref": "#/$defs/omicsintegrator2RunModel" }, + "default": { + "default": { + "w": 6.0, + "b": 1.0, + "g": 20.0, + "noise": null, + "noisy_edges": null, + "random_terminals": null, + "dummy_mode": null, + "seed": 1752594898608 + } + }, "title": "Runs", "type": "object" } }, "required": [ "name", - "include", - "runs" + "include" ], "title": "omicsintegrator2Model", "type": "object" @@ -1216,14 +1248,18 @@ "additionalProperties": { "$ref": "#/$defs/pathlinkerRunModel" }, + "default": { + "default": { + "k": 100 + } + }, "title": "Runs", "type": "object" } }, "required": [ "name", - "include", - "runs" + "include" ], "title": "pathlinkerModel", "type": "object" diff --git a/spras/config/algorithms.py b/spras/config/algorithms.py index bf0f13750..f129594db 100644 --- a/spras/config/algorithms.py +++ b/spras/config/algorithms.py @@ -3,7 +3,7 @@ parameter combinations. This has been isolated from schema.py as it is not declarative, and rather mainly contains validators and lower-level pydantic code. 
""" -from typing import Annotated, Any, Callable, cast, Union, Literal +from typing import Annotated, Any, Callable, cast, Optional, Union, Literal from spras.runner import algorithms from pydantic import BaseModel, BeforeValidator, create_model @@ -37,7 +37,7 @@ def list_coerce(value: Any) -> Any: return [value] return value -def construct_algorithm_model(name: str, model: type[BaseModel]) -> type[BaseModel]: +def construct_algorithm_model(name: str, model: type[BaseModel], model_default: Optional[BaseModel]) -> type[BaseModel]: """ Dynamically constructs a parameter-combination model based on the original args model. This is the most 'hacky' part of this code, but, thanks to pydantic, we avoid reflection @@ -100,8 +100,14 @@ def construct_algorithm_model(name: str, model: type[BaseModel]) -> type[BaseMod f'{name}Model', name=Literal[name], include=bool, - runs=dict[str, run_model] + # For algorithms that have a default parameter config, we allow arbitrarily running an algorithm + # if no runs are specified. For example, the following config + # name: pathlinker + # include: true + # will run, despite there being no entries in `runs`. + # (create_model entries take in either a type or (type, default)). + runs=dict[str, run_model] if model_default is None else (dict[str, run_model], {"default": model_default}) ) -algorithm_models: list[type[BaseModel]] = [construct_algorithm_model(name, model) for name, (_, model) in algorithms.items()] +algorithm_models: list[type[BaseModel]] = [construct_algorithm_model(name, model, model_default) for name, (_, model, model_default) in algorithms.items()] AlgorithmUnion = Union[tuple(algorithm_models)] diff --git a/spras/runner.py b/spras/runner.py index 843b3cf46..4f603f9b9 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Optional from pydantic import BaseModel @@ -17,17 +17,19 @@ from spras.rwr import RWR, RWRParams from spras.strwr import ST_RWR, ST_RWRParams -algorithms: dict[str, tuple[type[PRM], type[BaseModel]]] = { - "allpairs": (AllPairs, Empty), - "bowtiebuilder": (BowTieBuilder, Empty), - "domino": (DOMINO, DominoParams), - "meo": (MEO, MEOParams), - "mincostflow": (MinCostFlow, MinCostFlowParams), - "omicsintegrator1": (OmicsIntegrator1, OmicsIntegrator1Params), - "omicsintegrator2": (OmicsIntegrator2, OmicsIntegrator2Params), - "pathlinker": (PathLinker, PathLinkerParams), - "rwr": (RWR, RWRParams), - "strwr": (ST_RWR, ST_RWRParams), +# Algorithm names to a three-tuple of (PRM, BaseModel, default BaseModel or None if there are no good defaults). +# This is used for the configuration and to fetch algorithms during reconstruction +algorithms: dict[str, tuple[type[PRM], type[BaseModel], Optional[BaseModel]]] = { + "allpairs": (AllPairs, Empty, Empty()), + "bowtiebuilder": (BowTieBuilder, Empty, Empty()), + "domino": (DOMINO, DominoParams, DominoParams()), + "meo": (MEO, MEOParams, MEOParams()), + "mincostflow": (MinCostFlow, MinCostFlowParams, MinCostFlowParams()), + "omicsintegrator1": (OmicsIntegrator1, OmicsIntegrator1Params, None), + "omicsintegrator2": (OmicsIntegrator2, OmicsIntegrator2Params, OmicsIntegrator2Params()), + "pathlinker": (PathLinker, PathLinkerParams, PathLinkerParams()), + "rwr": (RWR, RWRParams, None), + "strwr": (ST_RWR, ST_RWRParams, None), } def get_algorithm(algorithm: str) -> type[PRM]: From 2ef26727221583cc7ef6c613e651d15a25c6b0e8 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Tue, 15 Jul 2025 17:11:33 +0000 Subject: [PATCH 51/60] feat: function running --- config/schema.json | 4 +-- spras/config/algorithms.py | 55 ++++++++++++++++++++++++++++++-------- spras/omicsintegrator1.py | 3 +-- util/play.py | 5 ++++ 4 files changed, 52 insertions(+), 15 deletions(-) create mode 100644 util/play.py diff --git a/config/schema.json b/config/schema.json index 649b815c6..be41b5b3d 100644 --- a/config/schema.json +++ b/config/schema.json @@ -385,7 +385,7 @@ }, "default": { "default": { - "_time": 1752594898.608572, + "_time": 1752596079.9888437, "module_threshold": null, "slice_threshold": null } @@ -1034,7 +1034,7 @@ "noisy_edges": null, "random_terminals": null, "dummy_mode": null, - "seed": 1752594898608 + "seed": 1752596079988 } }, "title": "Runs", diff --git a/spras/config/algorithms.py b/spras/config/algorithms.py index f129594db..b5c42199d 100644 --- a/spras/config/algorithms.py +++ b/spras/config/algorithms.py @@ -3,8 +3,10 @@ parameter combinations. This has been isolated from schema.py as it is not declarative, and rather mainly contains validators and lower-level pydantic code. """ +import ast from typing import Annotated, Any, Callable, cast, Optional, Union, Literal +import numpy as np from spras.runner import algorithms from pydantic import BaseModel, BeforeValidator, create_model @@ -17,16 +19,48 @@ def is_numpy_friendly(type: type[Any] | None) -> bool: """ return type in (int, float) -def python_evalish_coerce(type: type[Any] | None) -> Callable[[Any], Any]: +def python_evalish_coerce(value: Any) -> Any: """ - Allows for using numpy and python calls + Allows for using numpy and python calls. + + **Safety Note**: This does not prevent availability attacks: this can still exhaust + resources if wanted. This only prevents secret leakage. """ + + if not isinstance(value, str): + return value + + # These strings are in the form of function calls `function.name(param1, param2, ...)`. + # Since we want to avoid `eval` (since this might be running in the secret-sensitive HTCondor), + # we need to parse these functions. + functions_dict: dict[str, Callable[[list[Any]], list[Union[int, float]]]] = { + 'range': lambda params: list(range(*params)), + "np.linspace": lambda params: list(np.linspace(*params)), + "np.arange": lambda params: list(np.arange(*params)), + "np.logspace": lambda params: list(np.logspace(*params)), + } + + # To do this, we get the AST of our string as an expression + value_ast = ast.parse(value, mode='eval') + + # Then we do some light parsing - we're only looking to do some literal evaluation + # (e.g. allowing 1+1) and some basic function parsing. Full python programs + # should just generate a config.yaml. + + # This should always be an Expression whose body is Call (a function). + if not isinstance(value_ast.body, ast.Call): + raise ValueError(f'The python code "{value}" should be calling a function directly. Is this meant to be python code?') - def numpy_coerce_validator(value: Any) -> Any: - raise NotImplementedError + # We get the function name back as a string + function_name = ast.unparse(value_ast.body.func) - return numpy_coerce_validator + # and we use the (non-availability) safe `ast.literal_eval` to support light expressions. 
+ arguments = [ast.literal_eval(arg) for arg in value_ast.body.args] if function_name not in functions_dict: + raise ValueError(f"{function_name} is not an allowed function to be run!") + + return functions_dict[function_name](arguments) def list_coerce(value: Any) -> Any: """ Coerces a value to a list if it isn't one already. @@ -65,14 +99,13 @@ def construct_algorithm_model(name: str, model: type[BaseModel], model_default: # This order isn't arbitrary. # https://docs.pydantic.dev/latest/concepts/validators/#ordering-of-validators # This runs second. This coerces any singletons to lists. - BeforeValidator(list_coerce), + BeforeValidator(list_coerce, json_schema_input_type=Union[field.annotation, list[field.annotation]]), # This runs first. This evaluates numpy utils for integer/float lists BeforeValidator( - python_evalish_coerce(field.annotation), - # json_schema_input_type (sensibly) overwrites, so we only specify it here. - json_schema_input_type=Union[field.annotation, list[field.annotation], str] if is_numpy_friendly(field.annotation) else \ - Union[field.annotation, list[field.annotation]] - ) + python_evalish_coerce, + # json_schema_input_type (sensibly) overwrites, so we have to specify the entire union again here. + json_schema_input_type=Union[field.annotation, list[field.annotation], str] + ) if is_numpy_friendly(field.annotation) else None ], field) for name, field in model.model_fields.items() } diff --git a/spras/omicsintegrator1.py index ddb934bb5..013eced8d 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -209,8 +209,7 @@ def run(inputs, output_file, args, container_framework="docker"): command.extend(['--noisyEdges', str(args.noisy_edges)]) command.extend(['--shuffledPrizes', str(args.shuffled_prizes)]) command.extend(['--randomTerminals', str(args.random_terminals)]) - if args.seed is not None: - command.extend(['--seed', str(args.seed)]) + command.extend(['--seed', str(args.seed)]) container_suffix = "omics-integrator-1:no-conda" # no-conda version is the default run_container_and_log('Omics Integrator 1', diff --git a/util/play.py new file mode 100644 index 000000000..f53ae9f53 --- /dev/null +++ b/util/play.py @@ -0,0 +1,5 @@ +import ast +value_ast = ast.parse("np.range.test(1, 2, 3)", mode='eval') +# print(ast.dump(value_ast.body, indent=2)) +assert isinstance(value_ast.body, ast.Call) +print([ast.literal_eval(arg) for arg in value_ast.body.args]) \ No newline at end of file From 9442b6496251823456670092f12b404dba19c76c Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 15 Jul 2025 17:13:45 +0000 Subject: [PATCH 52/60] chore: drop play --- util/play.py | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 util/play.py diff --git a/util/play.py deleted file mode 100644 index f53ae9f53..000000000 --- a/util/play.py +++ /dev/null @@ -1,5 +0,0 @@ -import ast -value_ast = ast.parse("np.range.test(1, 2, 3)", mode='eval') -# print(ast.dump(value_ast.body, indent=2)) -assert isinstance(value_ast.body, ast.Call) -print([ast.literal_eval(arg) for arg in value_ast.body.args]) \ No newline at end of file
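
A usage sketch of the eval-free expansion implemented above (illustrative; condensed from the patch, with the whitelist trimmed to one entry):

    import ast

    allowed = {"range": lambda args: list(range(*args))}  # trimmed whitelist

    def expand(expr: str):
        tree = ast.parse(expr, mode="eval")
        if not isinstance(tree.body, ast.Call):
            raise ValueError(f"{expr!r} must be a direct function call")
        name = ast.unparse(tree.body.func)  # e.g. "range" or "np.linspace"
        args = [ast.literal_eval(arg) for arg in tree.body.args]
        return allowed[name](args)

    print(expand("range(1, 3)"))  # [1, 2]

From 60b562f45a89a61d863a4bc6422cf87ce9fe81b7 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R."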
Date: Tue, 15 Jul 2025 17:21:36 +0000 Subject: [PATCH 53/60] fix(config): don't try to parse in config.py --- spras/config/config.py | 42 +++++++----------------------------------- 1 file changed, 7 insertions(+), 35 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index 252d6ccf5..72f08f330 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -164,47 +164,19 @@ def process_algorithms(self, raw_config: RawConfig): # Do not parse the rest of the parameters for this algorithm if it is not included continue - if cur_params.directed is not None: - warnings.warn("UPDATE: we no longer use the directed key in the config file", stacklevel=2) - - cur_params = cur_params.__pydantic_extra__ - if cur_params is None: - raise RuntimeError("An internal error occured: ConfigDict extra should be set on AlgorithmParams.") - - # The algorithm has no named arguments so create a default placeholder - if len(cur_params.keys()) == 0: - cur_params["run1"] = {"spras_placeholder": ["no parameters"]} + runs: dict[str, Any] = cur_params.runs # Each set of runs should be 1 level down in the config file - for run_params in cur_params: + for run_name in runs.keys(): all_runs = [] # We create the product of all param combinations for each run param_name_list = [] - if cur_params[run_params]: - for p in cur_params[run_params]: - param_name_list.append(p) - obj = str(cur_params[run_params][p]) - try: - obj = [int(obj)] - except ValueError: - try: - obj = [float(obj)] - except ValueError: - # Handles arrays and special evaluation types - # TODO: do we want to explicitly bar `eval` if we may use untrusted user inputs later? - if obj.startswith(("range", "np.linspace", "np.arange", "np.logspace", "[")): - obj = eval(obj) - elif obj.lower() == "true": - obj = [True] - elif obj.lower() == "false": - obj = [False] - else: - # Catch-all for strings - obj = [obj] - if not isinstance(obj, Iterable): - raise ValueError(f"The object `{obj}` in algorithm {alg.name} at key '{p}' in run '{run_params}' is not iterable!") from None - all_runs.append(obj) + for param in runs[run_name]: + param_name_list.append(param) + # this is guaranteed to be list[Any] by algorithms.py + param_values: list[Any] = runs[run_name][param] + all_runs.append(param_values) run_list_tuples = list(it.product(*all_runs)) param_name_tuple = tuple(param_name_list) for r in run_list_tuples: From c1947e67409b90cd07a2f632335302f8f6422554 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 15 Jul 2025 18:32:30 +0000 Subject: [PATCH 54/60] fix: subscriptability --- spras/config/algorithms.py | 17 ++++-- spras/config/config.py | 10 ++-- spras/config/schema.py | 3 - spras/omicsintegrator2.py | 8 ++- test/test_config.py | 109 ++++++++++++++++--------------------- 5 files changed, 70 insertions(+), 77 deletions(-) diff --git a/spras/config/algorithms.py b/spras/config/algorithms.py index b5c42199d..fbc7a2230 100644 --- a/spras/config/algorithms.py +++ b/spras/config/algorithms.py @@ -4,11 +4,11 @@ and rather mainly contains validators and lower-level pydantic code. 
""" import ast -from typing import Annotated, Any, Callable, cast, Optional, Union, Literal +from typing import Annotated, Any, Callable, cast, get_args, Optional, Union, Literal import numpy as np from spras.runner import algorithms -from pydantic import BaseModel, BeforeValidator, create_model +from pydantic import BaseModel, BeforeValidator, create_model, Field __all__ = ['AlgorithmUnion'] @@ -17,7 +17,11 @@ def is_numpy_friendly(type: type[Any] | None) -> bool: Whether the passed in type can have any numpy helpers. This is mainly used to provide hints in the JSON schema. """ - return type in (int, float) + allowed_types = (int, float) + + # check basic types, then check optional types + return type in allowed_types or \ + any([arg for arg in get_args(type) if arg in allowed_types]) def python_evalish_coerce(value: Any) -> Any: """ @@ -41,10 +45,10 @@ def python_evalish_coerce(value: Any) -> Any: } # To do this, we get the AST of our string as an expression - value_ast = ast.parse(value, mode='eval') + value_ast = ast.parse(value, mode='eval', filename='config.yaml') # Then we do some light parsing - we're only looking to do some literal evaluation - # (e.g. allowing 1+1) and some basic function parsing. Full python programs + # (allowing light python notation) and some basic function parsing. Full python programs # should just generate a config.yaml. # This should always be an Expression whose body is Call (a function). @@ -143,4 +147,5 @@ def construct_algorithm_model(name: str, model: type[BaseModel], model_default: ) algorithm_models: list[type[BaseModel]] = [construct_algorithm_model(name, model, model_default) for name, (_, model, model_default) in algorithms.items()] -AlgorithmUnion = Union[tuple(algorithm_models)] +# name differentriates algorithms +AlgorithmUnion = Annotated[Union[tuple(algorithm_models)], Field(discriminator='name')] diff --git a/spras/config/config.py b/spras/config/config.py index 72f08f330..6eeb760a7 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -156,15 +156,14 @@ def process_algorithms(self, raw_config: RawConfig): self.algorithm_directed = dict() self.algorithms = raw_config.algorithms for alg in self.algorithms: - cur_params = alg.params - if cur_params.include: + if alg.include: # This dict maps from parameter combinations hashes to parameter combination dictionaries self.algorithm_params[alg.name] = dict() else: # Do not parse the rest of the parameters for this algorithm if it is not included continue - runs: dict[str, Any] = cur_params.runs + runs: dict[str, Any] = alg.runs # Each set of runs should be 1 level down in the config file for run_name in runs.keys(): @@ -172,10 +171,11 @@ def process_algorithms(self, raw_config: RawConfig): # We create the product of all param combinations for each run param_name_list = [] - for param in runs[run_name]: + run_subscriptable = vars(runs[run_name]) + for param in run_subscriptable: param_name_list.append(param) # this is guaranteed to be list[Any] by algorithms.py - param_values: list[Any] = runs[run_name][param] + param_values: list[Any] = run_subscriptable[param] all_runs.append(param_values) run_list_tuples = list(it.product(*all_runs)) param_name_tuple = tuple(param_name_list) diff --git a/spras/config/schema.py b/spras/config/schema.py index 7657a41a0..fc502b677 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -131,6 +131,3 @@ class RawConfig(BaseModel): reconstruction_settings: ReconstructionSettings model_config = ConfigDict(extra='forbid') - -# AlgorithmUnion 
is dynamically constructed. -RawConfig.model_rebuild() diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 5e8e73ef0..d92ba77d2 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -5,6 +5,7 @@ import pandas as pd from pydantic import BaseModel, ConfigDict, Field +from spras.config.util import CaseInsensitiveEnum from spras.containers import prepare_volume, run_container_and_log from spras.dataset import Dataset from spras.interactome import reinsert_direction_col_undirected @@ -13,6 +14,11 @@ __all__ = ['OmicsIntegrator2', 'OmicsIntegrator2Params'] +class DummyMode(CaseInsensitiveEnum): + terminals = 'terminals' + others = 'others' + all = 'all' + class OmicsIntegrator2Params(BaseModel): w: float = 6 "Omega: the weight of the edges connecting the dummy node to the nodes selected by dummyMode" @@ -32,7 +38,7 @@ class OmicsIntegrator2Params(BaseModel): random_terminals: Optional[int] = None "An integer specifying how many times to apply your given prizes to random nodes in the interactome and re-run" - dummy_mode: Optional[str] = None + dummy_mode: Optional[DummyMode] = None """ Tells the program which nodes in the interactome to connect the dummy node to. (default: terminals) "terminals" = connect to all terminals diff --git a/test/test_config.py b/test/test_config.py index 6095ad145..b0031d029 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -5,6 +5,7 @@ import spras.config.config as config from spras.config.schema import DEFAULT_HASH_LENGTH +from spras.config.container_schema import DEFAULT_CONTAINER_PREFIX filler_dataset_data: dict[str, str | list[str]] = { "data_dir": "fake", @@ -18,10 +19,12 @@ # individual values of the dict can be changed and the whole initialization can be re-run. def get_test_config(): test_raw_config = { - "container_framework": "singularity", - "container_registry": { - "base_url": "docker.io", - "owner": "reedcompbio", + "containers": { + "framework": "singularity", + "registry": { + "base_url": "docker.io", + "owner": "reedcompbio", + }, }, "hash_length": 7, "reconstruction_settings": { @@ -49,55 +52,37 @@ def get_test_config(): "data_dir": "gs-fake" }], "algorithms": [ + # Since there is algorithm validation, + # we are (mostly) forced to use real algorithm parameters here. + # To make this more readable, we make the 'test names' the run names. + # TODO: we don't have a test for combinations of strings anymore. This seems to be fine, + # but it would be nice to have once we introduce an algorithm that takes more than 1 string parameter. 
{ - "name": "strings", - "params": { - "include": True, - "run1": {"test": "str1", "test2": ["str2", "str3"]} - } - }, - { - "name": "numbersAndBools", - "params": { - "include": True, - "run1": {"a": 1, "b": [float(2.0), 3], "c": [4], "d": float(5.6), "f": False} - } - }, - { - "name": "singleton_int64_with_array", - "params": { - "include": True, - "run1": {"test": np.int64(1), "test2": [2, 3]} + "name": "omicsintegrator2", + "include": True, + "runs": { + "strings": {"dummyMode": ["terminals", "others"], "b": 1}, + # spacing in np.linspace is on purpose + "singleton_string_np_linspace": {"dummyMode": "terminals", "b": "np.linspace(0, 5,2)"}, + "str_array_np_logspace": {"test": ["others", "all"], "g": "np.logspace(1,1)"} } }, { - "name": "singleton_string_np_linspace", - "params": { - "include": True, - "run1": {"test": "str1", "test2": "np.linspace(0,5,2)"} + "name": "meo", + "include": True, + "runs": { + "numbersAndBool": {"max_path_length": 1, "rand_restarts": [float(2.0), 3], "local_search": True}, + "numbersAndBools": {"max_path_length": 1, "rand_restarts": [float(2.0), 3], "local_search": [True, False]}, + "boolArrTest": {"local_search": [True, False], "max_path_length": "range(1, 3)"} } }, { - "name": "str_array_np_logspace", - "params": { - "include": True, - "run1": {"test": ["a", "b"], "test2": "np.logspace(1,1)"} + "name": "mincostflow", + "include": True, + "runs": { + "int64artifact": {"flow": "np.arange(5,6)", "capacity": [2, 3]} } }, - { - "name": "int64artifact", - "params": { - "include": True, - "run1": {"test": "np.arange(5,6)", "test2": [2, 3]} - } - }, - { - "name": "boolArrTest", - "params": { - "include": True, - "run1": {"flags": [True, False], "range": "range(1, 3)"} - } - } ], "analysis": { "summary": { @@ -159,46 +144,46 @@ def test_config_container_framework_normalization(self): # Test singularity test_config = get_test_config() - test_config["container_framework"] = "singularity" + test_config["containers"]["framework"] = "singularity" config.init_global(test_config) - assert (config.config.container_framework == "singularity") + assert (config.config.container_settings.framework == "singularity") # Test singularity with capitalization - test_config["container_framework"] = "Singularity" + test_config["containers"]["framework"] = "Singularity" config.init_global(test_config) - assert (config.config.container_framework == "singularity") + assert (config.config.container_settings.framework == "singularity") # Test docker - test_config["container_framework"] = "docker" + test_config["containers"]["framework"] = "docker" config.init_global(test_config) - assert (config.config.container_framework == "docker") + assert (config.config.container_settings.framework == "docker") # Test docker with capitalization - test_config["container_framework"] = "Docker" + test_config["containers"]["framework"] = "Docker" config.init_global(test_config) - assert (config.config.container_framework == "docker") + assert (config.config.container_settings.framework == "docker") # Test unknown framework - test_config["container_framework"] = "badFramework" + test_config["containers"]["framework"] = "badFramework" with pytest.raises(ValueError): config.init_global(test_config) def test_config_container_registry(self): test_config = get_test_config() - test_config["container_registry"]["base_url"] = "docker.io" - test_config["container_registry"]["owner"] = "reedcompbio" + test_config["containers"]["registry"]["base_url"] = "docker.io" + test_config["containers"]["registry"]["owner"] = 
"reedcompbio" config.init_global(test_config) - assert (config.config.container_prefix == "docker.io/reedcompbio") + assert (config.config.container_settings.prefix == "docker.io/reedcompbio") - test_config["container_registry"]["base_url"] = "another.repo" - test_config["container_registry"]["owner"] = "different-owner" + test_config["containers"]["registry"]["base_url"] = "another.repo" + test_config["containers"]["registry"]["owner"] = "different-owner" config.init_global(test_config) - assert (config.config.container_prefix == "another.repo/different-owner") + assert (config.config.container_settings.prefix == "another.repo/different-owner") - test_config["container_registry"]["base_url"] = "" - test_config["container_registry"]["owner"] = "" + test_config["containers"]["registry"]["base_url"] = "" + test_config["containers"]["registry"]["owner"] = "" config.init_global(test_config) - assert (config.config.container_prefix == config.DEFAULT_CONTAINER_PREFIX) + assert (config.config.container_settings.prefix == DEFAULT_CONTAINER_PREFIX) def test_error_dataset_label(self): test_config = get_test_config() From 8beaf72a7e7f64c42cc543a0f31b77fdf99485e3 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 15 Jul 2025 20:03:48 +0000 Subject: [PATCH 55/60] fix: auto-discriminator mapping & forbid --- config/egfr.yaml | 71 ++++++++------------ config/schema.json | 107 ++++++++++++++++++++++++++++--- spras/config/algorithms.py | 6 +- spras/config/container_schema.py | 11 +++- spras/domino.py | 2 +- spras/meo.py | 2 +- spras/mincostflow.py | 2 +- spras/omicsintegrator1.py | 2 +- spras/omicsintegrator2.py | 2 +- spras/pathlinker.py | 2 +- spras/rwr.py | 2 +- spras/strwr.py | 2 +- 12 files changed, 143 insertions(+), 68 deletions(-) diff --git a/config/egfr.yaml b/config/egfr.yaml index 9b4ccc45b..106963c62 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -1,41 +1,25 @@ -# The length of the hash used to identify a parameter combination -hash_length: 7 - -# Specify the container framework used by each PRM wrapper. Valid options include: -# - docker (default if not specified) -# - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed -# - dsub -- experimental with limited support, used for running on Google Cloud -container_framework: docker +# yaml-language-server: $schema=./schema.json -# Only used if container_framework is set to singularity, this will unpack the singularity containers -# to the local filesystem. This is useful when PRM containers need to run inside another container, -# such as would be the case in an HTCondor/OSPool environment. -# NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way -# that persists after the workflow is complete. To clean up the unpacked containers, the user must -# manually delete them. 
-unpack_singularity: false - -# Allow the user to configure which container registry containers should be pulled from -# Note that this assumes container names are consistent across registries, and that the -# registry being passed doesn't require authentication for pull actions -container_registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio +hash_length: 7 +containers: + framework: docker + unpack_singularity: false + registry: + base_url: docker.io + owner: reedcompbio algorithms: - name: pathlinker - params: - include: true + include: true + runs: run1: k: - 10 - 20 - 70 - name: omicsintegrator1 - params: - include: true + include: true + runs: run1: b: - 0.55 @@ -53,8 +37,8 @@ algorithms: - 0.008 dummy_mode: ["file"] - name: omicsintegrator2 - params: - include: true + include: true + runs: run1: b: - 4 @@ -66,36 +50,31 @@ algorithms: g: - 3 - name: meo - params: - include: true + include: true + runs: run1: - local_search: - - "Yes" + local_search: true max_path_length: - 3 rand_restarts: - 10 run2: - local_search: - - "No" - max_path_length: - - 2 - rand_restarts: - - 10 + local_search: false + max_path_length: 2 + rand_restarts: 10 - name: allpairs - params: - include: true + include: true - name: domino - params: - include: true + include: true + runs: run1: slice_threshold: - 0.3 module_threshold: - 0.05 - name: mincostflow - params: - include: true + include: true + runs: run1: capacity: - 15 diff --git a/config/schema.json b/config/schema.json index be41b5b3d..c15dcaa8a 100644 --- a/config/schema.json +++ b/config/schema.json @@ -50,23 +50,23 @@ "additionalProperties": false, "properties": { "base_url": { + "default": "docker.io", + "description": "The domain of the registry", "title": "Base Url", "type": "string" }, "owner": { + "default": "reedcompbio", "description": "The owner or project of the registry", "title": "Owner", "type": "string" } }, - "required": [ - "base_url", - "owner" - ], "title": "ContainerRegistry", "type": "object" }, "ContainerSettings": { + "additionalProperties": false, "properties": { "framework": { "$ref": "#/$defs/ContainerFramework", @@ -149,6 +149,15 @@ "title": "Dataset", "type": "object" }, + "DummyMode": { + "enum": [ + "terminals", + "others", + "all" + ], + "title": "DummyMode", + "type": "string" + }, "EvaluationAnalysis": { "additionalProperties": false, "properties": { @@ -301,6 +310,7 @@ "type": "object" }, "allpairsModel": { + "additionalProperties": false, "properties": { "name": { "const": "allpairs", @@ -330,11 +340,13 @@ "type": "object" }, "allpairsRunModel": { + "additionalProperties": false, "properties": {}, "title": "allpairsRunModel", "type": "object" }, "bowtiebuilderModel": { + "additionalProperties": false, "properties": { "name": { "const": "bowtiebuilder", @@ -364,11 +376,13 @@ "type": "object" }, "bowtiebuilderRunModel": { + "additionalProperties": false, "properties": {}, "title": "bowtiebuilderRunModel", "type": "object" }, "dominoModel": { + "additionalProperties": false, "properties": { "name": { "const": "domino", @@ -385,7 +399,7 @@ }, "default": { "default": { - "_time": 1752596079.9888437, + "_time": 1752606304.38952, "module_threshold": null, "slice_threshold": null } @@ -402,6 +416,7 @@ "type": "object" }, "dominoRunModel": { + "additionalProperties": false, "properties": { "_time": { "anyOf": [ @@ -439,6 +454,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } 
@@ -465,6 +483,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -478,6 +499,7 @@ "type": "object" }, "meoModel": { + "additionalProperties": false, "properties": { "name": { "const": "meo", @@ -511,6 +533,7 @@ "type": "object" }, "meoRunModel": { + "additionalProperties": false, "properties": { "max_path_length": { "anyOf": [ @@ -530,6 +553,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -582,6 +608,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -595,6 +624,7 @@ "type": "object" }, "mincostflowModel": { + "additionalProperties": false, "properties": { "name": { "const": "mincostflow", @@ -627,6 +657,7 @@ "type": "object" }, "mincostflowRunModel": { + "additionalProperties": false, "properties": { "flow": { "anyOf": [ @@ -646,6 +677,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -672,6 +706,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -685,6 +722,7 @@ "type": "object" }, "omicsintegrator1Model": { + "additionalProperties": false, "properties": { "name": { "const": "omicsintegrator1", @@ -712,6 +750,7 @@ "type": "object" }, "omicsintegrator1RunModel": { + "additionalProperties": false, "properties": { "dummy_mode": { "anyOf": [ @@ -915,6 +954,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -941,6 +983,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -967,6 +1012,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -993,6 +1041,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -1011,6 +1062,7 @@ "type": "object" }, "omicsintegrator2Model": { + "additionalProperties": false, "properties": { "name": { "const": "omicsintegrator2", @@ -1034,7 +1086,7 @@ "noisy_edges": null, "random_terminals": null, "dummy_mode": null, - "seed": 1752596079988 + "seed": 1752606304389 } }, "title": "Runs", @@ -1049,6 +1101,7 @@ "type": "object" }, "omicsintegrator2RunModel": { + "additionalProperties": false, "properties": { "w": { "anyOf": [ @@ -1125,6 +1178,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -1151,6 +1207,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -1177,6 +1236,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -1188,13 +1250,13 @@ "dummy_mode": { "anyOf": [ { - "type": "string" + "$ref": "#/$defs/DummyMode" }, { "items": { "anyOf": [ { - "type": "string" + "$ref": "#/$defs/DummyMode" }, { "type": "null" @@ -1234,6 +1296,7 @@ "type": "object" }, "pathlinkerModel": { + "additionalProperties": false, "properties": { "name": { "const": "pathlinker", @@ -1265,6 +1328,7 @@ "type": "object" }, "pathlinkerRunModel": { + "additionalProperties": false, "properties": { "k": { "anyOf": [ @@ -1290,6 +1354,7 @@ "type": "object" }, "rwrModel": { + "additionalProperties": false, "properties": { "name": { "const": "rwr", @@ -1317,6 +1382,7 @@ "type": "object" }, "rwrRunModel": { + "additionalProperties": false, "properties": { "threshold": { "anyOf": [ @@ -1354,6 +1420,9 @@ }, "type": "array" }, + { + "type": "string" + }, { "type": "null" } @@ -1370,6 +1439,7 @@ "type": "object" }, "strwrModel": { + "additionalProperties": false, "properties": { "name": { "const": "strwr", @@ -1397,6 +1467,7 @@ "type": "object" }, "strwrRunModel": { + "additionalProperties": false, "properties": { "threshold": { "anyOf": [ @@ -1434,6 +1505,9 @@ }, "type": "array" }, + { + "type": "string" + 
}, { "type": "null" } @@ -1463,7 +1537,22 @@ }, "algorithms": { "items": { - "anyOf": [ + "discriminator": { + "mapping": { + "allpairs": "#/$defs/allpairsModel", + "bowtiebuilder": "#/$defs/bowtiebuilderModel", + "domino": "#/$defs/dominoModel", + "meo": "#/$defs/meoModel", + "mincostflow": "#/$defs/mincostflowModel", + "omicsintegrator1": "#/$defs/omicsintegrator1Model", + "omicsintegrator2": "#/$defs/omicsintegrator2Model", + "pathlinker": "#/$defs/pathlinkerModel", + "rwr": "#/$defs/rwrModel", + "strwr": "#/$defs/strwrModel" + }, + "propertyName": "name" + }, + "oneOf": [ { "$ref": "#/$defs/allpairsModel" }, diff --git a/spras/config/algorithms.py b/spras/config/algorithms.py index fbc7a2230..32f6b82d3 100644 --- a/spras/config/algorithms.py +++ b/spras/config/algorithms.py @@ -8,7 +8,7 @@ import numpy as np from spras.runner import algorithms -from pydantic import BaseModel, BeforeValidator, create_model, Field +from pydantic import BaseModel, BeforeValidator, create_model, ConfigDict, Field __all__ = ['AlgorithmUnion'] @@ -123,6 +123,7 @@ def construct_algorithm_model(name: str, model: type[BaseModel], model_default: # have had a key that starts with __ in mapped_list_fields. The above assertion prevents this. run_model = (cast(Any, create_model))( f'{name}RunModel', + __config__=ConfigDict(extra='forbid'), **mapped_list_field ) @@ -143,7 +144,8 @@ def construct_algorithm_model(name: str, model: type[BaseModel], model_default: # include: true # will run, despite there being no entries in `runs`. # (create_model entries take in either a type or (type, default)). - runs=dict[str, run_model] if model_default is None else (dict[str, run_model], {"default": model_default}) + runs=dict[str, run_model] if model_default is None else (dict[str, run_model], {"default": model_default}), + __config__=ConfigDict(extra='forbid') ) algorithm_models: list[type[BaseModel]] = [construct_algorithm_model(name, model, model_default) for name, (_, model, model_default) in algorithms.items()] diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py index 9688a9b51..ea9881a30 100644 --- a/spras/config/container_schema.py +++ b/spras/config/container_schema.py @@ -22,10 +22,13 @@ class ContainerFramework(CaseInsensitiveEnum): dsub = 'dsub' class ContainerRegistry(BaseModel): - base_url: str - owner: str = Field(description="The owner or project of the registry") + base_url: str = "docker.io" + "The domain of the registry" - model_config = ConfigDict(extra='forbid') + owner: str = "reedcompbio" + "The owner or project of the registry" + + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) class ContainerSettings(BaseModel): framework: ContainerFramework = ContainerFramework.docker @@ -33,6 +36,8 @@ class ContainerSettings(BaseModel): registry: ContainerRegistry hash_length: int = 7 + model_config = ConfigDict(extra='forbid') + @dataclass class ProcessedContainerSettings: framework: ContainerFramework = ContainerFramework.docker diff --git a/spras/domino.py b/spras/domino.py index a9ce7a43b..521f89722 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -26,7 +26,7 @@ class DominoParams(NondeterministicModel): slice_threshold: Optional[float] = None "the p-value threshold for considering a putative module as final module (optional)" - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) """ DOMINO will construct a fully undirected graph from the provided input file diff --git 
a/spras/meo.py b/spras/meo.py index 02edf07af..4b3f9299e 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -83,7 +83,7 @@ class MEOParams(BaseModel): rand_restarts: Optional[int] = None "The number of random restarts to do." - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) """ MEO can support partially directed graphs diff --git a/spras/mincostflow.py b/spras/mincostflow.py index 2673d91e2..1f7ff0cf7 100644 --- a/spras/mincostflow.py +++ b/spras/mincostflow.py @@ -20,7 +20,7 @@ class MinCostFlowParams(BaseModel): capacity: Optional[float] = None "amount of capacity allowed on each edge" - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) """ MinCostFlow deals with fully directed graphs diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index 013eced8d..1f33c25f7 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -77,7 +77,7 @@ class OmicsIntegrator1Params(BaseModel): r: Optional[float] = None "msgsteiner parameter that adds random noise to edges, which is rarely needed because the Forest --noisyEdges option is recommended instead (default 0)" - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) class OmicsIntegrator1(PRM[OmicsIntegrator1Params]): """ diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index d92ba77d2..aef4f3c48 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -49,7 +49,7 @@ class OmicsIntegrator2Params(BaseModel): seed: int = Field(default_factory=lambda _: int(time.time() * 1000)) "The random seed to use for this run. Defaults to the current UNIX timestamp." - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) """ Omics Integrator 2 will construct a fully undirected graph from the provided input file diff --git a/spras/pathlinker.py b/spras/pathlinker.py index 9b6fe964c..da0a91ba2 100644 --- a/spras/pathlinker.py +++ b/spras/pathlinker.py @@ -19,7 +19,7 @@ class PathLinkerParams(BaseModel): k: int = 100 "path length" - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) """ Pathlinker will construct a fully directed graph from the provided input file diff --git a/spras/rwr.py b/spras/rwr.py index ba78589ec..dff5bdb97 100644 --- a/spras/rwr.py +++ b/spras/rwr.py @@ -19,7 +19,7 @@ class RWRParams(BaseModel): alpha: Optional[float] = None "The chance of a restart during the random walk" - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) class RWR(PRM[RWRParams]): required_inputs = ['network','nodes'] diff --git a/spras/strwr.py b/spras/strwr.py index 37590e7c6..1b9159eff 100644 --- a/spras/strwr.py +++ b/spras/strwr.py @@ -19,7 +19,7 @@ class ST_RWRParams(BaseModel): alpha: Optional[float] = None "The chance of a restart during the random walk" - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) # Note: This class is almost identical to the rwr.py file. class ST_RWR(PRM[ST_RWRParams]): From b07a7ef0f1eba21609f0eb87bffc603a4199723c Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
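
Illustrative only: one plausible shape for the CaseInsensitiveEnum that DummyMode extends above (the real implementation lives in spras/config/util.py and may differ):

    from enum import Enum

    class CaseInsensitiveEnum(str, Enum):
        @classmethod
        def _missing_(cls, value):
            # Fall back to a lowercase lookup so "Terminals" matches "terminals".
            if isinstance(value, str):
                return cls.__members__.get(value.lower())
            return None

    class DummyMode(CaseInsensitiveEnum):
        terminals = "terminals"
        others = "others"
        all = "all"

    assert DummyMode("Terminals") is DummyMode.terminals
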
Date: Tue, 15 Jul 2025 20:04:32 +0000 Subject: [PATCH 56/60] style: fmt --- spras/config/algorithms.py | 13 +++++++------ spras/config/container_schema.py | 7 ++++--- spras/config/schema.py | 1 + spras/config/util.py | 2 +- spras/domino.py | 2 +- spras/omicsintegrator1.py | 2 +- spras/runner.py | 2 +- test/test_config.py | 2 +- 8 files changed, 17 insertions(+), 14 deletions(-) diff --git a/spras/config/algorithms.py b/spras/config/algorithms.py index 32f6b82d3..8c49c2ae2 100644 --- a/spras/config/algorithms.py +++ b/spras/config/algorithms.py @@ -4,11 +4,12 @@ and rather mainly contains validators and lower-level pydantic code. """ import ast -from typing import Annotated, Any, Callable, cast, get_args, Optional, Union, Literal +from typing import Annotated, Any, Callable, Literal, Optional, Union, cast, get_args import numpy as np +from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, create_model + from spras.runner import algorithms -from pydantic import BaseModel, BeforeValidator, create_model, ConfigDict, Field __all__ = ['AlgorithmUnion'] @@ -33,7 +34,7 @@ def python_evalish_coerce(value: Any) -> Any: if not isinstance(value, str): return value - + # These strings are in the form of function calls `function.name(param1, param2, ...)`. # Since we want to avoid `eval` (since this might be running in the secret-sensitive HTCondor), # we need to parse these functions. @@ -54,7 +55,7 @@ def python_evalish_coerce(value: Any) -> Any: # This should always be an Expression whose body is Call (a function). if not isinstance(value_ast.body, ast.Call): raise ValueError(f'The python code "{value}" should be calling a function directly. Is this meant to be python code?') - + # We get the function name back as a string function_name = ast.unparse(value_ast.body.func) @@ -63,7 +64,7 @@ def python_evalish_coerce(value: Any) -> Any: if function_name not in functions_dict: raise ValueError(f"{function_name} is not an allowed function to be run!") - + return functions_dict[function_name](arguments) def list_coerce(value: Any) -> Any: @@ -126,7 +127,7 @@ def construct_algorithm_model(name: str, model: type[BaseModel], model_default: __config__=ConfigDict(extra='forbid'), **mapped_list_field ) - + # Here is an example of how this would look like inside config.yaml # name: pathlinker # include: true diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py index ea9881a30..c88692678 100644 --- a/spras/config/container_schema.py +++ b/spras/config/container_schema.py @@ -6,10 +6,11 @@ this subsection of the configuration. 
""" +import warnings from dataclasses import dataclass -from pydantic import BaseModel, ConfigDict, Field from typing import Optional -import warnings + +from pydantic import BaseModel, ConfigDict, Field from spras.config.util import CaseInsensitiveEnum @@ -60,7 +61,7 @@ def from_container_settings(settings: ContainerSettings, default_hash_length: in container_prefix = DEFAULT_CONTAINER_PREFIX if settings.registry and settings.registry.base_url != "" and settings.registry.owner != "": container_prefix = settings.registry.base_url + "/" + settings.registry.owner - + return ProcessedContainerSettings( framework=container_framework, unpack_singularity=unpack_singularity, diff --git a/spras/config/schema.py b/spras/config/schema.py index fc502b677..b2ff0b3bd 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -19,6 +19,7 @@ from spras.config.container_schema import ContainerSettings from spras.config.util import CaseInsensitiveEnum + class SummaryAnalysis(BaseModel): include: bool diff --git a/spras/config/util.py b/spras/config/util.py index 0ed99a26e..63799e478 100644 --- a/spras/config/util.py +++ b/spras/config/util.py @@ -4,8 +4,8 @@ only import this config file. """ -from enum import Enum import time +from enum import Enum from typing import Any from pydantic import BaseModel, ConfigDict, Field diff --git a/spras/domino.py b/spras/domino.py index 521f89722..a45a445a2 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -5,8 +5,8 @@ import pandas as pd from pydantic import ConfigDict -from spras.containers import prepare_volume, run_container_and_log from spras.config.util import NondeterministicModel +from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( add_constant, reinsert_direction_col_undirected, diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index 1f33c25f7..d9ee603fb 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -1,5 +1,5 @@ -from pathlib import Path import time +from pathlib import Path from typing import Optional from pydantic import BaseModel, ConfigDict, Field diff --git a/spras/runner.py b/spras/runner.py index 4f603f9b9..209a32f42 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -5,8 +5,8 @@ # supported algorithm imports from spras.allpairs import AllPairs from spras.btb import BowTieBuilder -from spras.dataset import Dataset from spras.config.util import Empty +from spras.dataset import Dataset from spras.domino import DOMINO, DominoParams from spras.meo import MEO, MEOParams from spras.mincostflow import MinCostFlow, MinCostFlowParams diff --git a/test/test_config.py b/test/test_config.py index b0031d029..71842c2e1 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -4,8 +4,8 @@ import pytest import spras.config.config as config -from spras.config.schema import DEFAULT_HASH_LENGTH from spras.config.container_schema import DEFAULT_CONTAINER_PREFIX +from spras.config.schema import DEFAULT_HASH_LENGTH filler_dataset_data: dict[str, str | list[str]] = { "data_dir": "fake", From 0bcd1d15ae03e5cfb4b1a0398d64585b713bb7b5 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Tue, 15 Jul 2025 20:29:01 +0000 Subject: [PATCH 57/60] fix: coerce fields to validate default --- spras/config/algorithms.py | 16 +++++++++++----- test/test_config.py | 6 +++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/spras/config/algorithms.py b/spras/config/algorithms.py index 8c49c2ae2..c65ddae8a 100644 --- a/spras/config/algorithms.py +++ b/spras/config/algorithms.py @@ -4,6 +4,7 @@ and rather mainly contains validators and lower-level pydantic code. """ import ast +import copy from typing import Annotated, Any, Callable, Literal, Optional, Union, cast, get_args import numpy as np @@ -98,8 +99,14 @@ def construct_algorithm_model(name: str, model: type[BaseModel], model_default: # Map our fields to a list (assuming we have no nested keys), # and specify our user convenience validators - mapped_list_field: dict[str, Annotated] = { - name: (Annotated[ + mapped_list_field: dict[str, Annotated] = dict() + for field_name, field in model.model_fields.items(): + # We need to create a copy of the field, + # as we need to make sure that it gets mapped to the list coerced version of the field. + new_field = copy.deepcopy(field) + new_field.validate_default = True + + mapped_list_field[field_name] = (Annotated[ list[field.annotation], # This order isn't arbitrary. # https://docs.pydantic.dev/latest/concepts/validators/#ordering-of-validators @@ -111,9 +118,8 @@ def construct_algorithm_model(name: str, model: type[BaseModel], model_default: # json_schema_input_type (sensibly) overwrites, so we have to specify the entire union again here. json_schema_input_type=Union[field.annotation, list[field.annotation], str] ) if is_numpy_friendly(field.annotation) else None - ], field) for name, field in model.model_fields.items() - } - + ], new_field) + # Runtime assertion check: mapped_list_field does not contain any `__-prefixed` fields for key in mapped_list_field.keys(): assert not key.startswith("__"), f"A private key has been passed from {name}'s argument schema. " + \ diff --git a/test/test_config.py b/test/test_config.py index 71842c2e1..e38272f94 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -61,10 +61,10 @@ def get_test_config(): "name": "omicsintegrator2", "include": True, "runs": { - "strings": {"dummyMode": ["terminals", "others"], "b": 1}, + "strings": {"dummy_mode": ["terminals", "others"], "b": 1}, # spacing in np.linspace is on purpose - "singleton_string_np_linspace": {"dummyMode": "terminals", "b": "np.linspace(0, 5,2)"}, - "str_array_np_logspace": {"test": ["others", "all"], "g": "np.logspace(1,1)"} + "singleton_string_np_linspace": {"dummy_mode": "terminals", "b": "np.linspace(0, 5,2)"}, + "str_array_np_logspace": {"dummy_mode": ["others", "all"], "g": "np.logspace(1,1)"} } }, { From 1cb5d179a876517f15224aa18aba7e7e719cc9de Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Tue, 15 Jul 2025 21:55:57 +0000 Subject: [PATCH 58/60] fix: test --- config/config.yaml | 3 +- config/egfr.yaml | 57 ++++++------------ config/schema.json | 4 +- spras/config/algorithms.py | 7 ++- spras/config/config.py | 7 ++- test/test_config.py | 120 +++++++++++++++++++++++++++++-------- 6 files changed, 126 insertions(+), 72 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 49ae31f4f..30b438390 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -96,8 +96,7 @@ algorithms: - name: "domino" include: true - params: - include: true + runs: run1: slice_threshold: 0.3 module_threshold: 0.05 diff --git a/config/egfr.yaml b/config/egfr.yaml index 106963c62..363d213a1 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -25,39 +25,28 @@ algorithms: - 0.55 - 2 - 10 - d: - - 10 - g: - - 1e-3 - r: - - 0.01 - w: - - 0.1 - mu: - - 0.008 + d: 10 + g: 1e-3 + r: 0.01 + w: 0.1 + mu: 0.008 dummy_mode: ["file"] - name: omicsintegrator2 include: true runs: run1: - b: - - 4 - g: - - 0 + b: 4 + g: 0 run2: - b: - - 2 - g: - - 3 + b: 2 + g: 3 - name: meo include: true runs: run1: local_search: true - max_path_length: - - 3 - rand_restarts: - - 10 + max_path_length: 3 + rand_restarts: 10 run2: local_search: false max_path_length: 2 @@ -68,28 +57,20 @@ algorithms: include: true runs: run1: - slice_threshold: - - 0.3 - module_threshold: - - 0.05 + slice_threshold: 0.3 + module_threshold: 0.05 - name: mincostflow include: true runs: run1: - capacity: - - 15 - flow: - - 80 + capacity: 15 + flow: 80 run2: - capacity: - - 1 - flow: - - 6 + capacity: 1 + flow: 6 run3: - capacity: - - 5 - flow: - - 60 + capacity: 5 + flow: 60 datasets: - data_dir: input edge_files: diff --git a/config/schema.json b/config/schema.json index c15dcaa8a..494736275 100644 --- a/config/schema.json +++ b/config/schema.json @@ -399,7 +399,7 @@ }, "default": { "default": { - "_time": 1752606304.38952, + "_time": 1752611437.804319, "module_threshold": null, "slice_threshold": null } @@ -1086,7 +1086,7 @@ "noisy_edges": null, "random_terminals": null, "dummy_mode": null, - "seed": 1752606304389 + "seed": 1752611437804 } }, "title": "Runs", diff --git a/spras/config/algorithms.py b/spras/config/algorithms.py index c65ddae8a..889efab35 100644 --- a/spras/config/algorithms.py +++ b/spras/config/algorithms.py @@ -47,7 +47,8 @@ def python_evalish_coerce(value: Any) -> Any: } # To do this, we get the AST of our string as an expression - value_ast = ast.parse(value, mode='eval', filename='config.yaml') + # (filename='' is to make the error message more closely resemble that of eval.) + value_ast = ast.parse(value, mode='eval', filename='') # Then we do some light parsing - we're only looking to do some literal evaluation # (allowing light python notation) and some basic function parsing. Full python programs @@ -60,7 +61,7 @@ def python_evalish_coerce(value: Any) -> Any: # We get the function name back as a string function_name = ast.unparse(value_ast.body.func) - # and we use the (non-availability) safe `ast.literal_eval` to support light expressions. + # and we use the (non-availability) safe `ast.literal_eval` to support literals passed into functions. 
arguments = [ast.literal_eval(arg) for arg in value_ast.body.args] if function_name not in functions_dict: @@ -119,7 +120,7 @@ def construct_algorithm_model(name: str, model: type[BaseModel], model_default: json_schema_input_type=Union[field.annotation, list[field.annotation], str] ) if is_numpy_friendly(field.annotation) else None ], new_field) - + # Runtime assertion check: mapped_list_field does not contain any `__-prefixed` fields for key in mapped_list_field.keys(): assert not key.startswith("__"), f"A private key has been passed from {name}'s argument schema. " + \ diff --git a/spras/config/config.py b/spras/config/config.py index 6eeb760a7..2c0499fb7 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -79,7 +79,7 @@ def __init__(self, raw_config: dict[str, Any]): self.algorithms = None # A nested dict mapping algorithm names to dicts that map parameter hashes to parameter combinations. # Only includes algorithms that are set to be run with 'include: true'. - self.algorithm_params = None + self.algorithm_params: dict[str, dict[str, Any]] = dict() # Deprecated. Previously a dict mapping algorithm names to a Boolean tracking whether they used directed graphs. self.algorithm_directed = None # A dict with the analysis settings @@ -196,6 +196,11 @@ def process_algorithms(self, raw_config: RawConfig): if params_hash in prior_params_hashes: raise ValueError(f'Parameter hash collision detected. Increase the hash_length in the config file ' f'(current length {self.hash_length}).') + + # We preserve the run name as it carries useful information for the parameter log, + # and is useful for testing. + run_dict["_spras_run_name"] = run_name + self.algorithm_params[alg.name][params_hash] = run_dict def process_analysis(self, raw_config: RawConfig): diff --git a/test/test_config.py b/test/test_config.py index e38272f94..3d8d67d78 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -1,11 +1,17 @@ +import copy import pickle +from typing import Iterable import numpy as np import pytest +from pydantic import BaseModel import spras.config.config as config from spras.config.container_schema import DEFAULT_CONTAINER_PREFIX from spras.config.schema import DEFAULT_HASH_LENGTH +from spras.meo import MEOParams +from spras.mincostflow import MinCostFlowParams +from spras.omicsintegrator2 import DummyMode, OmicsIntegrator2Params filler_dataset_data: dict[str, str | list[str]] = { "data_dir": "fake", @@ -61,9 +67,9 @@ def get_test_config(): "name": "omicsintegrator2", "include": True, "runs": { - "strings": {"dummy_mode": ["terminals", "others"], "b": 1}, + "strings": {"dummy_mode": ["terminals", "others"], "b": 3}, # spacing in np.linspace is on purpose - "singleton_string_np_linspace": {"dummy_mode": "terminals", "b": "np.linspace(0, 5,2)"}, + "singleton_string_np_linspace": {"dummy_mode": "terminals", "b": "np.linspace(0, 5,2,)"}, "str_array_np_logspace": {"dummy_mode": ["others", "all"], "g": "np.logspace(1,1)"} } }, @@ -71,7 +77,8 @@ def get_test_config(): "name": "meo", "include": True, "runs": { - "numbersAndBool": {"max_path_length": 1, "rand_restarts": [float(2.0), 3], "local_search": True}, + "numbersAndBoolsDuplicate": {"max_path_length": 1, "rand_restarts": [float(2.0), 3], "local_search": [True, False]}, + "numbersAndBool": {"max_path_length": 2, "rand_restarts": [float(2.0), 3], "local_search": [True]}, "numbersAndBools": {"max_path_length": 1, "rand_restarts": [float(2.0), 3], "local_search": [True, False]}, "boolArrTest": {"local_search": [True, False], 
"max_path_length": "range(1, 3)"} } @@ -80,7 +87,7 @@ def get_test_config(): "name": "mincostflow", "include": True, "runs": { - "int64artifact": {"flow": "np.arange(5,6)", "capacity": [2, 3]} + "int64artifact": {"flow": "np.arange(5, 7)", "capacity": [2, 3]} } }, ], @@ -104,22 +111,49 @@ def get_test_config(): return test_raw_config -def value_test_util(name: str, configurations: list): - assert name in config.config.algorithm_params, f"{name} isn't a present algorithm configuration!" - - keys = config.config.algorithm_params[name] - values = [config.config.algorithm_params[name][key] for key in keys] +def value_test_util(alg: str, run_name: str, param_type: type[BaseModel], configurations: Iterable[BaseModel]): + """ + Utility test function to be able to test against certain named runs + under algorithms. This is, unfortunately, a very holistic function that depends + on the current state of how config parsing is. + """ + assert alg in config.config.algorithm_params, f"{alg} isn't a present algorithm name!" + runs = config.config.algorithm_params[alg] + # Filter using the internal _spras_run_name key. + runs = {hash: params for hash, params in runs.items() if params["_spras_run_name"] == run_name} + + # We copy values so we don't mutate it + values: list[dict] = copy.deepcopy(list(runs.values())) + for value in values: + # then, remove the internal key for easy comparison. + del value["_spras_run_name"] + + # Since configurations is a bunch of objects, we need to turn those into dictionaries + # and exclude their defaults. + new_configurations = [config.model_dump(exclude_defaults=True) for config in configurations] + + # Same for values, but we reserialize them first + values = [param_type.model_validate(value).model_dump(exclude_defaults=True) for value in values] + + # Now, we need to also remove any dynamic values from values and configurations + # (_time and seeded values) + for value in values: + value.pop("_time", None) + value.pop("seed", None) + for configuration in new_configurations: + configuration.pop("_time", None) + configuration.pop("seed", None) # https://stackoverflow.com/a/50486270/7589775 # Note: We use pickle as we also compare dictionaries in these two sets - some kind of consistent total ordering # is required for the tests to consistently pass when comparing them to `configurations`. 
- set_values = set(tuple(sorted(d.items())) for d in sorted(values, key=lambda x: pickle.dumps(x, protocol=3))) - set_configurations = set(tuple(sorted(d.items())) for d in sorted(configurations, key=lambda x: pickle.dumps(x, protocol=3))) + final_values = sorted(tuple(sorted(d.items())) for d in sorted(values, key=lambda x: pickle.dumps(x, protocol=3))) + final_configurations = sorted(tuple(sorted(d.items())) for d in sorted(new_configurations, key=lambda x: pickle.dumps(x, protocol=3))) - if set_values != set_configurations: - print(f'Got: {set_values}') - print(f'Expected: {set_configurations}') - assert set_values == set_configurations + if final_values != final_configurations: + print(f'Got: {final_values}') + print(f'Expected: {final_configurations}') + assert final_values == final_configurations class TestConfig: """ @@ -225,17 +259,51 @@ def test_config_values(self): test_config = get_test_config() config.init_global(test_config) - value_test_util('strings', [{'test': "str1", 'test2': "str2"}, {'test': 'str1', 'test2': 'str3'}]) - value_test_util('numbersAndBools', [{'a': 1, 'b': float(2.0), 'c': 4, 'd': 5.6, 'f': False}, {'a': 1, 'b': 3, 'c': 4, 'd': 5.6, 'f': False}]) - - value_test_util('singleton_int64_with_array', [{'test': 1, 'test2': 2}, {'test': 1, 'test2': 3}]) - value_test_util('singleton_string_np_linspace', [{'test': "str1", 'test2': 5.0}, {'test': "str1", 'test2': 0.0}]) - value_test_util('str_array_np_logspace', [{'test': "a", 'test2': 10}] * 10 + [{'test': "b", 'test2': 10}] * 10) - - value_test_util('int64artifact', [{'test': 5, 'test2': 2}, {'test': 5, 'test2': 3}]) - - value_test_util('boolArrTest', [{'flags': True, 'range': 1}, {'flags': False, 'range': 2}, - {'flags': False, 'range': 1}, {'flags': True, 'range': 2}]) + value_test_util('omicsintegrator2', 'strings', OmicsIntegrator2Params, [ + OmicsIntegrator2Params(dummy_mode=DummyMode.terminals, b=3), + OmicsIntegrator2Params(dummy_mode=DummyMode.others, b=3) + ]) + + value_test_util('omicsintegrator2', 'singleton_string_np_linspace', OmicsIntegrator2Params, [ + OmicsIntegrator2Params(dummy_mode=DummyMode.terminals, b=5.0), + OmicsIntegrator2Params(dummy_mode=DummyMode.terminals, b=0.0) + ]) + + value_test_util('omicsintegrator2', 'str_array_np_logspace', OmicsIntegrator2Params, [ + # While these both repeat 50 times, parameter hash makes sure to not duplicate the work. + # This serves as a test to make sure _time isn't inserted during parameter combinations. + OmicsIntegrator2Params(dummy_mode=DummyMode.others, g=10), OmicsIntegrator2Params(dummy_mode=DummyMode.all, g=10) + ]) + + value_test_util('meo', 'numbersAndBools', MEOParams, [ + MEOParams(max_path_length=1, rand_restarts=2, local_search=False), + MEOParams(max_path_length=1, rand_restarts=2, local_search=True), + MEOParams(max_path_length=1, rand_restarts=3, local_search=False), + MEOParams(max_path_length=1, rand_restarts=3, local_search=True), + ]) + + # Encoding this behavior: run names are not passed into the parameter hash, + # and thus won't duplicate runs. 
+ value_test_util('meo', 'numbersAndBoolsDuplicate', MEOParams, []) + + value_test_util('meo', 'numbersAndBool', MEOParams, [ + MEOParams(max_path_length=2, rand_restarts=2, local_search=True), + MEOParams(max_path_length=2, rand_restarts=3, local_search=True), + ]) + + value_test_util('mincostflow', 'int64artifact', MinCostFlowParams, [ + MinCostFlowParams(flow=5, capacity=2), + MinCostFlowParams(flow=5, capacity=3), + MinCostFlowParams(flow=6, capacity=2), + MinCostFlowParams(flow=6, capacity=3) + ]) + + value_test_util('meo', 'boolArrTest', MEOParams, [ + MEOParams(local_search=True, max_path_length=1), + MEOParams(local_search=True, max_path_length=2), + MEOParams(local_search=False, max_path_length=1), + MEOParams(local_search=False, max_path_length=2) + ]) @pytest.mark.parametrize("ml_include, eval_include, expected_ml, expected_eval", [ (True, True, True, True), From c93244ff32dddff416d7f21c838b28fef4ed9cc9 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 15 Jul 2025 22:11:56 +0000 Subject: [PATCH 59/60] fix: correct all algorithm usage --- spras/allpairs.py | 3 ++- spras/btb.py | 3 ++- spras/domino.py | 23 ++++++++++++----------- spras/meo.py | 26 ++++++++++++++------------ spras/mincostflow.py | 19 ++++++++++--------- spras/omicsintegrator1.py | 16 +++++++++------- spras/omicsintegrator2.py | 17 +++++++++-------- spras/pathlinker.py | 17 +++++++++-------- spras/rwr.py | 16 +++++++++------- spras/strwr.py | 18 ++++++++++-------- 10 files changed, 86 insertions(+), 72 deletions(-) diff --git a/spras/allpairs.py b/spras/allpairs.py index bba5df467..5c1476e8a 100644 --- a/spras/allpairs.py +++ b/spras/allpairs.py @@ -73,7 +73,8 @@ def generate_inputs(data: Dataset, filename_map): header=["#Interactor1", "Interactor2", "Weight"]) @staticmethod - def run(inputs, output_file, args=None, container_settings=ProcessedContainerSettings()): + def run(inputs, output_file, args=None, container_settings=None): + if not container_settings: container_settings = ProcessedContainerSettings() if not inputs["nodetypes"] or not inputs["network"] or not inputs["directed_flag"]: raise ValueError('Required All Pairs Shortest Paths arguments are missing') diff --git a/spras/btb.py b/spras/btb.py index 7f7a1b944..16bce75ae 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -66,7 +66,8 @@ def generate_inputs(data, filename_map): # Skips parameter validation step @staticmethod - def run(inputs, output_file, args=None, container_settings=ProcessedContainerSettings()): + def run(inputs, output_file, args=None, container_settings=None): + if not container_settings: container_settings = ProcessedContainerSettings() # Tests for pytest (docker container also runs this) # Testing out here avoids the trouble that container errors provide diff --git a/spras/domino.py b/spras/domino.py index a45a445a2..d3d761e1f 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -5,6 +5,7 @@ import pandas as pd from pydantic import ConfigDict +from spras.config.container_schema import ProcessedContainerSettings from spras.config.util import NondeterministicModel from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( @@ -77,9 +78,9 @@ def generate_inputs(data, filename_map): header=['ID_interactor_A', 'ppi', 'ID_interactor_B']) @staticmethod - def run(inputs, output_file, args=None, container_framework="docker"): - if not args: - args = DominoParams() + def run(inputs, output_file, args=None, container_settings=None): + if not container_settings: container_settings = 
ProcessedContainerSettings() + if not args: args = DominoParams() # Let visualization be always true, parallelization be always 1 thread, and use_cache be always false. if not inputs["network"] or not inputs["active_genes"]: @@ -90,19 +91,19 @@ def run(inputs, output_file, args=None, container_framework="docker"): # Each volume is a tuple (source, destination) volumes = list() - bind_path, network_file = prepare_volume(inputs["network"], work_dir) + bind_path, network_file = prepare_volume(inputs["network"], work_dir, container_settings) volumes.append(bind_path) - bind_path, node_file = prepare_volume(inputs["active_genes"], work_dir) + bind_path, node_file = prepare_volume(inputs["active_genes"], work_dir, container_settings) volumes.append(bind_path) out_dir = Path(output_file).parent out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir, container_settings) volumes.append(bind_path) slices_file = Path(out_dir, 'slices.txt') - bind_path, mapped_slices_file = prepare_volume(str(slices_file), work_dir) + bind_path, mapped_slices_file = prepare_volume(str(slices_file), work_dir, container_settings) volumes.append(bind_path) # Make the Python command to run within the container @@ -112,11 +113,11 @@ def run(inputs, output_file, args=None, container_framework="docker"): container_suffix = "domino" run_container_and_log('slicer', - container_framework, container_suffix, slicer_command, volumes, - work_dir) + work_dir, + container_settings) # Make the Python command to run within the container domino_command = ['domino', @@ -136,11 +137,11 @@ def run(inputs, output_file, args=None, container_framework="docker"): domino_command.extend(['--module_threshold', str(args.module_threshold)]) run_container_and_log('DOMINO', - container_framework, container_suffix, domino_command, volumes, - work_dir) + work_dir, + container_settings) # DOMINO creates a new folder in out_dir to output its modules HTML files into called active_genes # The filename is determined by the input active_genes and cannot be configured diff --git a/spras/meo.py b/spras/meo.py index 4b3f9299e..b3b8a5973 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict +from spras.config.container_schema import ProcessedContainerSettings from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( add_directionality_constant, @@ -145,7 +146,7 @@ def generate_inputs(data, filename_map): # TODO add parameter validation # TODO document required arguments @staticmethod - def run(inputs, output_file=None, args=None, container_framework="docker"): + def run(inputs, output_file=None, args=None, container_settings=None): """ Run Maximum Edge Orientation in the Docker image with the provided parameters. The properties file is generated from the provided arguments. @@ -154,8 +155,8 @@ def run(inputs, output_file=None, args=None, container_framework="docker"): Only the edge output file is retained. All other output files are deleted. 
""" - if not args: - args = MEOParams() + if not container_settings: container_settings = ProcessedContainerSettings() + if not args: args = MEOParams() if inputs["edges"] is None or inputs["sources"] is None or inputs["targets"] is None: raise ValueError('Required Maximum Edge Orientation arguments are missing') @@ -165,44 +166,45 @@ def run(inputs, output_file=None, args=None, container_framework="docker"): # Each volume is a tuple (src, dest) volumes = list() - bind_path, edge_file = prepare_volume(inputs["edges"], work_dir) + bind_path, edge_file = prepare_volume(inputs["edges"], work_dir, container_settings) volumes.append(bind_path) - bind_path, source_file = prepare_volume(inputs["sources"], work_dir) + bind_path, source_file = prepare_volume(inputs["sources"], work_dir, container_settings) volumes.append(bind_path) - bind_path, target_file = prepare_volume(inputs["targets"], work_dir) + bind_path, target_file = prepare_volume(inputs["targets"], work_dir, container_settings) volumes.append(bind_path) out_dir = Path(output_file).parent # Maximum Edge Orientation requires that the output directory exist out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_output_file = prepare_volume(str(output_file), work_dir) + bind_path, mapped_output_file = prepare_volume(str(output_file), work_dir, container_settings) volumes.append(bind_path) # Hard code the path output filename, which will be deleted path_output_file = Path(out_dir, 'path-output.txt') - bind_path, mapped_path_output = prepare_volume(str(path_output_file), work_dir) + bind_path, mapped_path_output = prepare_volume(str(path_output_file), work_dir, container_settings) volumes.append(bind_path) properties_file = 'meo-properties.txt' properties_file_local = Path(out_dir, properties_file) write_properties(filename=properties_file_local, edges=edge_file, sources=source_file, targets=target_file, edge_output=mapped_output_file, path_output=mapped_path_output, - max_path_length=args.max_path_length, local_search=args.local_search, rand_restarts=args.rand_restarts, framework=container_framework) - bind_path, properties_file = prepare_volume(str(properties_file_local), work_dir) + max_path_length=args.max_path_length, local_search=args.local_search, rand_restarts=args.rand_restarts, + framework=container_settings.framework) + bind_path, properties_file = prepare_volume(str(properties_file_local), work_dir, container_settings) volumes.append(bind_path) command = ['java', '-jar', '/meo/EOMain.jar', properties_file] container_suffix = "meo" run_container_and_log('Maximum Edge Orientation', - container_framework, container_suffix, command, volumes, - work_dir) + work_dir, + container_settings) properties_file_local.unlink(missing_ok=True) diff --git a/spras/mincostflow.py b/spras/mincostflow.py index 1f7ff0cf7..05dd22bf5 100644 --- a/spras/mincostflow.py +++ b/spras/mincostflow.py @@ -3,6 +3,7 @@ from pydantic import BaseModel, ConfigDict +from spras.config.container_schema import ProcessedContainerSettings from spras.containers import prepare_volume, run_container_and_log from spras.interactome import ( convert_undirected_to_directed, @@ -72,9 +73,9 @@ def generate_inputs(data, filename_map): header=False) @staticmethod - def run(inputs, output_file, args=None, container_framework="docker"): - if not args: - args = MinCostFlowParams() + def run(inputs, output_file, args=None, container_settings=None): + if not container_settings: container_settings = ProcessedContainerSettings() + if not args: args = MinCostFlowParams() # ensures 
that these parameters are required if not inputs["sources"] or not inputs["targets"] or not inputs["edges"]: @@ -86,19 +87,19 @@ def run(inputs, output_file, args=None, container_framework="docker"): # the tuple is for mapping the sources, targets, edges, and output volumes = list() - bind_path, sources_file = prepare_volume(inputs["sources"], work_dir) + bind_path, sources_file = prepare_volume(inputs["sources"], work_dir, container_settings) volumes.append(bind_path) - bind_path, targets_file = prepare_volume(inputs["targets"], work_dir) + bind_path, targets_file = prepare_volume(inputs["targets"], work_dir, container_settings) volumes.append(bind_path) - bind_path, edges_file = prepare_volume(inputs["edges"], work_dir) + bind_path, edges_file = prepare_volume(inputs["edges"], work_dir, container_settings) volumes.append(bind_path) # Create a prefix for the output filename and ensure the directory exists out_dir = Path(output_file).parent out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir, container_settings) volumes.append(bind_path) mapped_out_prefix = mapped_out_dir + '/out' @@ -121,11 +122,11 @@ def run(inputs, output_file, args=None, container_framework="docker"): # constructs a docker run call run_container_and_log('MinCostFlow', - container_framework, container_suffix, command, volumes, - work_dir) + work_dir, + container_settings) # Check the output of the container out_dir_content = sorted(out_dir.glob('*.sif')) diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index d9ee603fb..9d1396902 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict, Field +from spras.config.container_schema import ProcessedContainerSettings from spras.containers import prepare_volume, run_container_and_log from spras.interactome import reinsert_direction_col_mixed from spras.prm import PRM @@ -142,7 +143,8 @@ def generate_inputs(data, filename_map): # TODO add support for knockout argument # TODO add reasonable default values @staticmethod - def run(inputs, output_file, args, container_framework="docker"): + def run(inputs, output_file, args, container_settings=None): + if not container_settings: container_settings = ProcessedContainerSettings() if inputs["edges"] is None or inputs["prizes"] is None or output_file is None: raise ValueError('Required Omics Integrator 1 arguments are missing') @@ -151,10 +153,10 @@ def run(inputs, output_file, args, container_framework="docker"): # Each volume is a tuple (src, dest) volumes = list() - bind_path, edge_file = prepare_volume(inputs["edges"], work_dir) + bind_path, edge_file = prepare_volume(inputs["edges"], work_dir, container_settings) volumes.append(bind_path) - bind_path, prize_file = prepare_volume(inputs["prizes"], work_dir) + bind_path, prize_file = prepare_volume(inputs["prizes"], work_dir, container_settings) volumes.append(bind_path) # 4 dummy mode possibilities: @@ -167,13 +169,13 @@ def run(inputs, output_file, args, container_framework="docker"): if args.dummy_mode == 'file': if inputs["dummy_nodes"] is None: raise ValueError("dummy_nodes file is required when dummy_mode is set to 'file'") - bind_path, dummy_file = prepare_volume(inputs["dummy_nodes"], work_dir) + bind_path, dummy_file = prepare_volume(inputs["dummy_nodes"], work_dir, container_settings) volumes.append(bind_path) out_dir = Path(output_file).parent # Omics 
Integrator 1 requires that the output directory exist out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir, container_settings) volumes.append(bind_path) conf_file = 'oi1-configuration.txt' @@ -181,7 +183,7 @@ def run(inputs, output_file, args, container_framework="docker"): # Temporary file that will be deleted after running Omics Integrator 1 write_conf(conf_file_local, w=args.w, b=args.b, d=args.d, mu=args.mu, noise=args.noise, g=args.g, r=args.r) - bind_path, conf_file = prepare_volume(str(conf_file_local), work_dir) + bind_path, conf_file = prepare_volume(str(conf_file_local), work_dir, container_settings) volumes.append(bind_path) command = ['python', '/OmicsIntegrator/scripts/forest.py', @@ -213,11 +215,11 @@ def run(inputs, output_file, args, container_framework="docker"): container_suffix = "omics-integrator-1:no-conda" # no-conda version is the default run_container_and_log('Omics Integrator 1', - container_framework, container_suffix, # no-conda version is the default command, volumes, work_dir, + container_settings, {'TMPDIR': mapped_out_dir}) conf_file_local.unlink(missing_ok=True) diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index aef4f3c48..8b5c29799 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -5,6 +5,7 @@ import pandas as pd from pydantic import BaseModel, ConfigDict, Field +from spras.config.container_schema import ProcessedContainerSettings from spras.config.util import CaseInsensitiveEnum from spras.containers import prepare_volume, run_container_and_log from spras.dataset import Dataset @@ -110,7 +111,7 @@ def generate_inputs(data: Dataset, filename_map): # TODO add reasonable default values # TODO document required arguments @staticmethod - def run(inputs, output_file, args=None, container_framework="docker"): + def run(inputs, output_file, args=None, container_settings=None): """ Run Omics Integrator 2 in the Docker image with the provided parameters. Only the .tsv output file is retained and then renamed. 
@@ -118,8 +119,8 @@ def run(inputs, output_file, args=None, container_framework="docker"): @param output_file: the name of the output file, which will overwrite any existing file with this name @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) """ - if not args: - args = OmicsIntegrator2Params() + if not container_settings: container_settings = ProcessedContainerSettings() + if not args: args = OmicsIntegrator2Params() if inputs["edges"] is None or inputs["prizes"] is None: raise ValueError('Required Omics Integrator 2 arguments are missing') @@ -129,16 +130,16 @@ def run(inputs, output_file, args=None, container_framework="docker"): # Each volume is a tuple (src, dest) volumes = list() - bind_path, edge_file = prepare_volume(inputs["edges"], work_dir) + bind_path, edge_file = prepare_volume(inputs["edges"], work_dir, container_settings) volumes.append(bind_path) - bind_path, prize_file = prepare_volume(inputs["prizes"], work_dir) + bind_path, prize_file = prepare_volume(inputs["prizes"], work_dir, container_settings) volumes.append(bind_path) out_dir = Path(output_file).parent # Omics Integrator 2 requires that the output directory exist out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(out_dir, work_dir) + bind_path, mapped_out_dir = prepare_volume(out_dir, work_dir, container_settings) volumes.append(bind_path) command = ['OmicsIntegrator', '-e', edge_file, '-p', prize_file, @@ -164,11 +165,11 @@ def run(inputs, output_file, args=None, container_framework="docker"): container_suffix = "omics-integrator-2:v2" run_container_and_log('Omics Integrator 2', - container_framework, container_suffix, command, volumes, - work_dir) + work_dir, + container_settings) # TODO do we want to retain other output files? 
# TODO if deleting other output files, write them all to a tmp directory and copy diff --git a/spras/pathlinker.py b/spras/pathlinker.py index da0a91ba2..f71015f0e 100644 --- a/spras/pathlinker.py +++ b/spras/pathlinker.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict +from spras.config.container_schema import ProcessedContainerSettings from spras.containers import prepare_volume, run_container_and_log from spras.dataset import Dataset from spras.interactome import ( @@ -76,9 +77,9 @@ def generate_inputs(data, filename_map): header=["#Interactor1","Interactor2","Weight"]) @staticmethod - def run(inputs, output_file, args=None, container_framework="docker"): - if not args: - args = PathLinkerParams() + def run(inputs, output_file, args=None, container_settings=None): + if not container_settings: container_settings = ProcessedContainerSettings() + if not args: args = PathLinkerParams() if not inputs["nodetypes"] or not inputs["network"]: raise ValueError('Required PathLinker arguments are missing') @@ -88,10 +89,10 @@ def run(inputs, output_file, args=None, container_framework="docker"): # Each volume is a tuple (src, dest) volumes = list() - bind_path, node_file = prepare_volume(inputs["nodetypes"], work_dir) + bind_path, node_file = prepare_volume(inputs["nodetypes"], work_dir, container_settings) volumes.append(bind_path) - bind_path, network_file = prepare_volume(inputs["network"], work_dir) + bind_path, network_file = prepare_volume(inputs["network"], work_dir, container_settings) volumes.append(bind_path) # PathLinker does not provide an argument to set the output directory @@ -99,7 +100,7 @@ def run(inputs, output_file, args=None, container_framework="docker"): out_dir = Path(output_file).parent # PathLinker requires that the output directory exist out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir, container_settings) volumes.append(bind_path) mapped_out_prefix = mapped_out_dir + '/out' # Use posix path inside the container @@ -113,11 +114,11 @@ def run(inputs, output_file, args=None, container_framework="docker"): container_suffix = "pathlinker:v2" run_container_and_log('PathLinker', - container_framework, container_suffix, command, volumes, - work_dir) + work_dir, + container_settings) # Rename the primary output file to match the desired output filename # Currently PathLinker only writes one output file so we do not need to delete others diff --git a/spras/rwr.py b/spras/rwr.py index dff5bdb97..a46e734e6 100644 --- a/spras/rwr.py +++ b/spras/rwr.py @@ -4,6 +4,7 @@ import pandas as pd from pydantic import BaseModel, ConfigDict +from spras.config.container_schema import ProcessedContainerSettings from spras.containers import prepare_volume, run_container from spras.dataset import Dataset from spras.interactome import reinsert_direction_col_directed @@ -45,7 +46,8 @@ def generate_inputs(data, filename_map): edges.to_csv(filename_map['network'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False) @staticmethod - def run(inputs, output_file, args, container_framework="docker"): + def run(inputs, output_file, args, container_settings=None): + if not container_settings: container_settings = ProcessedContainerSettings() if not inputs["nodes"] or not inputs["network"]: raise ValueError('Required RWR arguments are missing') @@ -60,10 +62,10 @@ def run(inputs, output_file, args, container_framework="docker"): # Each volume is a tuple 
(src, dest) volumes = list() - bind_path, nodes_file = prepare_volume(inputs["nodes"], work_dir) + bind_path, nodes_file = prepare_volume(inputs["nodes"], work_dir, container_settings) volumes.append(bind_path) - bind_path, network_file = prepare_volume(inputs["network"], work_dir) + bind_path, network_file = prepare_volume(inputs["network"], work_dir, container_settings) volumes.append(bind_path) # RWR does not provide an argument to set the output directory @@ -71,7 +73,7 @@ def run(inputs, output_file, args, container_framework="docker"): out_dir = Path(output_file).parent # RWR requires that the output directory exist out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir, container_settings) volumes.append(bind_path) mapped_out_prefix = mapped_out_dir + "/output.txt" command = ['python', @@ -85,11 +87,11 @@ def run(inputs, output_file, args, container_framework="docker"): command.extend(['--alpha', str(args.alpha)]) container_suffix = 'rwr:v1' - out = run_container(container_framework, - container_suffix, + out = run_container(container_suffix, command, volumes, - work_dir) + work_dir, + container_settings) print(out) # Rename the primary output file to match the desired output filename diff --git a/spras/strwr.py b/spras/strwr.py index 1b9159eff..28a76099e 100644 --- a/spras/strwr.py +++ b/spras/strwr.py @@ -4,6 +4,7 @@ import pandas as pd from pydantic import BaseModel, ConfigDict +from spras.config.container_schema import ProcessedContainerSettings from spras.containers import prepare_volume, run_container from spras.dataset import Dataset from spras.interactome import reinsert_direction_col_directed @@ -47,7 +48,8 @@ def generate_inputs(data, filename_map): edges.to_csv(filename_map['network'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False) @staticmethod - def run(inputs, output_file, args, container_framework="docker"): + def run(inputs, output_file, args, container_settings=None): + if not container_settings: container_settings = ProcessedContainerSettings() if not inputs["sources"] or not inputs["targets"] or not inputs["network"] or not output_file: raise ValueError('Required local_neighborhood arguments are missing') @@ -63,13 +65,13 @@ def run(inputs, output_file, args, container_framework="docker"): # Each volume is a tuple (src, dest) volumes = list() - bind_path, source_file = prepare_volume(inputs["sources"], work_dir) + bind_path, source_file = prepare_volume(inputs["sources"], work_dir, container_settings) volumes.append(bind_path) - bind_path, target_file = prepare_volume(inputs["targets"], work_dir) + bind_path, target_file = prepare_volume(inputs["targets"], work_dir, container_settings) volumes.append(bind_path) - bind_path, network_file = prepare_volume(inputs["network"], work_dir) + bind_path, network_file = prepare_volume(inputs["network"], work_dir, container_settings) volumes.append(bind_path) # ST_RWR does not provide an argument to set the output directory @@ -77,7 +79,7 @@ def run(inputs, output_file, args, container_framework="docker"): out_dir = Path(output_file).parent # ST_RWR requires that the output directory exist out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir, container_settings) volumes.append(bind_path) mapped_out_prefix = mapped_out_dir + "/output.txt" command = 
['python',
@@ -92,11 +94,11 @@
         command.extend(['--alpha', str(args.alpha)])

     container_suffix = 'st-rwr:v1'
-    out = run_container(container_framework,
-                        container_suffix,
+    out = run_container(container_suffix,
                         command,
                         volumes,
-                        work_dir)
+                        work_dir,
+                        container_settings)
     print(out)

     # Rename the primary output file to match the desired output filename

From 69268f4ca83ee6d9977f12d0124e613df67e0ab1 Mon Sep 17 00:00:00 2001
From: "Tristan F.-R."
Date: Tue, 15 Jul 2025 23:10:59 +0000
Subject: [PATCH 60/60] chore: talk about resumability

---
 Snakefile | 2 +-
 spras/config/schema.py | 24 ++++++++++++++++++++----
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index 34681cb02..358a83f42 100644
--- a/Snakefile
+++ b/Snakefile
@@ -25,7 +25,7 @@ algorithm_params = _config.config.algorithm_params
 algorithm_directed = _config.config.algorithm_directed
 pca_params = _config.config.pca_params
 hac_params = _config.config.hac_params
-FRAMEWORK = _config.config.container_framework
+FRAMEWORK = _config.config.container_settings.framework

 # Return the dataset or gold_standard dictionary from the config file given the label
 def get_dataset(_datasets, label):
diff --git a/spras/config/schema.py b/spras/config/schema.py
index b2ff0b3bd..49100bef3 100644
--- a/spras/config/schema.py
+++ b/spras/config/schema.py
@@ -117,11 +117,27 @@ class ReconstructionSettings(BaseModel):
     model_config = ConfigDict(extra='forbid')

 class RawConfig(BaseModel):
+    resume: bool = Field(alias="_resume", default=False)
+    """
+    Declares whether a config is resumable. This is meant to be used internally, as it
+    enforces extra preconditions on the config (all defaults must be explicitly declared
+    within the config, and the config must match the specified hash).
+
+    Unlike their non-resumable counterparts, resumable configurations store all configuration
+    defaults (including, most importantly, _time from NondeterministicModel and any seeded values).
+
+    Resumable configurations are generated whenever a non-resumable configuration is run, inside
+    `{output}/resumables/{hash}.yaml`, where {hash} is a hash of the configuration
+    _excluding_ default values.
+
+    By default, SPRAS runs through Snakemake will generate a resumable configuration if none is
+    present, or reuse the configuration associated with its hash otherwise.
+    """
+
     containers: ContainerSettings

-    hash_length: int = Field(
-        description="The length of the hash used to identify a parameter combination",
-        default=DEFAULT_HASH_LENGTH)
+    hash_length: int = DEFAULT_HASH_LENGTH
+    "The length of the hash used to identify a parameter combination"

     # See algorithms.py for more information about AlgorithmUnion
     algorithms: list[AlgorithmUnion] # type: ignore - pydantic allows this.
@@ -131,4 +147,4 @@ class RawConfig(BaseModel):

     reconstruction_settings: ReconstructionSettings

-    model_config = ConfigDict(extra='forbid')
+    model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True)
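
The patches above lean on a handful of pydantic and Python mechanics that are easy to miss in diff form; the sketches below illustrate them with simplified stand-in models, not code from the SPRAS tree. First, the `discriminator`/`oneOf` block added to config/schema.json corresponds to pydantic's tagged-union feature: validation dispatches on the literal `name` field instead of trying every member of a plain `anyOf` union, and the discriminator mapping in the JSON schema falls out of the same declaration. A minimal sketch (the two toy models here are stand-ins):

from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter

class PathLinkerModel(BaseModel):
    name: Literal["pathlinker"]
    k: int = 100

class MinCostFlowModel(BaseModel):
    name: Literal["mincostflow"]
    flow: int = 80

# Dispatching on "name" is also what emits the discriminator mapping
# into the generated JSON schema.
AlgorithmUnion = Annotated[
    Union[PathLinkerModel, MinCostFlowModel],
    Field(discriminator="name"),
]

adapter = TypeAdapter(list[AlgorithmUnion])
parsed = adapter.validate_python([
    {"name": "pathlinker", "k": 10},
    {"name": "mincostflow"},
])
print(parsed)  # [PathLinkerModel(name='pathlinker', k=10), MinCostFlowModel(name='mincostflow', flow=80)]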
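
Likewise, the `extra='forbid'` threaded through the params models and the generated run models changes pydantic's default behavior of silently dropping unknown keys, which is how the misspelled `dummyMode` keys (for `dummy_mode`) survived in the old test config; with forbid, the typo becomes a validation error. A minimal sketch:

from pydantic import BaseModel, ConfigDict, ValidationError

class Params(BaseModel):
    dummy_mode: str = "terminals"

    model_config = ConfigDict(extra="forbid")

try:
    Params(dummyMode="others")  # camelCase typo for dummy_mode
except ValidationError as err:
    print(err)  # dummyMode: Extra inputs are not permitted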
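
The `python_evalish_coerce` validator shown in the spras/config/algorithms.py hunks accepts strings such as "np.linspace(0, 5,2)" without ever calling `eval`: the string is parsed into an AST, checked to be a single call to a whitelisted function, and the arguments are recovered with `ast.literal_eval`. A self-contained approximation of that logic (the whitelist here is illustrative, not the exact `functions_dict` from the patch):

import ast

import numpy as np

ALLOWED = {
    "np.linspace": np.linspace,
    "np.logspace": np.logspace,
    "np.arange": np.arange,
    "range": range,
}

def evalish(value: str):
    # Parse the string as a single expression instead of executing it.
    tree = ast.parse(value, mode="eval")
    if not isinstance(tree.body, ast.Call):
        raise ValueError(f'The python code "{value}" should be calling a function directly.')
    function_name = ast.unparse(tree.body.func)
    if function_name not in ALLOWED:
        raise ValueError(f"{function_name} is not an allowed function to be run!")
    # literal_eval only accepts literals, so arbitrary code in the
    # arguments is rejected rather than executed.
    arguments = [ast.literal_eval(arg) for arg in tree.body.args]
    return ALLOWED[function_name](*arguments)

print(list(evalish("np.linspace(0, 5,2)")))  # [0.0, 5.0]
print(list(evalish("range(1, 3)")))          # [1, 2]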
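
PATCH 57 deep-copies each field and flips `validate_default` because pydantic v2 otherwise returns declared defaults as-is, bypassing validators and coercion entirely. A toy field (not a SPRAS model) showing the difference:

from pydantic import BaseModel, Field

class Lenient(BaseModel):
    k: int = Field(default="100")  # default bypasses validation

class Checked(BaseModel):
    k: int = Field(default="100", validate_default=True)

print(repr(Lenient().k))  # '100' -- the string default leaks through unchecked
print(repr(Checked().k))  # 100 -- the default is coerced (or rejected) like user input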
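
PATCH 59 also moves every run() signature from `container_settings=ProcessedContainerSettings()` to `container_settings=None` with an in-body fallback. A Python default argument is evaluated once at function-definition time and shared by every call, which is hazardous for a mutable settings object; the None sentinel constructs a fresh value per call. Illustrated with a stand-in dataclass (not the real ProcessedContainerSettings):

from dataclasses import dataclass, field

@dataclass
class Settings:
    volumes: list[str] = field(default_factory=list)

def run_shared(settings=Settings()):  # one instance, created at definition time
    settings.volumes.append("bind")
    return settings

def run_fresh(settings=None):  # a new instance on every call
    if settings is None:
        settings = Settings()
    settings.volumes.append("bind")
    return settings

print(run_shared().volumes)  # ['bind']
print(run_shared().volumes)  # ['bind', 'bind'] -- state leaks between calls
print(run_fresh().volumes)   # ['bind']
print(run_fresh().volumes)   # ['bind']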
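
Finally, PATCH 60's resumability docstring keys resumable configs by a hash of the configuration excluding default values. With pydantic, `model_dump(exclude_defaults=True)` (already used in the tests above) yields exactly that non-default view, which can then be hashed deterministically and truncated to `hash_length`; a sketch of the idea, with a hypothetical `config_hash` helper and toy model rather than the actual SPRAS hashing code:

import hashlib
import json

from pydantic import BaseModel

class Toy(BaseModel):
    hash_length: int = 7
    label: str = "run"

def config_hash(cfg: BaseModel, length: int = 7) -> str:
    # Serialize only non-default fields, with sorted keys for determinism,
    # so unrelated defaults (timestamps, seeds) cannot perturb the hash.
    payload = json.dumps(cfg.model_dump(exclude_defaults=True), sort_keys=True)
    return hashlib.sha256(payload.encode()).hexdigest()[:length]

print(config_hash(Toy(label="egfr")))  # stable 7-character identifier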