5 changes: 5 additions & 0 deletions .github/workflows/build-containers.yml
@@ -48,6 +48,11 @@ jobs:
with:
path: docker-wrappers/Cytoscape
container: reedcompbio/py4cytoscape
build-and-remove-capdsd:
uses: "./.github/workflows/build-and-remove-template.yml"
with:
path: docker-wrappers/capDSD
container: reedcompbio/capdsd
build-and-remove-spras:
uses: "./.github/workflows/build-and-remove-template.yml"
with:
12 changes: 9 additions & 3 deletions Snakefile
@@ -6,6 +6,7 @@ from spras.dataset import Dataset
from spras.evaluation import Evaluation
from spras.analysis import ml, summary, graphspace, cytoscape
import spras.config as _config
from spras.util import extend_filename

# Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037
# and using the wrong separator prevents Snakemake from matching filenames to the rules that can produce them
@@ -189,7 +190,9 @@ checkpoint prepare_input:
# Use the algorithm's generate_inputs function to load the merged dataset, extract the relevant columns,
# and write the output files specified by required_inputs
# The filename_map provides the output file path for each required input file type
filename_map = {input_type: SEP.join([out_dir, 'prepared', f'{wildcards.dataset}-{wildcards.algorithm}-inputs', f'{input_type}.txt']) for input_type in runner.get_required_inputs(wildcards.algorithm)}
filename_map = {input_type: SEP.join(
[out_dir, 'prepared', f'{wildcards.dataset}-{wildcards.algorithm}-inputs', extend_filename(input_type)]
) for input_type in runner.get_required_inputs(wildcards.algorithm)}
runner.prepare_inputs(wildcards.algorithm, input.dataset_file, filename_map)

# Collect the prepared input files from the specified directory
@@ -207,7 +210,7 @@ def collect_prepared_input(wildcards):
prepared_dir = SEP.join([out_dir, 'prepared', f'{wildcards.dataset}-{wildcards.algorithm}-inputs'])

# Construct the list of expected prepared input files for the reconstruction algorithm
prepared_inputs = expand(f'{prepared_dir}{SEP}{{type}}.txt',type=runner.get_required_inputs(algorithm=wildcards.algorithm))
prepared_inputs = expand(f'{prepared_dir}{SEP}{{type}}',type=map(extend_filename, runner.get_required_inputs(algorithm=wildcards.algorithm)))
# If the directory is missing, do nothing because the missing output triggers running prepare_input
if os.path.isdir(prepared_dir):
# If the directory exists, confirm all prepared input files exist as well (as opposed to some or none)
@@ -238,7 +241,10 @@ rule reconstruct:
# Create a copy so that the updates are not written to the parameters logfile
params = reconstruction_params(wildcards.algorithm, wildcards.params).copy()
# Add the input files
params.update(dict(zip(runner.get_required_inputs(wildcards.algorithm), *{input}, strict=True)))
params.update(dict(zip(
[inp.replace(".", "_") for inp in runner.get_required_inputs(wildcards.algorithm)],
*{input}, strict=True
)))
# Add the output file
# All run functions can accept a relative path to the output file that should be written that is called 'output_file'
params['output_file'] = output.pathway_file
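For context, a minimal sketch of what these two Snakefile changes produce for capDSD; the dataset name and output directory below are illustrative, not taken from the config:

# capdsd declares required_inputs = ['ppi', 'ppip.ppip']
required = ['ppi', 'ppip.ppip']

# prepare_input: extend_filename appends '.txt' only when no extension is present,
# so 'ppi' becomes 'ppi.txt' while 'ppip.ppip' keeps its extension
filename_map = {
    'ppi': 'output/prepared/data1-capdsd-inputs/ppi.txt',
    'ppip.ppip': 'output/prepared/data1-capdsd-inputs/ppip.ppip',
}

# reconstruct: dots are replaced so the input types are usable as parameter keys
param_keys = [inp.replace('.', '_') for inp in required]  # ['ppi', 'ppip_ppip']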
6 changes: 5 additions & 1 deletion config/config.yaml
@@ -24,7 +24,7 @@ container_registry:
base_url: docker.io
# The owner or project of the registry
# For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs
owner: reedcompbio
owner: pubtristanf

# This list of algorithms should be generated by a script which checks the filesystem for installs.
# It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm
@@ -96,6 +96,10 @@ algorithms:
slice_threshold: [0.3]
module_threshold: [0.05]

- name: "capdsd"
params:
include: true

# Here we specify which pathways to run and other file location information.
# DataLoader.py can currently only load a single dataset
# Assume that if a dataset label does not change, the lists of associated input files do not change
8 changes: 8 additions & 0 deletions docker-wrappers/capDSD/Dockerfile
@@ -0,0 +1,8 @@
FROM python:2.7.18

RUN pip install numpy==1.16.6
# Since this is an arbitrary ZIP file hosted on the internet, we download it from the Web Archive link instead.
# TODO: checksum?
RUN wget https://web.archive.org/web/20250616194746/http://dsd.cs.tufts.edu/capdsd/files//capDSD-src.zip

RUN unzip capDSD-src.zip -d capDSD/
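The checksum TODO above could be addressed by pinning a digest at build time. A minimal Python sketch (Python 2 compatible, since the image is python:2.7.18); the digest below is a placeholder, not the real hash of capDSD-src.zip:

import hashlib

# Placeholder digest; replace with the actual sha256 of capDSD-src.zip
EXPECTED_SHA256 = '0' * 64

def verify(path):
    # Hash the file in chunks to avoid loading the whole archive into memory
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            h.update(chunk)
    if h.hexdigest() != EXPECTED_SHA256:
        raise RuntimeError('checksum mismatch for %s: %s' % (path, h.hexdigest()))

verify('capDSD-src.zip')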
3 changes: 3 additions & 0 deletions docker-wrappers/capDSD/README.md
@@ -0,0 +1,3 @@
# capDSD Docker Image

A Docker image for [capDSD](https://doi.org/10.1093/bioinformatics/btu263) that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/capdsd).
89 changes: 89 additions & 0 deletions spras/capDSD.py
@@ -0,0 +1,89 @@
from pathlib import Path

from spras.containers import prepare_volume, run_container_and_log
from spras.dataset import Dataset
from spras.interactome import convert_directed_to_undirected
from spras.prm import PRM

__all__ = ['CapDSD']

class CapDSD(PRM):
required_inputs = ['ppi', 'ppip.ppip']

@staticmethod
def generate_inputs(data: Dataset, filename_map: dict[str, str]):
"""
Access fields from the dataset and write the required input files
@param data: dataset
@param filename_map: a dict mapping file types in the required_inputs to the filename for that type
"""
for input_type in CapDSD.required_inputs:
if input_type not in filename_map:
raise ValueError(f"{input_type} filename is missing")

# create the ppi
ppi = data.get_interactome()
ppi = convert_directed_to_undirected(ppi)
ppi.to_csv(filename_map['ppi'], sep='\t', index=False, columns=["Interactor1", "Interactor2", "Weight"],
header=False)

# then, we want to 'guide' the ppi with a .ppip file, which is a secondary,
# trusted interactome: we use the directed edges from the interactome as our
# trusted edges.
ppip = data.get_interactome()
ppip = ppip[ppip["Direction"] == "D"]
ppip.to_csv(filename_map['ppip.ppip'], sep='\t', index=False, columns=["Interactor1", "Interactor2"], header=False)

@staticmethod
def run(ppi=None, ppip=None, output_file=None, container_framework="docker"):
"""
Run capDSD with Docker
@param ppi: input interactome file containing only undirected edges (required)
@param ppip: input interactome file containing only directed edges (required)
@param output_file: path to the output matrix (required)
@param container_framework: specify a container framework
"""
if not ppi or not ppip or not output_file:
raise ValueError("Required capDSD arguments are missing")

work_dir = '/capDSD'

volumes = list()

bind_path, ppi_file = prepare_volume(ppi, work_dir)
volumes.append(bind_path)

bind_path, ppip_file = prepare_volume(ppip, work_dir)
volumes.append(bind_path)

# Create a prefix for the output filename and ensure the directory exists
out_dir = Path(output_file).parent
out_dir.mkdir(parents=True, exist_ok=True)
bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir)
volumes.append(bind_path)
mapped_out_prefix = mapped_out_dir + '/output'

container_suffix = "capdsd"

# Since the volumes are bound under different folders, we can safely
# use the ppip_file's parent directory.
command = ['python',
'/capDSD/DSD.py',
'-pathmode', '1',
'-p', str(Path(ppip_file).parent),
ppi_file, mapped_out_prefix]


run_container_and_log('capDSD',
container_framework,
container_suffix,
command,
volumes,
work_dir)

output_matrix = Path(out_dir) / 'output.dsd'
output_matrix.rename(output_file)

@staticmethod
def parse_output(raw_pathway_file: str, standardized_pathway_file: str):
pass
1 change: 1 addition & 0 deletions spras/runner.py
@@ -1,5 +1,6 @@
# supported algorithm imports
from spras.allpairs import AllPairs as allpairs
from spras.capDSD import CapDSD as capdsd
from spras.dataset import Dataset
from spras.domino import DOMINO as domino
from spras.meo import MEO as meo
10 changes: 10 additions & 0 deletions spras/util.py
@@ -105,3 +105,13 @@ def duplicate_edges(df: pd.DataFrame) -> (pd.DataFrame, bool):
unique_edges_df = df_sorted.drop_duplicates(subset=["Node1", "Node2", "Direction"], keep="first", ignore_index=True)

return unique_edges_df, not unique_edges_df.equals(df)

# https://stackoverflow.com/a/49689414/7589775
def extend_filename(file_name: str, extension=".txt") -> str:
"""
Adds a default file extension if none is provided.
"""
root, ext = os.path.splitext(file_name)
if not ext:
ext = extension
return f'{root}{ext}'
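A few illustrative calls, assuming the behavior above:

extend_filename('network')        # -> 'network.txt'
extend_filename('ppip.ppip')      # -> 'ppip.ppip' (existing extension kept)
extend_filename('edges', '.tsv')  # -> 'edges.tsv'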
5 changes: 5 additions & 0 deletions test/capDSD/expected/capdsd-matrix-expected.txt
@@ -0,0 +1,5 @@
A B C D
A 0.0 1.9999962366471538 4.153838337651781 4.153838337651781
B 1.9999962366471538 0.0 2.153842101004627 2.153842101004627
C 4.153838337651781 2.153842101004627 0.0 0.0
D 4.153838337651781 2.153842101004627 0.0 0.0
2 changes: 2 additions & 0 deletions test/capDSD/input/capdsd-ppi.txt
@@ -0,0 +1,2 @@
A B 0.5
C D 0.75
1 change: 1 addition & 0 deletions test/capDSD/input/capdsd-ppip.ppip
@@ -0,0 +1 @@
B C
61 changes: 61 additions & 0 deletions test/capDSD/test_capDSD.py
@@ -0,0 +1,61 @@
import filecmp
import shutil
from pathlib import Path

import pytest

import spras.config as config
from spras.capDSD import CapDSD

config.init_from_file("config/config.yaml")

TEST_DIR = Path('test', 'capDSD')
IN_DIR = TEST_DIR / 'input'
OUT_DIR = TEST_DIR / 'output'
EXPECTED_DIR = TEST_DIR / 'expected'

INPUT_PPI = IN_DIR / 'capdsd-ppi.txt'
INPUT_PPIP = IN_DIR / 'capdsd-ppip.ppip'

OUT_FILE = OUT_DIR / 'output.txt'
EXPECTED_FILE = EXPECTED_DIR / 'capdsd-matrix-expected.txt'

class TestCapDSD:
"""
Run capDSD tests in the Docker image
"""
def test_capdsd_required(self):
OUT_FILE.unlink(missing_ok=True)
# Only include required arguments
CapDSD.run(
ppi=INPUT_PPI,
ppip=INPUT_PPIP,
output_file=OUT_FILE
)
assert OUT_FILE.exists()

assert filecmp.cmp(OUT_FILE, EXPECTED_FILE)

def test_capdsd_missing(self):
# Test the expected error is raised when required arguments are missing
with pytest.raises(ValueError):
# No PPI
CapDSD.run(
ppip=INPUT_PPIP,
output_file=OUT_FILE
)

# Only run Singularity test if the binary is available on the system
# spython is only available on Unix, but do not explicitly skip non-Unix platforms
@pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system')
def test_capdsd_singularity(self):
OUT_FILE.unlink(missing_ok=True)
# Only include required arguments and run with Singularity
CapDSD.run(
ppi=INPUT_PPI,
ppip=INPUT_PPIP,
output_file=OUT_FILE,
container_framework="singularity")
assert OUT_FILE.exists()

assert filecmp.cmp(OUT_FILE, EXPECTED_FILE)
2 changes: 2 additions & 0 deletions test/generate-inputs/expected/capdsd-ppi-expected.txt
@@ -0,0 +1,2 @@
test_A B 0.98
B C 0.77
3 changes: 2 additions & 1 deletion test/generate-inputs/test_generate_inputs.py
@@ -16,7 +16,8 @@
'omicsintegrator2': 'edges',
'domino': 'network',
'pathlinker': 'network',
'allpairs': 'network'
'allpairs': 'network',
'capdsd': 'ppi'
}


2 changes: 1 addition & 1 deletion test/parse-outputs/test_parse_outputs.py
@@ -12,7 +12,7 @@
# the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt
# from https://github.com/Shamir-Lab/DOMINO/tree/master/examples

algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino']
algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino', 'capdsd']


class TestParseOutputs: