5 changes: 5 additions & 0 deletions .github/workflows/build-containers.yml
@@ -48,6 +48,11 @@ jobs:
with:
path: docker-wrappers/Cytoscape
container: reedcompbio/py4cytoscape
build-and-remove-capdsd:
uses: "./.github/workflows/build-and-remove-template.yml"
with:
path: docker-wrappers/capDSD
container: reedcompbio/capdsd
build-and-remove-spras:
uses: "./.github/workflows/build-and-remove-template.yml"
with:
12 changes: 9 additions & 3 deletions Snakefile
@@ -6,6 +6,7 @@ from spras.dataset import Dataset
from spras.evaluation import Evaluation
from spras.analysis import ml, summary, graphspace, cytoscape
import spras.config as _config
from spras.util import extend_filename

# Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037
# and using the wrong separator prevents Snakemake from matching filenames to the rules that can produce them
@@ -189,7 +190,9 @@ checkpoint prepare_input:
# Use the algorithm's generate_inputs function to load the merged dataset, extract the relevant columns,
# and write the output files specified by required_inputs
# The filename_map provides the output file path for each required input file type
filename_map = {input_type: SEP.join([out_dir, 'prepared', f'{wildcards.dataset}-{wildcards.algorithm}-inputs', f'{input_type}.txt']) for input_type in runner.get_required_inputs(wildcards.algorithm)}
filename_map = {input_type: SEP.join(
[out_dir, 'prepared', f'{wildcards.dataset}-{wildcards.algorithm}-inputs', extend_filename(input_type)]
) for input_type in runner.get_required_inputs(wildcards.algorithm)}
runner.prepare_inputs(wildcards.algorithm, input.dataset_file, filename_map)

# Collect the prepared input files from the specified directory
@@ -207,7 +210,7 @@ def collect_prepared_input(wildcards):
prepared_dir = SEP.join([out_dir, 'prepared', f'{wildcards.dataset}-{wildcards.algorithm}-inputs'])

# Construct the list of expected prepared input files for the reconstruction algorithm
prepared_inputs = expand(f'{prepared_dir}{SEP}{{type}}.txt',type=runner.get_required_inputs(algorithm=wildcards.algorithm))
prepared_inputs = expand(f'{prepared_dir}{SEP}{{type}}',type=map(extend_filename, runner.get_required_inputs(algorithm=wildcards.algorithm)))
# If the directory is missing, do nothing because the missing output triggers running prepare_input
if os.path.isdir(prepared_dir):
# If the directory exists, confirm all prepared input files exist as well (as opposed to some or none)
@@ -238,7 +241,10 @@ rule reconstruct:
# Create a copy so that the updates are not written to the parameters logfile
params = reconstruction_params(wildcards.algorithm, wildcards.params).copy()
# Add the input files
params.update(dict(zip(runner.get_required_inputs(wildcards.algorithm), *{input}, strict=True)))
params.update(dict(zip(
[inp.replace(".", "_") for inp in runner.get_required_inputs(wildcards.algorithm)],
*{input}, strict=True
)))
# Add the output file
# All run functions can accept a relative path to the output file that should be written that is called 'output_file'
params['output_file'] = output.pathway_file
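For context, a minimal sketch of what these two Snakefile changes produce for capDSD; the dataset name and output directory below are illustrative, not taken from the config:

# capdsd declares required_inputs = ['ppi', 'ppip.ppip']
required = ['ppi', 'ppip.ppip']

# prepare_input: extend_filename appends '.txt' only when no extension is present,
# so 'ppi' becomes 'ppi.txt' while 'ppip.ppip' keeps its extension
filename_map = {
    'ppi': 'output/prepared/data1-capdsd-inputs/ppi.txt',
    'ppip.ppip': 'output/prepared/data1-capdsd-inputs/ppip.ppip',
}

# reconstruct: dots are replaced so the input types are usable as parameter keys
param_keys = [inp.replace('.', '_') for inp in required]  # ['ppi', 'ppip_ppip']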
6 changes: 5 additions & 1 deletion config/config.yaml
@@ -24,7 +24,7 @@ container_registry:
base_url: docker.io
# The owner or project of the registry
# For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs
owner: reedcompbio
owner: pubtristanf

# This list of algorithms should be generated by a script which checks the filesystem for installs.
# It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm
@@ -96,6 +96,10 @@ algorithms:
slice_threshold: [0.3]
module_threshold: [0.05]

- name: "capdsd"
params:
include: true

# Here we specify which pathways to run and other file location information.
# DataLoader.py can currently only load a single dataset
# Assume that if a dataset label does not change, the lists of associated input files do not change
8 changes: 8 additions & 0 deletions docker-wrappers/capDSD/Dockerfile
@@ -0,0 +1,8 @@
FROM python:2.7.18

RUN pip install numpy==1.16.6
# Since this is an arbitrary ZIP file hosted on the internet, we download it from the Web Archive link instead.
# TODO: checksum?
RUN wget https://web.archive.org/web/20250616194746/http://dsd.cs.tufts.edu/capdsd/files//capDSD-src.zip

RUN unzip capDSD-src.zip -d capDSD/
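The checksum TODO above could be addressed by pinning a digest at build time. A minimal Python sketch (Python 2 compatible, since the image is python:2.7.18); the digest below is a placeholder, not the real hash of capDSD-src.zip:

import hashlib

# Placeholder digest; replace with the actual sha256 of capDSD-src.zip
EXPECTED_SHA256 = '0' * 64

def verify(path):
    # Hash the file in chunks to avoid loading the whole archive into memory
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            h.update(chunk)
    if h.hexdigest() != EXPECTED_SHA256:
        raise RuntimeError('checksum mismatch for %s: %s' % (path, h.hexdigest()))

verify('capDSD-src.zip')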
3 changes: 3 additions & 0 deletions docker-wrappers/capDSD/README.md
@@ -0,0 +1,3 @@
# capDSD Docker Image

A Docker image for [capDSD](https://doi.org/10.1093/bioinformatics/btu263) that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/capdsd).
89 changes: 89 additions & 0 deletions spras/capDSD.py
@@ -0,0 +1,89 @@
from pathlib import Path

from spras.containers import prepare_volume, run_container_and_log
from spras.dataset import Dataset
from spras.interactome import convert_directed_to_undirected
from spras.prm import PRM

__all__ = ['CapDSD']

class CapDSD(PRM):
required_inputs = ['ppi', 'ppip.ppip']

@staticmethod
def generate_inputs(data: Dataset, filename_map: dict[str, str]):
"""
Access fields from the dataset and write the required input files
@param data: dataset
@param filename_map: a dict mapping file types in the required_inputs to the filename for that type
"""
for input_type in CapDSD.required_inputs:
if input_type not in filename_map:
raise ValueError(f"{input_type} filename is missing")

# create the ppi
ppi = data.get_interactome()
ppi = convert_directed_to_undirected(ppi)
ppi.to_csv(filename_map['ppi'], sep='\t', index=False, columns=["Interactor1", "Interactor2", "Weight"],
header=False)

# then, we want to 'guide' the ppi with a .ppip file, which is a secondary,
# trusted interactome: we use the directed edges from the interactome as our
# trusted edges.
ppip = data.get_interactome()
ppip = ppip[ppip["Direction"] == "D"]
ppip.to_csv(filename_map['ppip.ppip'], sep='\t', index=False, columns=["Interactor1", "Interactor2"], header=False)

@staticmethod
def run(ppi=None, ppip=None, output_file=None, container_framework="docker"):
"""
Run capDSD with Docker
@param ppi: input interactome file containing only undirected edges (required)
@param ppip: input interactome file containing only directed edges (required)
@param output_file: path to the output matrix (required)
@param container_framework: specify a container framework
"""
if not ppi or not ppip or not output_file:
raise ValueError("Required capDSD arguments are missing")

work_dir = '/capDSD'

volumes = list()

bind_path, ppi_file = prepare_volume(ppi, work_dir)
volumes.append(bind_path)

bind_path, ppip_file = prepare_volume(ppip, work_dir)
volumes.append(bind_path)

# Create a prefix for the output filename and ensure the directory exists
out_dir = Path(output_file).parent
out_dir.mkdir(parents=True, exist_ok=True)
bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir)
volumes.append(bind_path)
mapped_out_prefix = mapped_out_dir + '/output'

container_suffix = "capdsd"

# Since the volumes are bound under different folders, we can safely
# use the ppip_file's parent directory.
command = ['python',
'/capDSD/DSD.py',
'-pathmode', '1',
'-p', str(Path(ppip_file).parent),
ppi_file, mapped_out_prefix]


run_container_and_log('capDSD',
container_framework,
container_suffix,
command,
volumes,
work_dir)

output_matrix = Path(out_dir) / 'output.dsd'
output_matrix.rename(output_file)

@staticmethod
def parse_output(raw_pathway_file: str, standardized_pathway_file: str):
pass
1 change: 1 addition & 0 deletions spras/runner.py
@@ -1,5 +1,6 @@
# supported algorithm imports
from spras.allpairs import AllPairs as allpairs
from spras.capDSD import CapDSD as capdsd
from spras.dataset import Dataset
from spras.domino import DOMINO as domino
from spras.meo import MEO as meo
10 changes: 10 additions & 0 deletions spras/util.py
@@ -105,3 +105,13 @@ def duplicate_edges(df: pd.DataFrame) -> (pd.DataFrame, bool):
unique_edges_df = df_sorted.drop_duplicates(subset=["Node1", "Node2", "Direction"], keep="first", ignore_index=True)

return unique_edges_df, not unique_edges_df.equals(df)

# https://stackoverflow.com/a/49689414/7589775
def extend_filename(file_name: str, extension=".txt") -> str:
"""
Adds a default file extension if none is provided.
"""
root, ext = os.path.splitext(file_name)
if not ext:
ext = extension
return f'{root}{ext}'
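A few illustrative calls, assuming the behavior above:

extend_filename('network')        # -> 'network.txt'
extend_filename('ppip.ppip')      # -> 'ppip.ppip' (existing extension kept)
extend_filename('edges', '.tsv')  # -> 'edges.tsv'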
5 changes: 5 additions & 0 deletions test/capDSD/expected/capdsd-matrix-expected.txt
@@ -0,0 +1,5 @@
A B C D
A 0.0 1.9999962366471538 4.153838337651781 4.153838337651781
B 1.9999962366471538 0.0 2.153842101004627 2.153842101004627
C 4.153838337651781 2.153842101004627 0.0 0.0
D 4.153838337651781 2.153842101004627 0.0 0.0
2 changes: 2 additions & 0 deletions test/capDSD/input/capdsd-ppi.txt
@@ -0,0 +1,2 @@
A B 0.5
C D 0.75
1 change: 1 addition & 0 deletions test/capDSD/input/capdsd-ppip.ppip
@@ -0,0 +1 @@
B C
61 changes: 61 additions & 0 deletions test/capDSD/test_capDSD.py
@@ -0,0 +1,61 @@
import filecmp
import shutil
from pathlib import Path

import pytest

import spras.config as config
from spras.capDSD import CapDSD

config.init_from_file("config/config.yaml")

TEST_DIR = Path('test', 'capDSD')
IN_DIR = TEST_DIR / 'input'
OUT_DIR = TEST_DIR / 'output'
EXPECTED_DIR = TEST_DIR / 'expected'

INPUT_PPI = IN_DIR / 'capdsd-ppi.txt'
INPUT_PPIP = IN_DIR / 'capdsd-ppip.ppip'

OUT_FILE = OUT_DIR / 'output.txt'
EXPECTED_FILE = EXPECTED_DIR / 'capdsd-matrix-expected.txt'

class TestCapDSD:
"""
Run capDSD tests in the Docker image
"""
def test_capdsd_required(self):
OUT_FILE.unlink(missing_ok=True)
# Only include required arguments
CapDSD.run(
ppi=INPUT_PPI,
ppip=INPUT_PPIP,
output_file=OUT_FILE
)
assert OUT_FILE.exists()

assert filecmp.cmp(OUT_FILE, EXPECTED_FILE)

def test_capdsd_missing(self):
# Test the expected error is raised when required arguments are missing
with pytest.raises(ValueError):
# No PPI
CapDSD.run(
ppip=INPUT_PPIP,
output_file=OUT_FILE
)

# Only run Singularity test if the binary is available on the system
# spython is only available on Unix, but do not explicitly skip non-Unix platforms
@pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system')
def test_capdsd_singularity(self):
OUT_FILE.unlink(missing_ok=True)
# Only include required arguments and run with Singularity
CapDSD.run(
ppi=INPUT_PPI,
ppip=INPUT_PPIP,
output_file=OUT_FILE,
container_framework="singularity")
assert OUT_FILE.exists()

assert filecmp.cmp(OUT_FILE, EXPECTED_FILE)
2 changes: 2 additions & 0 deletions test/generate-inputs/expected/capdsd-ppi-expected.txt
@@ -0,0 +1,2 @@
test_A B 0.98
B C 0.77
3 changes: 2 additions & 1 deletion test/generate-inputs/test_generate_inputs.py
@@ -16,7 +16,8 @@
'omicsintegrator2': 'edges',
'domino': 'network',
'pathlinker': 'network',
'allpairs': 'network'
'allpairs': 'network',
'capdsd': 'ppi'
}


2 changes: 1 addition & 1 deletion test/parse-outputs/test_parse_outputs.py
@@ -12,7 +12,7 @@
# the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt
# from https://github.com/Shamir-Lab/DOMINO/tree/master/examples

algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino']
algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino', 'capdsd']


class TestParseOutputs: