Skip to content

Commit b89b3a8

Browse files
committed
Changes for new wepp flow
1 parent 9ad45c5 commit b89b3a8

File tree

5 files changed

+56
-44
lines changed

5 files changed

+56
-44
lines changed

README.md

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,22 @@ All set to try the [examples](#example).
7979
```
8080
git clone https://github.com/TurakhiaLab/metaWEPP.git
8181
cd metaWEPP
82+
chmod +x run-metawepp
8283
```
8384

84-
**Step 2:** Install Kraken.
85+
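**Step 2:** Add the `run-metawepp` shell function to `~/.bashrc` on Linux or `~/.zshrc` on macOS (the commands below use `~/.bashrc`; substitute `~/.zshrc` on macOS).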
86+
```
87+
echo "
88+
run-metawepp() {
89+
snakemake -s $PWD/Snakefile \"\$@\"
90+
}
91+
export -f run-metawepp
92+
" >> ~/.bashrc
93+
94+
source ~/.bashrc
95+
```
96+
97+
**Step 3:** Install Kraken.
8598
The following commands install kraken and also update the `$PATH` variable for running the tool easily.
8699
```
87100
git clone https://github.com/DerrickWood/kraken2.git
@@ -98,10 +111,12 @@ cd ..
98111
sudo apt-get install minimap2
99112
pip install viral_usher matplotlib snakemake
100113
```
101-
**Step 4:** Install `WEPP`.
114+
**Step 5:** Install `WEPP`.
102115

103116
```
104117
git clone --recurse-submodules https://github.com/TurakhiaLab/WEPP.git
118+
cd WEPP
119+
chmod +x run-wepp
105120
```
106121
View the WEPP installation guide starting from Option 3 in the [WEPP repository](https://github.com/TurakhiaLab/WEPP/tree/main?tab=readme-ov-file#-option-3-install-via-shell-commands-requires-sudo-access).
107122

@@ -130,7 +145,7 @@ rm k2_viral_20251015.tar.gz
130145
Follow the command prompts to add the pathogens.
131146

132147
```
133-
snakemake --config KRAKEN_DB=viral_kraken_db DIR=simulated_metagenomic_sample MIN_PROP=0.05 PATHOGENS=default,respiratory_syncytial_virus_a,sars_cov_2 CLADE_LIST=,nextstrain,nextstrain:pango CLADE_IDX=-1,0,1 --cores 32
148+
run-metawepp --config KRAKEN_DB=viral_kraken_db DIR=simulated_metagenomic_sample MIN_PROP=0.05 PATHOGENS=default,respiratory_syncytial_virus_a,sars_cov_2 CLADE_LIST=,nextstrain,nextstrain:pango CLADE_IDX=-1,0,1 --cores 32
134149
```
135150

136151
For SARS-CoV-2:
@@ -215,7 +230,7 @@ rm k2_viral_20251015.tar.gz
215230

216231
**Step 6:** Run the pipeline
217232
```
218-
snakemake --config KRAKEN_DB=viral_kraken_db DIR=real_metagenomic_sample MIN_PROP=0.05 PATHOGENS=default,respiratory_syncytial_virus_a CLADE_LIST=,nextstrain CLADE_IDX=-1,0 --cores 32
233+
run-metawepp --config KRAKEN_DB=viral_kraken_db DIR=real_metagenomic_sample MIN_PROP=0.05 PATHOGENS=default,respiratory_syncytial_virus_a CLADE_LIST=,nextstrain CLADE_IDX=-1,0 --cores 32
219234
```
220235

221236
On being prompted to add a new species for haplotype-level analysis, press `y`, and follow the steps below for RSV-A and Rhinovirus-A.
@@ -382,12 +397,12 @@ metaWEPP requires `KRAKEN_DB` and `DIR` to be specified as command-line argument
382397
Examples:
383398
1. Using all the parameters from the config file:
384399
```
385-
snakemake --config KRAKEN_DB=viral_kraken_db DIR=simulated_metagenomic_sample --cores 32
400+
run-metawepp --config KRAKEN_DB=viral_kraken_db DIR=simulated_metagenomic_sample --cores 32
386401
```
387402

388403
2. Overriding MIN_Q and MIN_PROP_FOR_WEPP through command line:
389404
```
390-
snakemake --config KRAKEN_DB=viral_kraken_db DIR=simulated_metagenomic_sample MIN_Q=25 MIN_PROP_FOR_WEPP=0.05 --cores 32
405+
run-metawepp --config KRAKEN_DB=viral_kraken_db DIR=simulated_metagenomic_sample MIN_Q=25 MIN_PROP_FOR_WEPP=0.05 --cores 32
391406
```
392407

393408
### <a name="mat"> MAT for pathogen species

Snakefile

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ for arg in sys.argv[1:]:
3131
# 2. DEFINE PATHS FOR INTERNAL RESOURCES
3232
# ────────────────────────────────────────────────────────────────
3333

34-
RUNNING_TEST = "test" in requested_rules
34+
RUNNING_HELP = "help" in requested_rules
3535

3636
PATHOGEN_ROOT = Path("data/pathogens_for_wepp")
3737
ADDED_TAXONS = PATHOGEN_ROOT / "added_taxons.csv"
@@ -47,7 +47,7 @@ if wepp_conda_path.exists():
4747
WEPP_DATA.mkdir(parents=True, exist_ok=True)
4848
else:
4949
WEPP_ROOT = WEPP_DATA
50-
if not WEPP_ROOT.exists() and not RUNNING_TEST:
50+
if not WEPP_ROOT.exists() and not RUNNING_HELP:
5151
print(f"Error: WEPP not installed at {WEPP_ROOT}", file=sys.stderr)
5252
sys.exit(1)
5353

@@ -118,7 +118,7 @@ def check_input_files():
118118
fq2 = str(gzip_if_needed(r2_files[0]))
119119
return fq1, fq2
120120

121-
if not RUNNING_TEST:
121+
if not RUNNING_HELP:
122122
FQ1, FQ2 = check_input_files()
123123

124124
# ────────────────────────────────────────────────────────────────
@@ -381,8 +381,10 @@ rule prepare_for_wepp:
381381
taxonium_files = list(dest_dir.glob("*.jsonl")) + list(dest_dir.glob("*.jsonl.gz"))
382382
taxonium_arg = f"TAXONIUM_FILE={taxonium_files[0].name} " if taxonium_files else ""
383383

384+
wepp_executable = WEPP_ROOT / "run-wepp"
385+
384386
cmd = (
385-
f"snakemake "
387+
f"{wepp_executable} "
386388
f"-s {WEPP_SNAKEFILE} "
387389
f"--directory {WEPP_DATA} "
388390
f"--cores {workflow.cores} --use-conda "
@@ -407,10 +409,11 @@ rule run_wepp:
407409
output:
408410
"results/{DIR}/.wepp.done"
409411
run:
412+
410413
for line in open(input[0]):
411414
cmd = line.strip()
412415
if cmd:
413-
subprocess.check_call(cmd, shell=True)
416+
subprocess.check_call(cmd, shell=True, executable="/bin/bash")
414417
Path(output[0]).touch()
415418

416419
rule print_dashboard_instructions:
@@ -433,7 +436,7 @@ rule print_dashboard_instructions:
433436

434437
Path(output[0]).touch()
435438

436-
rule test:
439+
rule help:
437440
message: "Printing metaWEPP configuration help"
438441
params:
439442
script = BASE_DIR / "scripts/metawepp_help.py"

docker/Dockerfile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ RUN apt-get update && apt-get install -y \
1212
sudo \
1313
pip \
1414
build-essential \
15+
libopenmpi-dev \
16+
libboost-all-dev \
1517
python3-pandas \
1618
pkg-config \
1719
zip \
@@ -58,6 +60,16 @@ RUN git clone https://github.com/DerrickWood/kraken2.git && \
5860
./install_kraken2.sh . && \
5961
echo 'export PATH="/metaWEPP/kraken2:$PATH"' >> /root/.bashrc
6062

63+
# Define exported shell functions (run-wepp, run-metawepp) for running WEPP and metaWEPP
64+
RUN echo 'run-wepp() {' >> /root/.bashrc && \
65+
echo ' /root/miniforge3/bin/snakemake -s /metaWEPP/WEPP/workflow/Snakefile "$@"' >> /root/.bashrc && \
66+
echo '}' >> /root/.bashrc && \
67+
echo 'export -f run-wepp' >> /root/.bashrc
68+
69+
RUN echo 'run-metawepp() {' >> /root/.bashrc && \
70+
echo ' /root/miniforge3/bin/snakemake -s /metaWEPP/Snakefile "$@"' >> /root/.bashrc && \
71+
echo '}' >> /root/.bashrc && \
72+
echo 'export -f run-metawepp' >> /root/.bashrc
6173

6274
# Default command: start an interactive login shell to source ~/.bashrc
6375
CMD [ "bash", "--login" ]

run-metawepp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/bin/bash
2+
3+
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
4+
5+
# Launch the metaWEPP workflow, forwarding all CLI arguments to snakemake.
# The metaWEPP Snakefile lives at the repository root (workflow/Snakefile is
# WEPP's layout), so resolve it relative to this script's own directory.
exec snakemake -s "$REPO_ROOT/Snakefile" "$@"

scripts/metawepp_help.py

Lines changed: 9 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,44 +10,21 @@
1010
OPTIONS = [
1111
("DIR", "Folder containing the metagenomic reads."),
1212
("KRAKEN_DB", "Folder containing the Kraken2 database."),
13-
(
14-
"SEQUENCING_TYPE",
15-
"Sequencing read type (s: Illumina single-ended, d: Illumina double-ended, n: ONT long reads).",
16-
),
13+
("SEQUENCING_TYPE", "Sequencing read type (s: Illumina single-ended, d: Illumina double-ended, n: ONT long reads)."),
1714
("PRIMER_BED", "BED file for primers, expected under WEPP/primers."),
18-
(
19-
"MIN_AF",
20-
"Alleles with an allele frequency below this threshold are masked (Illumina: 0.5%, Ion Torrent: 1.5%, ONT: 2%).",
21-
),
15+
("MIN_AF", "Alleles with an allele frequency below this threshold are masked (Illumina: 0.5%, Ion Torrent: 1.5%, ONT: 2%)."),
2216
("MIN_DEPTH", "Sites with read depth below this threshold are masked by WEPP."),
2317
("MIN_Q", "Alleles with a Phred score below this threshold are masked by WEPP."),
24-
(
25-
"MIN_PROP",
26-
"Minimum proportion of haplotypes detected by WEPP (Wastewater samples: 0.5%, Clinical samples: 5%).",
27-
),
18+
("MIN_PROP", "Minimum proportion of haplotypes detected by WEPP (Wastewater samples: 0.5%, Clinical samples: 5%)."),
2819
("MIN_LEN", "Minimum read length to consider after ivar trim (default: 80)."),
29-
(
30-
"MAX_READS",
31-
"Maximum number of reads considered by WEPP from the sample (useful for reducing runtime).",
32-
),
20+
("MAX_READS", "Maximum number of reads considered by WEPP from the sample (useful for reducing runtime)."),
3321
("DASHBOARD_ENABLED", "Enables the WEPP dashboard for visualizing haplotype results."),
34-
(
35-
"PATHOGENS",
36-
"List of pathogens with custom WEPP settings; species not listed use default settings.",
37-
),
38-
(
39-
"CLADE_LIST",
40-
"Comma-separated clade annotation schemes in the MAT file, ordered to match PATHOGENS; leave blank for species without clade annotations.",
41-
),
42-
(
43-
"CLADE_IDX",
44-
"Comma-separated clade indices for each pathogen; use -1 for species without lineage annotations, ordered to match PATHOGENS.",
45-
),
22+
("ADD_SPECIES_RUNTIME", "Asks users to add pathogen species at runtime when enabled."),
23+
("PATHOGENS", "List of pathogens with custom WEPP settings; species not listed use default settings."),
24+
("CLADE_LIST", "Comma-separated clade annotation schemes in the MAT file, ordered to match PATHOGENS; leave blank for species without clade annotations."),
25+
("CLADE_IDX", "Comma-separated clade indices for each pathogen; use -1 for species without lineage annotations, ordered to match PATHOGENS."),
4626
("MIN_DEPTH_FOR_WEPP", "Minimum read coverage required to run WEPP for any pathogen species."),
47-
(
48-
"MIN_PROP_FOR_WEPP",
49-
"Minimum relative abundance before metaWEPP prompts adding a species for haplotype-level analysis.",
50-
),
27+
("MIN_PROP_FOR_WEPP", "Minimum relative abundance before metaWEPP prompts adding a species for haplotype-level analysis."),
5128
]
5229

5330

0 commit comments

Comments
 (0)