Skip to content

Commit b9ffb6a

Browse files
authored
Merge branch 'rc5.0.0' into feat/refactor-config-system
2 parents 3047a01 + dc114ee commit b9ffb6a

File tree

14 files changed

+290
-36
lines changed

14 files changed

+290
-36
lines changed

configExample.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,5 +79,23 @@
7979
"pasteur": {
8080
"client_id": "",
8181
"client_secret": ""
82+
},
83+
"_comment": "Singularity runtime configuration",
84+
"singularity": {
85+
"binary": "/usr/bin/singularity",
86+
"_comment": "Paths that must be visible inside every container (passed as --bind)",
87+
"bind_paths": ["/data"],
88+
"_comment": "Adapter path inside the trimmomatic container (bioconda default)",
89+
"trimmomatic_adapters": "/opt/conda/share/trimmomatic/adapters/"
90+
},
91+
"_comment": "Absolute paths to pre-built Singularity SIF images on the cluster",
92+
"containers": {
93+
"skesa": "/fs1/resources/containers/skesa_2.5.1.sif",
94+
"blast": "/fs1/resources/containers/blast_2.12.0.sif",
95+
"bwa": "/fs1/resources/containers/bwa_0.7.17.sif",
96+
"samtools": "/fs1/resources/containers/samtools_1.13.sif",
97+
"picard": "/fs1/resources/containers/picard_2.20.3.sif",
98+
"trimmomatic": "/fs1/resources/containers/trimmomatic_0.39.sif",
99+
"quast": "/fs1/resources/containers/quast_5.3.0.sif"
82100
}
83101
}

containers/Dockerfile.blast

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# NCBI BLAST 2.12.0 — nucleotide BLAST search and database indexing
2+
# Build: docker build -f Dockerfile.blast -t blast:2.12.0 .
3+
# Convert: singularity pull --name blast_2.12.0.sif docker://your-repo/blast:2.12.0
4+
FROM mambaorg/micromamba:1-bookworm-slim
5+
6+
ARG MAMBA_DOCKERFILE_ACTIVATE=1
7+
8+
USER root
9+
RUN apt-get update \
10+
&& apt-get upgrade -y \
11+
&& apt-get clean \
12+
&& rm -rf /var/lib/apt/lists/*
13+
USER $MAMBA_USER
14+
15+
RUN micromamba install -y -n base -c conda-forge -c bioconda \
16+
blast=2.12.0 \
17+
&& micromamba clean -afy

containers/Dockerfile.bwa

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# BWA 0.7.17 — short read aligner (mem mode) and reference indexing
2+
# Build: docker build -f Dockerfile.bwa -t bwa:0.7.17 .
3+
# Convert: singularity pull --name bwa_0.7.17.sif docker://your-repo/bwa:0.7.17
4+
FROM mambaorg/micromamba:1-bookworm-slim
5+
6+
ARG MAMBA_DOCKERFILE_ACTIVATE=1
7+
8+
USER root
9+
RUN apt-get update \
10+
&& apt-get upgrade -y \
11+
&& apt-get clean \
12+
&& rm -rf /var/lib/apt/lists/*
13+
USER $MAMBA_USER
14+
15+
RUN micromamba install -y -n base -c conda-forge -c bioconda \
16+
bwa=0.7.17 \
17+
&& micromamba clean -afy

containers/Dockerfile.picard

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Picard 2.20.3 — duplicate marking and insert size metrics
2+
# Build: docker build -f Dockerfile.picard -t picard:2.20.3 .
3+
# Convert: singularity pull --name picard_2.20.3.sif docker://your-repo/picard:2.20.3
4+
#
5+
# The bioconda picard package installs a wrapper script that calls the JAR via
6+
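# the bundled Java runtime; no separate openjdk install is needed. r-base is installed because Picard's CollectInsertSizeMetrics calls R to draw its histogram (H=) output.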
7+
FROM mambaorg/micromamba:1-bookworm-slim
8+
9+
ARG MAMBA_DOCKERFILE_ACTIVATE=1
10+
11+
USER root
12+
RUN apt-get update \
13+
&& apt-get upgrade -y \
14+
&& apt-get clean \
15+
&& rm -rf /var/lib/apt/lists/*
16+
USER $MAMBA_USER
17+
18+
RUN micromamba install -y -n base -c conda-forge -c bioconda \
19+
picard=2.20.3 \
20+
r-base=4.1.1 \
21+
&& micromamba clean -afy

containers/Dockerfile.quast

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# QUAST 5.3.0 — assembly quality assessment
2+
# Build: docker build -f Dockerfile.quast -t quast:5.3.0 .
3+
# Convert: singularity pull --name quast_5.3.0.sif docker://your-repo/quast:5.3.0
4+
#
5+
# The entry point is quast.py (as installed by bioconda).
6+
FROM mambaorg/micromamba:1-bookworm-slim
7+
8+
ARG MAMBA_DOCKERFILE_ACTIVATE=1
9+
10+
USER root
11+
RUN apt-get update \
12+
&& apt-get upgrade -y \
13+
&& apt-get clean \
14+
&& rm -rf /var/lib/apt/lists/*
15+
USER $MAMBA_USER
16+
17+
RUN micromamba install -y -n base -c conda-forge -c bioconda \
18+
quast=5.3.0 \
19+
&& micromamba clean -afy

containers/Dockerfile.samtools

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SAMtools 1.13 — SAM/BAM processing, indexing, and statistics
2+
# Build: docker build -f Dockerfile.samtools -t samtools:1.13 .
3+
# Convert: singularity pull --name samtools_1.13.sif docker://your-repo/samtools:1.13
4+
FROM mambaorg/micromamba:1-bookworm-slim
5+
6+
ARG MAMBA_DOCKERFILE_ACTIVATE=1
7+
8+
USER root
9+
RUN apt-get update \
10+
&& apt-get upgrade -y \
11+
&& apt-get clean \
12+
&& rm -rf /var/lib/apt/lists/*
13+
USER $MAMBA_USER
14+
15+
RUN micromamba install -y -n base -c conda-forge -c bioconda \
16+
samtools=1.13 \
17+
&& micromamba clean -afy

containers/Dockerfile.skesa

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SKESA 2.5.1 — de novo assembler
2+
# Build: docker build -f Dockerfile.skesa -t skesa:2.5.1 .
3+
# Convert: singularity pull --name skesa_2.5.1.sif docker://your-repo/skesa:2.5.1
4+
FROM mambaorg/micromamba:1-bookworm-slim
5+
6+
ARG MAMBA_DOCKERFILE_ACTIVATE=1
7+
8+
USER root
9+
RUN apt-get update \
10+
&& apt-get upgrade -y \
11+
&& apt-get clean \
12+
&& rm -rf /var/lib/apt/lists/*
13+
USER $MAMBA_USER
14+
15+
RUN micromamba install -y -n base -c conda-forge -c bioconda \
16+
skesa=2.5.1 \
17+
&& micromamba clean -afy

containers/Dockerfile.trimmomatic

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Trimmomatic 0.39 — quality trimming for paired-end Illumina reads
2+
# Build: docker build -f Dockerfile.trimmomatic -t trimmomatic:0.39 .
3+
# Convert: singularity pull --name trimmomatic_0.39.sif docker://your-repo/trimmomatic:0.39
4+
#
5+
# Adapter files are located inside the container at:
6+
# /opt/conda/share/trimmomatic/adapters/
7+
# Set singularity.trimmomatic_adapters in your config to this path.
8+
FROM mambaorg/micromamba:1-bookworm-slim
9+
10+
ARG MAMBA_DOCKERFILE_ACTIVATE=1
11+
12+
USER root
13+
RUN apt-get update \
14+
&& apt-get upgrade -y \
15+
&& apt-get clean \
16+
&& rm -rf /var/lib/apt/lists/*
17+
USER $MAMBA_USER
18+
19+
RUN micromamba install -y -n base -c conda-forge -c bioconda \
20+
trimmomatic=0.39 \
21+
&& micromamba clean -afy

microSALT/utils/job_creator.py

Lines changed: 92 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,16 @@ def get_headerargs(self):
9595
headerline = f"-A {self.slurm_header.project} -p {self.slurm_header.type} -n {self.slurm_header.threads} -t {self.slurm_header.time} -J {self.slurm_header.job_prefix}_{self.name} --qos {self.slurm_header.qos} --output {self.finishdir}/slurm_{self.name}.log"
9696
return headerline
9797

98+
def _singularity_exec(self, tool, command):
99+
"""Return command wrapped with singularity exec for the given tool container."""
100+
sif = self.config["containers"][tool]
101+
binary = self.config["singularity"]["binary"]
102+
bind_list = list(self.config["singularity"].get("bind_paths", []))
103+
if self.finishdir and self.finishdir not in bind_list:
104+
bind_list.append(self.finishdir)
105+
bind = f"--bind {','.join(bind_list)}" if bind_list else ""
106+
return f"{binary} exec {bind} {sif} {command}"
107+
98108
def verify_fastq(self):
99109
"""Uses arg indir to return a dict of PE fastq tuples fulfilling naming convention"""
100110
verified_files = list()
@@ -177,13 +187,15 @@ def create_assemblysection(self):
177187
batchfile = open(self.batchfile, "a+")
178188
# memory is actually 128 per node regardless of cores.
179189
batchfile.write("# SKESA assembly\n")
180-
batchfile.write(
181-
f"mkdir -p {assembly_dir} &"
190+
skesa_cmd = (
182191
f"skesa "
183192
f"--cores {self.slurm_header.threads} "
184193
f"--memory {8 * int(self.slurm_header.threads)} "
185194
f"--contigs_out {contigs_file_raw} "
186-
f"--reads {self.concat_files['f']},{self.concat_files['r']}\n"
195+
f"--reads {self.concat_files['f']},{self.concat_files['r']}"
196+
)
197+
batchfile.write(
198+
f"mkdir -p {assembly_dir} &" f"{self._singularity_exec('skesa', skesa_cmd)}\n"
187199
)
188200

189201
# Convert sequence naming in Skesa output into Spades format in the contigs fasta file:
@@ -232,19 +244,31 @@ def blast_subset(self, name, search_string):
232244
)
233245
if name == "mlst":
234246
batchfile.write(
235-
f"blastn -db {os.path.dirname(ref)}/{ref_nosuf} -query {self.finishdir}/assembly/{self.name}_contigs.fasta -out {self.finishdir}/blast_search/{name}/loci_query_{ref_nosuf}.txt -task megablast -num_threads {self.slurm_header.threads} -outfmt {blast_format}\n"
247+
self._singularity_exec(
248+
"blast",
249+
f"blastn -db {os.path.dirname(ref)}/{ref_nosuf} -query {self.finishdir}/assembly/{self.name}_contigs.fasta -out {self.finishdir}/blast_search/{name}/loci_query_{ref_nosuf}.txt -task megablast -num_threads {self.slurm_header.threads} -outfmt {blast_format}",
250+
)
251+
+ "\n"
236252
)
237253
else:
238254
batchfile.write(
239-
f"blastn -db {os.path.dirname(ref)}/{ref_nosuf} -query {self.finishdir}/assembly/{self.name}_contigs.fasta -out {self.finishdir}/blast_search/{name}/{ref_nosuf}.txt -task megablast -num_threads {self.slurm_header.threads} -outfmt {blast_format}\n"
255+
self._singularity_exec(
256+
"blast",
257+
f"blastn -db {os.path.dirname(ref)}/{ref_nosuf} -query {self.finishdir}/assembly/{self.name}_contigs.fasta -out {self.finishdir}/blast_search/{name}/{ref_nosuf}.txt -task megablast -num_threads {self.slurm_header.threads} -outfmt {blast_format}",
258+
)
259+
+ "\n"
240260
)
241261
elif len(file_list) == 1:
242262
ref_nosuf = re.search(r"(\w+(?:\-\w+)*)\.\w+", os.path.basename(file_list[0])).group(1)
243263
batchfile.write(
244264
f"## BLAST {name} search in {self.sample.get('organism').replace('_', ' ').capitalize()}\n"
245265
)
246266
batchfile.write(
247-
f"blastn -db {os.path.dirname(search_string)}/{ref_nosuf} -query {self.finishdir}/assembly/{self.name}_contigs.fasta -out {self.finishdir}/blast_search/{name}/{ref_nosuf}.txt -task megablast -num_threads {self.slurm_header.threads} -outfmt {blast_format}\n"
267+
self._singularity_exec(
268+
"blast",
269+
f"blastn -db {os.path.dirname(search_string)}/{ref_nosuf} -query {self.finishdir}/assembly/{self.name}_contigs.fasta -out {self.finishdir}/blast_search/{name}/{ref_nosuf}.txt -task megablast -num_threads {self.slurm_header.threads} -outfmt {blast_format}",
270+
)
271+
+ "\n"
248272
)
249273
batchfile.write("\n")
250274
batchfile.close()
@@ -262,37 +286,72 @@ def create_variantsection(self):
262286

263287
batchfile.write("## Alignment & Deduplication\n")
264288
batchfile.write(
265-
f"bwa mem -M -t {self.slurm_header.threads} {ref} {self.concat_files['f']} {self.concat_files['r']} > {outbase}.sam\n"
289+
self._singularity_exec(
290+
"bwa",
291+
f"bwa mem -M -t {self.slurm_header.threads} {ref} {self.concat_files['f']} {self.concat_files['r']} > {outbase}.sam",
292+
)
293+
+ "\n"
266294
)
267295
batchfile.write(
268-
f"samtools view --threads {self.slurm_header.threads} -b -o {outbase}.bam -T {ref} {outbase}.sam\n"
296+
self._singularity_exec(
297+
"samtools",
298+
f"samtools view --threads {self.slurm_header.threads} -b -o {outbase}.bam -T {ref} {outbase}.sam",
299+
)
300+
+ "\n"
269301
)
270302
batchfile.write(
271-
f"samtools sort --threads {self.slurm_header.threads} -o {outbase}.bam_sort {outbase}.bam\n"
303+
self._singularity_exec(
304+
"samtools",
305+
f"samtools sort --threads {self.slurm_header.threads} -o {outbase}.bam_sort {outbase}.bam",
306+
)
307+
+ "\n"
272308
)
273309
batchfile.write(
274-
f"picard MarkDuplicates I={outbase}.bam_sort O={outbase}.bam_sort_rmdup M={outbase}.stats.dup REMOVE_DUPLICATES=true\n"
310+
self._singularity_exec(
311+
"picard",
312+
f"picard MarkDuplicates I={outbase}.bam_sort O={outbase}.bam_sort_rmdup M={outbase}.stats.dup REMOVE_DUPLICATES=true",
313+
)
314+
+ "\n"
275315
)
276-
batchfile.write(f"samtools index {outbase}.bam_sort_rmdup\n")
277316
batchfile.write(
278-
f"samtools idxstats {outbase}.bam_sort_rmdup &> {outbase}.stats.ref\n"
317+
self._singularity_exec("samtools", f"samtools index {outbase}.bam_sort_rmdup")
318+
+ "\n"
319+
)
320+
batchfile.write(
321+
self._singularity_exec(
322+
"samtools", f"samtools idxstats {outbase}.bam_sort_rmdup"
323+
)
324+
+ f" &> {outbase}.stats.ref\n"
279325
)
280326
# Removal of temp aligment files
281327
batchfile.write(f"rm {outbase}.bam {outbase}.sam\n")
282328

283329
batchfile.write("## Primary stats generation\n")
284330
# Insert stats, dedupped
285331
batchfile.write(
286-
f"picard CollectInsertSizeMetrics I={outbase}.bam_sort_rmdup O={outbase}.stats.ins H={outbase}.hist.ins\n"
332+
self._singularity_exec(
333+
"picard",
334+
f"picard CollectInsertSizeMetrics I={outbase}.bam_sort_rmdup O={outbase}.stats.ins H={outbase}.hist.ins",
335+
)
336+
+ "\n"
287337
)
288338
# Coverage
289339
batchfile.write(
290-
f"samtools stats --coverage 1,10000,1 {outbase}.bam_sort_rmdup |grep ^COV | cut -f 2- &> {outbase}.stats.cov\n"
340+
self._singularity_exec(
341+
"samtools", f"samtools stats --coverage 1,10000,1 {outbase}.bam_sort_rmdup"
342+
)
343+
+ f" |grep ^COV | cut -f 2- &> {outbase}.stats.cov\n"
291344
)
292345
# Mapped rate, no dedup,dedup in MWGS (trimming has no effect)!
293-
batchfile.write(f"samtools flagstat {outbase}.bam_sort &> {outbase}.stats.map\n")
346+
batchfile.write(
347+
self._singularity_exec("samtools", f"samtools flagstat {outbase}.bam_sort")
348+
+ f" &> {outbase}.stats.map\n"
349+
)
294350
# Total reads, no dedup,dedup in MWGS (trimming has no effect)!
295-
batchfile.write(f"samtools view -c {outbase}.bam_sort &> {outbase}.stats.raw\n")
351+
batchfile.write(
352+
self._singularity_exec("samtools", f"samtools view -c {outbase}.bam_sort")
353+
+ f" &> {outbase}.stats.raw\n"
354+
)
296355

297356
batchfile.write("\n\n")
298357
batchfile.close()
@@ -301,6 +360,7 @@ def create_preprocsection(self):
301360
"""Concatenates data, possibly trims it, then makes the unstranded reads usable"""
302361
forward = list()
303362
reverse = list()
363+
304364
for root, dirs, files in os.walk(self.folders.adapters):
305365
if "NexteraPE-PE.fa" not in files:
306366
self.logger.error(
@@ -336,7 +396,17 @@ def create_preprocsection(self):
336396
ru = f"{trimdir}/{outfile}_trim_rev_unpair.fastq.gz"
337397
batchfile.write("##Trimming section\n")
338398
batchfile.write(
339-
f"trimmomatic PE -threads {self.slurm_header.threads} -phred33 {self.concat_files.get('f')} {self.concat_files.get('r')} {fp} {fu} {rp} {ru} ILLUMINACLIP:{self.folders.adapters}/NexteraPE-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36\n"
399+
self._singularity_exec(
400+
"trimmomatic",
401+
(
402+
f"trimmomatic PE -threads {self.slurm_header.threads}"
403+
f" -phred33 {self.concat_files.get('f')} {self.concat_files.get('r')}"
404+
f" {fp} {fu} {rp} {ru}"
405+
f" ILLUMINACLIP:{self.folders.adapters}/NexteraPE-PE.fa:2:30:10"
406+
" LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
407+
),
408+
)
409+
+ "\n"
340410
)
341411

342412
batchfile.write("## Interlaced trimmed files\n")
@@ -353,7 +423,11 @@ def create_assemblystats_section(self):
353423
batchfile.write("# QUAST QC metrics\n")
354424
batchfile.write(f"mkdir {self.finishdir}/assembly/quast\n")
355425
batchfile.write(
356-
f"quast.py {self.finishdir}/assembly/{self.name}_contigs.fasta -o {self.finishdir}/assembly/quast\n"
426+
self._singularity_exec(
427+
"quast",
428+
f"quast.py {self.finishdir}/assembly/{self.name}_contigs.fasta -o {self.finishdir}/assembly/quast",
429+
)
430+
+ "\n"
357431
)
358432
batchfile.write(
359433
f"mv {self.finishdir}/assembly/quast/report.tsv {self.finishdir}/assembly/quast/{self.name}_report.tsv\n\n"

0 commit comments

Comments
 (0)