@@ -95,6 +95,16 @@ def get_headerargs(self):
9595 headerline = f"-A { self .slurm_header .project } -p { self .slurm_header .type } -n { self .slurm_header .threads } -t { self .slurm_header .time } -J { self .slurm_header .job_prefix } _{ self .name } --qos { self .slurm_header .qos } --output { self .finishdir } /slurm_{ self .name } .log"
9696 return headerline
9797
def _singularity_exec(self, tool, command):
    """Return ``command`` wrapped in a ``singularity exec`` call for ``tool``.

    The container image is read from ``self.config["containers"][tool]`` and
    the runtime executable from ``self.config["singularity"]["binary"]``.
    Entries of the optional ``self.config["singularity"]["bind_paths"]`` are
    passed to ``--bind``; ``self.finishdir`` is appended when it is set and
    not already listed. The configured list itself is never modified.

    Args:
        tool: Key into ``self.config["containers"]`` selecting the image.
        command: Command line to run in the container. It is inserted
            verbatim, without any quoting or escaping.

    Returns:
        str: ``"<binary> exec [--bind p1,p2,...] <image> <command>"``. The
        ``--bind`` option is omitted entirely when there is nothing to bind.

    Raises:
        KeyError: If no image is configured for ``tool``, or the
            ``singularity`` section / its ``binary`` entry is missing.
    """
    try:
        sif = self.config["containers"][tool]
    except KeyError as err:
        raise KeyError(f"No container image configured for tool '{tool}'") from err

    singularity_cfg = self.config["singularity"]
    binary = singularity_cfg["binary"]

    # ``bind_paths`` may be absent, explicitly null, or a single string;
    # normalise to a fresh list so the configuration is left untouched.
    configured = singularity_cfg.get("bind_paths") or []
    if isinstance(configured, str):
        configured = [configured]
    # Entries may be path objects; ``str.join`` only accepts strings.
    bind_list = [str(path) for path in configured]

    finishdir = str(self.finishdir) if self.finishdir else ""
    if finishdir and finishdir not in bind_list:
        bind_list.append(finishdir)

    # Keep the separating space inside the option so that an empty bind
    # list does not leave a double space in the generated command.
    bind = f"--bind {','.join(bind_list)} " if bind_list else ""
    return f"{binary} exec {bind}{sif} {command}"
107+
98108 def verify_fastq (self ):
99109 """Uses arg indir to return a dict of PE fastq tuples fulfilling naming convention"""
100110 verified_files = list ()
@@ -177,13 +187,15 @@ def create_assemblysection(self):
177187 batchfile = open (self .batchfile , "a+" )
178188 # memory is actually 128 per node regardless of cores.
179189 batchfile .write ("# SKESA assembly\n " )
180- batchfile .write (
181- f"mkdir -p { assembly_dir } &"
190+ skesa_cmd = (
182191 f"skesa "
183192 f"--cores { self .slurm_header .threads } "
184193 f"--memory { 8 * int (self .slurm_header .threads )} "
185194 f"--contigs_out { contigs_file_raw } "
186- f"--reads { self .concat_files ['f' ]} ,{ self .concat_files ['r' ]} \n "
195+ f"--reads { self .concat_files ['f' ]} ,{ self .concat_files ['r' ]} "
196+ )
197+ batchfile .write (
198+ f"mkdir -p { assembly_dir } &" f"{ self ._singularity_exec ('skesa' , skesa_cmd )} \n "
187199 )
188200
189201 # Convert sequence naming in Skesa output into Spades format in the contigs fasta file:
@@ -232,19 +244,31 @@ def blast_subset(self, name, search_string):
232244 )
233245 if name == "mlst" :
234246 batchfile .write (
235- f"blastn -db { os .path .dirname (ref )} /{ ref_nosuf } -query { self .finishdir } /assembly/{ self .name } _contigs.fasta -out { self .finishdir } /blast_search/{ name } /loci_query_{ ref_nosuf } .txt -task megablast -num_threads { self .slurm_header .threads } -outfmt { blast_format } \n "
247+ self ._singularity_exec (
248+ "blast" ,
249+ f"blastn -db { os .path .dirname (ref )} /{ ref_nosuf } -query { self .finishdir } /assembly/{ self .name } _contigs.fasta -out { self .finishdir } /blast_search/{ name } /loci_query_{ ref_nosuf } .txt -task megablast -num_threads { self .slurm_header .threads } -outfmt { blast_format } " ,
250+ )
251+ + "\n "
236252 )
237253 else :
238254 batchfile .write (
239- f"blastn -db { os .path .dirname (ref )} /{ ref_nosuf } -query { self .finishdir } /assembly/{ self .name } _contigs.fasta -out { self .finishdir } /blast_search/{ name } /{ ref_nosuf } .txt -task megablast -num_threads { self .slurm_header .threads } -outfmt { blast_format } \n "
255+ self ._singularity_exec (
256+ "blast" ,
257+ f"blastn -db { os .path .dirname (ref )} /{ ref_nosuf } -query { self .finishdir } /assembly/{ self .name } _contigs.fasta -out { self .finishdir } /blast_search/{ name } /{ ref_nosuf } .txt -task megablast -num_threads { self .slurm_header .threads } -outfmt { blast_format } " ,
258+ )
259+ + "\n "
240260 )
241261 elif len (file_list ) == 1 :
242262 ref_nosuf = re .search (r"(\w+(?:\-\w+)*)\.\w+" , os .path .basename (file_list [0 ])).group (1 )
243263 batchfile .write (
244264 f"## BLAST { name } search in { self .sample .get ('organism' ).replace ('_' , ' ' ).capitalize ()} \n "
245265 )
246266 batchfile .write (
247- f"blastn -db { os .path .dirname (search_string )} /{ ref_nosuf } -query { self .finishdir } /assembly/{ self .name } _contigs.fasta -out { self .finishdir } /blast_search/{ name } /{ ref_nosuf } .txt -task megablast -num_threads { self .slurm_header .threads } -outfmt { blast_format } \n "
267+ self ._singularity_exec (
268+ "blast" ,
269+ f"blastn -db { os .path .dirname (search_string )} /{ ref_nosuf } -query { self .finishdir } /assembly/{ self .name } _contigs.fasta -out { self .finishdir } /blast_search/{ name } /{ ref_nosuf } .txt -task megablast -num_threads { self .slurm_header .threads } -outfmt { blast_format } " ,
270+ )
271+ + "\n "
248272 )
249273 batchfile .write ("\n " )
250274 batchfile .close ()
@@ -262,37 +286,72 @@ def create_variantsection(self):
262286
263287 batchfile .write ("## Alignment & Deduplication\n " )
264288 batchfile .write (
265- f"bwa mem -M -t { self .slurm_header .threads } { ref } { self .concat_files ['f' ]} { self .concat_files ['r' ]} > { outbase } .sam\n "
289+ self ._singularity_exec (
290+ "bwa" ,
291+ f"bwa mem -M -t { self .slurm_header .threads } { ref } { self .concat_files ['f' ]} { self .concat_files ['r' ]} > { outbase } .sam" ,
292+ )
293+ + "\n "
266294 )
267295 batchfile .write (
268- f"samtools view --threads { self .slurm_header .threads } -b -o { outbase } .bam -T { ref } { outbase } .sam\n "
296+ self ._singularity_exec (
297+ "samtools" ,
298+ f"samtools view --threads { self .slurm_header .threads } -b -o { outbase } .bam -T { ref } { outbase } .sam" ,
299+ )
300+ + "\n "
269301 )
270302 batchfile .write (
271- f"samtools sort --threads { self .slurm_header .threads } -o { outbase } .bam_sort { outbase } .bam\n "
303+ self ._singularity_exec (
304+ "samtools" ,
305+ f"samtools sort --threads { self .slurm_header .threads } -o { outbase } .bam_sort { outbase } .bam" ,
306+ )
307+ + "\n "
272308 )
273309 batchfile .write (
274- f"picard MarkDuplicates I={ outbase } .bam_sort O={ outbase } .bam_sort_rmdup M={ outbase } .stats.dup REMOVE_DUPLICATES=true\n "
310+ self ._singularity_exec (
311+ "picard" ,
312+ f"picard MarkDuplicates I={ outbase } .bam_sort O={ outbase } .bam_sort_rmdup M={ outbase } .stats.dup REMOVE_DUPLICATES=true" ,
313+ )
314+ + "\n "
275315 )
276- batchfile .write (f"samtools index { outbase } .bam_sort_rmdup\n " )
277316 batchfile .write (
278- f"samtools idxstats { outbase } .bam_sort_rmdup &> { outbase } .stats.ref\n "
317+ self ._singularity_exec ("samtools" , f"samtools index { outbase } .bam_sort_rmdup" )
318+ + "\n "
319+ )
320+ batchfile .write (
321+ self ._singularity_exec (
322+ "samtools" , f"samtools idxstats { outbase } .bam_sort_rmdup"
323+ )
324+ + f" &> { outbase } .stats.ref\n "
279325 )
280326 # Removal of temp aligment files
281327 batchfile .write (f"rm { outbase } .bam { outbase } .sam\n " )
282328
283329 batchfile .write ("## Primary stats generation\n " )
284330 # Insert stats, dedupped
285331 batchfile .write (
286- f"picard CollectInsertSizeMetrics I={ outbase } .bam_sort_rmdup O={ outbase } .stats.ins H={ outbase } .hist.ins\n "
332+ self ._singularity_exec (
333+ "picard" ,
334+ f"picard CollectInsertSizeMetrics I={ outbase } .bam_sort_rmdup O={ outbase } .stats.ins H={ outbase } .hist.ins" ,
335+ )
336+ + "\n "
287337 )
288338 # Coverage
289339 batchfile .write (
290- f"samtools stats --coverage 1,10000,1 { outbase } .bam_sort_rmdup |grep ^COV | cut -f 2- &> { outbase } .stats.cov\n "
340+ self ._singularity_exec (
341+ "samtools" , f"samtools stats --coverage 1,10000,1 { outbase } .bam_sort_rmdup"
342+ )
343+ + f" |grep ^COV | cut -f 2- &> { outbase } .stats.cov\n "
291344 )
292345 # Mapped rate, no dedup,dedup in MWGS (trimming has no effect)!
293- batchfile .write (f"samtools flagstat { outbase } .bam_sort &> { outbase } .stats.map\n " )
346+ batchfile .write (
347+ self ._singularity_exec ("samtools" , f"samtools flagstat { outbase } .bam_sort" )
348+ + f" &> { outbase } .stats.map\n "
349+ )
294350 # Total reads, no dedup,dedup in MWGS (trimming has no effect)!
295- batchfile .write (f"samtools view -c { outbase } .bam_sort &> { outbase } .stats.raw\n " )
351+ batchfile .write (
352+ self ._singularity_exec ("samtools" , f"samtools view -c { outbase } .bam_sort" )
353+ + f" &> { outbase } .stats.raw\n "
354+ )
296355
297356 batchfile .write ("\n \n " )
298357 batchfile .close ()
@@ -301,6 +360,7 @@ def create_preprocsection(self):
301360 """Concatinates data, possibly trims it, then makes the unstranded reads usable"""
302361 forward = list ()
303362 reverse = list ()
363+
304364 for root , dirs , files in os .walk (self .folders .adapters ):
305365 if "NexteraPE-PE.fa" not in files :
306366 self .logger .error (
@@ -336,7 +396,17 @@ def create_preprocsection(self):
336396 ru = f"{ trimdir } /{ outfile } _trim_rev_unpair.fastq.gz"
337397 batchfile .write ("##Trimming section\n " )
338398 batchfile .write (
339- f"trimmomatic PE -threads { self .slurm_header .threads } -phred33 { self .concat_files .get ('f' )} { self .concat_files .get ('r' )} { fp } { fu } { rp } { ru } ILLUMINACLIP:{ self .folders .adapters } /NexteraPE-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36\n "
399+ self ._singularity_exec (
400+ "trimmomatic" ,
401+ (
402+ f"trimmomatic PE -threads { self .slurm_header .threads } "
403+ f" -phred33 { self .concat_files .get ('f' )} { self .concat_files .get ('r' )} "
404+ f" { fp } { fu } { rp } { ru } "
405+ f" ILLUMINACLIP:{ self .folders .adapters } /NexteraPE-PE.fa:2:30:10"
406+ " LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
407+ ),
408+ )
409+ + "\n "
340410 )
341411
342412 batchfile .write ("## Interlaced trimmed files\n " )
@@ -353,7 +423,11 @@ def create_assemblystats_section(self):
353423 batchfile .write ("# QUAST QC metrics\n " )
354424 batchfile .write (f"mkdir { self .finishdir } /assembly/quast\n " )
355425 batchfile .write (
356- f"quast.py { self .finishdir } /assembly/{ self .name } _contigs.fasta -o { self .finishdir } /assembly/quast\n "
426+ self ._singularity_exec (
427+ "quast" ,
428+ f"quast.py { self .finishdir } /assembly/{ self .name } _contigs.fasta -o { self .finishdir } /assembly/quast" ,
429+ )
430+ + "\n "
357431 )
358432 batchfile .write (
359433 f"mv { self .finishdir } /assembly/quast/report.tsv { self .finishdir } /assembly/quast/{ self .name } _report.tsv\n \n "
0 commit comments