1111import shutil
1212import subprocess
1313import sys
14- from importlib .metadata import entry_points
1514import time
1615from datetime import datetime
16+ from importlib .metadata import entry_points
1717from pathlib import Path
1818
1919import yaml
2020
2121from microSALT import __version__
22- from microSALT .config import Folders , Threshold , SlurmHeader , Regex , PubMLSTCredentials , PasteurCredentials , Singularity , Containers
22+ from microSALT .config import (
23+ Containers ,
24+ Folders ,
25+ PasteurCredentials ,
26+ PubMLSTCredentials ,
27+ Regex ,
28+ Singularity ,
29+ SlurmHeader ,
30+ Threshold ,
31+ )
2332from microSALT .store .db_manipulator import DB_Manipulator
2433from microSALT .utils .referencer import Referencer
2534
2635
2736class Job_Creator :
28- def __init__ (self , log , folders : Folders , slurm_header : SlurmHeader , regex : Regex , dry : bool , config_path : str , threshold : Threshold , pubmlst : PubMLSTCredentials , pasteur : PasteurCredentials , singularity : Singularity , containers : Containers , sampleinfo = {}, run_settings = {}):
37+ def __init__ (
38+ self ,
39+ log ,
40+ folders : Folders ,
41+ slurm_header : SlurmHeader ,
42+ regex : Regex ,
43+ dry : bool ,
44+ config_path : str ,
45+ threshold : Threshold ,
46+ pubmlst : PubMLSTCredentials ,
47+ pasteur : PasteurCredentials ,
48+ singularity : Singularity ,
49+ containers : Containers ,
50+ sampleinfo = {},
51+ run_settings = {},
52+ ):
2953 self .folders = folders
3054 self .slurm_header = slurm_header
3155 self .regex = regex
@@ -89,7 +113,15 @@ def __init__(self, log, folders: Folders, slurm_header: SlurmHeader, regex: Rege
89113 self .finishdir = f"{ folders .results } /{ self .name } _{ self .now } "
90114 self .db_pusher = DB_Manipulator (log = log , folders = folders , threshold = threshold )
91115 self .concat_files = dict ()
92- self .ref_resolver = Referencer (log = log , folders = folders , threshold = threshold , pubmlst = pubmlst , pasteur = pasteur , singularity = singularity , containers = containers )
116+ self .ref_resolver = Referencer (
117+ log = log ,
118+ folders = folders ,
119+ threshold = threshold ,
120+ pubmlst = pubmlst ,
121+ pasteur = pasteur ,
122+ singularity = singularity ,
123+ containers = containers ,
124+ )
93125
94126 def get_sbatch (self ):
95127 """Returns sbatchfile, slightly superfluous"""
@@ -141,15 +173,13 @@ def verify_fastq(self):
141173 else :
142174 pairno = 2 - 1 % int (file_match [1 ]) # 1->2, 2->1
143175 # Construct mate name
144- pairname = f"{ file_match .string [:file_match .end (1 ) - 1 ]} { pairno } { file_match .string [file_match .end (1 ): file_match .end ()]} "
176+ pairname = f"{ file_match .string [: file_match .end (1 ) - 1 ]} { pairno } { file_match .string [file_match .end (1 ) : file_match .end ()]} "
145177 if pairname in files :
146178 files .pop (files .index (pairname ))
147179 verified_files .append (file_match [0 ])
148180 verified_files .append (pairname )
149181 else :
150- raise Exception (
151- f"Some fastq files have no mate in directory { self .indir } ."
152- )
182+ raise Exception (f"Some fastq files have no mate in directory { self .indir } ." )
153183 if verified_files == []:
154184 raise Exception (
155185 f"No files in directory { self .indir } match file_pattern '{ self .regex .file_pattern } '."
@@ -163,9 +193,7 @@ def verify_fastq(self):
163193 if bsize > 1000 :
164194 self .logger .warning (f"Input fastq { vfile } exceeds 1000MB" )
165195 except Exception :
166- self .logger .warning (
167- f"Unable to verify size of input file { self .indir } /{ vfile } "
168- )
196+ self .logger .warning (f"Unable to verify size of input file { self .indir } /{ vfile } " )
169197
170198 # Warn about invalid fastq files
171199 for vfile in verified_files :
@@ -198,9 +226,7 @@ def create_assemblysection(self):
198226 f"--contigs_out { contigs_file_raw } "
199227 f"--reads { self .concat_files ['f' ]} ,{ self .concat_files ['r' ]} "
200228 )
201- batchfile .write (
202- f"mkdir -p { assembly_dir } &" f"{ self ._singularity_exec ('skesa' , skesa_cmd )} \n "
203- )
229+ batchfile .write (f"mkdir -p { assembly_dir } &{ self ._singularity_exec ('skesa' , skesa_cmd )} \n " )
204230
205231 # Convert sequence naming in Skesa output into Spades format in the contigs fasta file:
206232 # ----------------------------------------------
@@ -318,13 +344,10 @@ def create_variantsection(self):
318344 + "\n "
319345 )
320346 batchfile .write (
321- self ._singularity_exec ("samtools" , f"samtools index { outbase } .bam_sort_rmdup" )
322- + "\n "
347+ self ._singularity_exec ("samtools" , f"samtools index { outbase } .bam_sort_rmdup" ) + "\n "
323348 )
324349 batchfile .write (
325- self ._singularity_exec (
326- "samtools" , f"samtools idxstats { outbase } .bam_sort_rmdup"
327- )
350+ self ._singularity_exec ("samtools" , f"samtools idxstats { outbase } .bam_sort_rmdup" )
328351 + f" &> { outbase } .stats.ref\n "
329352 )
330353 # Removal of temp alignment files
@@ -365,13 +388,6 @@ def create_preprocsection(self):
365388 forward = list ()
366389 reverse = list ()
367390
368- for root , dirs , files in os .walk (self .folders .adapters ):
369- if "NexteraPE-PE.fa" not in files :
370- self .logger .error (
371- "Adapters folder at {} does not contain NexteraPE-PE.fa. Review paths.yml"
372- )
373- else :
374- break
375391 trimdir = f"{ self .finishdir } /trimmed"
376392 files = self .verify_fastq ()
377393 batchfile = open (self .batchfile , "a+" )
@@ -406,7 +422,7 @@ def create_preprocsection(self):
406422 f"trimmomatic PE -threads { self .slurm_header .threads } "
407423 f" -phred33 { self .concat_files .get ('f' )} { self .concat_files .get ('r' )} "
408424 f" { fp } { fu } { rp } { ru } "
409- f" ILLUMINACLIP:{ self .folders . adapters } /NexteraPE-PE.fa:2:30:10"
425+ f" ILLUMINACLIP:{ self .singularity . trimmomatic_adapters } /NexteraPE-PE.fa:2:30:10"
410426 " LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
411427 ),
412428 )
@@ -579,9 +595,7 @@ def project_job(self, single_sample=False):
579595 else :
580596 self .create_project (self .name )
581597 except Exception :
582- self .logger .error (
583- f"LIMS interaction failed. Unable to read/write project { self .name } "
584- )
598+ self .logger .error (f"LIMS interaction failed. Unable to read/write project { self .name } " )
585599 # Writes the job creation sbatch
586600 if single_sample :
587601 try :
@@ -648,7 +662,9 @@ def project_job(self, single_sample=False):
648662
649663 def _write_mailjob (self , mailfile : str , report : str , custom_conf : str ) -> None :
650664 """Write the mailjob.sh script that runs `microsalt utils finish` after all jobs complete."""
651- _ep = next (ep for ep in entry_points (group = "console_scripts" ) if ep .value == "microSALT.cli:root" )
665+ _ep = next (
666+ ep for ep in entry_points (group = "console_scripts" ) if ep .value == "microSALT.cli:root"
667+ )
652668 microsalt_bin = Path (sys .executable ).parent / _ep .name
653669 with open (mailfile , "w+" ) as mb :
654670 mb .write ("#!/usr/bin/env bash\n \n " )
@@ -788,9 +804,7 @@ def sample_job(self):
788804 except Exception :
789805 self .logger .error (f"Unable to access LIMS info for sample { self .name } " )
790806 except Exception as e :
791- self .logger .error (
792- f"Unable to create job for sample { self .name } \n Source: { e !s} "
793- )
807+ self .logger .error (f"Unable to create job for sample { self .name } \n Source: { e !s} " )
794808 shutil .rmtree (self .finishdir , ignore_errors = True )
795809 raise
796810
@@ -824,9 +838,7 @@ def snp_job(self):
824838 batchfile = open (self .batchfile , "a+" )
825839 batchfile .close ()
826840
827- headerline = (
828- f"-A { self .slurm_header .project } -p { self .slurm_header .type } -n 1 -t 24:00:00 -J { self .slurm_header .job_prefix } _{ self .name } --qos { self .slurm_header .qos } --output { self .finishdir } /slurm_{ self .name } .log"
829- )
841+ headerline = f"-A { self .slurm_header .project } -p { self .slurm_header .type } -n 1 -t 24:00:00 -J { self .slurm_header .job_prefix } _{ self .name } --qos { self .slurm_header .qos } --output { self .finishdir } /slurm_{ self .name } .log"
830842 outfile = self .get_sbatch ()
831843 bash_cmd = f"sbatch { headerline } { outfile } "
832844 samproc = subprocess .Popen (bash_cmd .split (), stdout = subprocess .PIPE )
0 commit comments