Skip to content

Commit 679509a

Browse files
committed
remove adapters from folder
1 parent 22db7e8 commit 679509a

File tree

8 files changed

+77
-62
lines changed

8 files changed

+77
-62
lines changed

configExample.json

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@
3434
"_comment": "Download path for NCBI genomes, for alignment usage",
3535
"genomes": "/tmp/MLST/references/genomes",
3636
"_comment": "Credentials",
37-
"credentials": "/tmp/MLST/credentials",
38-
"_comment": "Trimmomatic adapter FASTA files directory",
39-
"adapters": "/path/to/trimmomatic/adapters/"
37+
"credentials": "/tmp/MLST/credentials"
4038
},
4139
"_comment": "Database/Flask configuration",
4240
"database": {
@@ -90,12 +88,13 @@
9088
},
9189
"_comment": "Absolute paths to pre-built Singularity SIF images on the cluster",
9290
"containers": {
93-
"skesa": "/fs1/resources/containers/skesa_2.5.1.sif",
94-
"blast": "/fs1/resources/containers/blast_2.12.0.sif",
95-
"bwa": "/fs1/resources/containers/bwa_0.7.17.sif",
96-
"samtools": "/fs1/resources/containers/samtools_1.13.sif",
97-
"picard": "/fs1/resources/containers/picard_2.20.3.sif",
91+
"skesa": "/fs1/resources/containers/skesa_2.5.1.sif",
92+
"blast": "/fs1/resources/containers/blast_2.12.0.sif",
93+
"bwa": "/fs1/resources/containers/bwa_0.7.17.sif",
94+
"samtools": "/fs1/resources/containers/samtools_1.13.sif",
95+
"picard": "/fs1/resources/containers/picard_2.20.3.sif",
9896
"trimmomatic": "/fs1/resources/containers/trimmomatic_0.39.sif",
99-
"quast": "/fs1/resources/containers/quast_5.3.0.sif"
97+
"quast": "/fs1/resources/containers/quast_5.3.0.sif"
10098
}
101-
}
99+
}
100+

microSALT/cli.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
from microSALT import __version__, logging_levels, setup_logger
1717
from microSALT.config import MicroSALTConfig, load_config
1818
from microSALT.exc.exceptions import RefUpdateLockError
19-
from microSALT.store.database import create_tables, get_scoped_session_registry, initialize_database
19+
from microSALT.store.database import (
20+
create_tables,
21+
get_scoped_session_registry,
22+
initialize_database,
23+
)
2024
from microSALT.utils.job_creator import Job_Creator
2125
from microSALT.utils.referencer import Referencer
2226
from microSALT.utils.reporter import Reporter
@@ -110,7 +114,6 @@ def _ensure_directories(config: MicroSALTConfig) -> None:
110114
config.folders.resistances,
111115
config.folders.genomes,
112116
config.folders.credentials,
113-
config.folders.adapters,
114117
]
115118
for path in folder_paths:
116119
p = pathlib.Path(os.path.expandvars(os.path.expanduser(path)))

microSALT/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ class Folders(BaseModel):
3030
resistances: str
3131
genomes: str
3232
credentials: str
33-
adapters: str
3433
expec: str = "" # filled in after construction
3534

3635

@@ -69,9 +68,11 @@ class BIGSdbCredentials(BaseModel):
6968
client_id: str = ""
7069
client_secret: str = ""
7170

71+
7272
class PubMLSTCredentials(BIGSdbCredentials):
7373
pass
7474

75+
7576
class PasteurCredentials(BIGSdbCredentials):
7677
pass
7778

microSALT/utils/job_creator.py

Lines changed: 49 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,45 @@
1111
import shutil
1212
import subprocess
1313
import sys
14-
from importlib.metadata import entry_points
1514
import time
1615
from datetime import datetime
16+
from importlib.metadata import entry_points
1717
from pathlib import Path
1818

1919
import yaml
2020

2121
from microSALT import __version__
22-
from microSALT.config import Folders, Threshold, SlurmHeader, Regex, PubMLSTCredentials, PasteurCredentials, Singularity, Containers
22+
from microSALT.config import (
23+
Containers,
24+
Folders,
25+
PasteurCredentials,
26+
PubMLSTCredentials,
27+
Regex,
28+
Singularity,
29+
SlurmHeader,
30+
Threshold,
31+
)
2332
from microSALT.store.db_manipulator import DB_Manipulator
2433
from microSALT.utils.referencer import Referencer
2534

2635

2736
class Job_Creator:
28-
def __init__(self, log, folders: Folders, slurm_header: SlurmHeader, regex: Regex, dry: bool, config_path: str, threshold: Threshold, pubmlst: PubMLSTCredentials, pasteur: PasteurCredentials, singularity: Singularity, containers: Containers, sampleinfo={}, run_settings={}):
37+
def __init__(
38+
self,
39+
log,
40+
folders: Folders,
41+
slurm_header: SlurmHeader,
42+
regex: Regex,
43+
dry: bool,
44+
config_path: str,
45+
threshold: Threshold,
46+
pubmlst: PubMLSTCredentials,
47+
pasteur: PasteurCredentials,
48+
singularity: Singularity,
49+
containers: Containers,
50+
sampleinfo={},
51+
run_settings={},
52+
):
2953
self.folders = folders
3054
self.slurm_header = slurm_header
3155
self.regex = regex
@@ -89,7 +113,15 @@ def __init__(self, log, folders: Folders, slurm_header: SlurmHeader, regex: Rege
89113
self.finishdir = f"{folders.results}/{self.name}_{self.now}"
90114
self.db_pusher = DB_Manipulator(log=log, folders=folders, threshold=threshold)
91115
self.concat_files = dict()
92-
self.ref_resolver = Referencer(log=log, folders=folders, threshold=threshold, pubmlst=pubmlst, pasteur=pasteur, singularity=singularity, containers=containers)
116+
self.ref_resolver = Referencer(
117+
log=log,
118+
folders=folders,
119+
threshold=threshold,
120+
pubmlst=pubmlst,
121+
pasteur=pasteur,
122+
singularity=singularity,
123+
containers=containers,
124+
)
93125

94126
def get_sbatch(self):
95127
"""Returns sbatchfile, slightly superfluous"""
@@ -141,15 +173,13 @@ def verify_fastq(self):
141173
else:
142174
pairno = 2 - 1 % int(file_match[1]) # 1->2, 2->1
143175
# Construct mate name
144-
pairname = f"{file_match.string[:file_match.end(1) - 1]}{pairno}{file_match.string[file_match.end(1):file_match.end()]}"
176+
pairname = f"{file_match.string[: file_match.end(1) - 1]}{pairno}{file_match.string[file_match.end(1) : file_match.end()]}"
145177
if pairname in files:
146178
files.pop(files.index(pairname))
147179
verified_files.append(file_match[0])
148180
verified_files.append(pairname)
149181
else:
150-
raise Exception(
151-
f"Some fastq files have no mate in directory {self.indir}."
152-
)
182+
raise Exception(f"Some fastq files have no mate in directory {self.indir}.")
153183
if verified_files == []:
154184
raise Exception(
155185
f"No files in directory {self.indir} match file_pattern '{self.regex.file_pattern}'."
@@ -163,9 +193,7 @@ def verify_fastq(self):
163193
if bsize > 1000:
164194
self.logger.warning(f"Input fastq {vfile} exceeds 1000MB")
165195
except Exception:
166-
self.logger.warning(
167-
f"Unable to verify size of input file {self.indir}/{vfile}"
168-
)
196+
self.logger.warning(f"Unable to verify size of input file {self.indir}/{vfile}")
169197

170198
# Warn about invalid fastq files
171199
for vfile in verified_files:
@@ -198,9 +226,7 @@ def create_assemblysection(self):
198226
f"--contigs_out {contigs_file_raw} "
199227
f"--reads {self.concat_files['f']},{self.concat_files['r']}"
200228
)
201-
batchfile.write(
202-
f"mkdir -p {assembly_dir} &" f"{self._singularity_exec('skesa', skesa_cmd)}\n"
203-
)
229+
batchfile.write(f"mkdir -p {assembly_dir} &{self._singularity_exec('skesa', skesa_cmd)}\n")
204230

205231
# Convert sequence naming in Skesa output into Spades format in the contigs fasta file:
206232
# ----------------------------------------------
@@ -318,13 +344,10 @@ def create_variantsection(self):
318344
+ "\n"
319345
)
320346
batchfile.write(
321-
self._singularity_exec("samtools", f"samtools index {outbase}.bam_sort_rmdup")
322-
+ "\n"
347+
self._singularity_exec("samtools", f"samtools index {outbase}.bam_sort_rmdup") + "\n"
323348
)
324349
batchfile.write(
325-
self._singularity_exec(
326-
"samtools", f"samtools idxstats {outbase}.bam_sort_rmdup"
327-
)
350+
self._singularity_exec("samtools", f"samtools idxstats {outbase}.bam_sort_rmdup")
328351
+ f" &> {outbase}.stats.ref\n"
329352
)
330353
# Removal of temp alignment files
@@ -365,13 +388,6 @@ def create_preprocsection(self):
365388
forward = list()
366389
reverse = list()
367390

368-
for root, dirs, files in os.walk(self.folders.adapters):
369-
if "NexteraPE-PE.fa" not in files:
370-
self.logger.error(
371-
"Adapters folder at {} does not contain NexteraPE-PE.fa. Review paths.yml"
372-
)
373-
else:
374-
break
375391
trimdir = f"{self.finishdir}/trimmed"
376392
files = self.verify_fastq()
377393
batchfile = open(self.batchfile, "a+")
@@ -406,7 +422,7 @@ def create_preprocsection(self):
406422
f"trimmomatic PE -threads {self.slurm_header.threads}"
407423
f" -phred33 {self.concat_files.get('f')} {self.concat_files.get('r')}"
408424
f" {fp} {fu} {rp} {ru}"
409-
f" ILLUMINACLIP:{self.folders.adapters}/NexteraPE-PE.fa:2:30:10"
425+
f" ILLUMINACLIP:{self.singularity.trimmomatic_adapters}/NexteraPE-PE.fa:2:30:10"
410426
" LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
411427
),
412428
)
@@ -579,9 +595,7 @@ def project_job(self, single_sample=False):
579595
else:
580596
self.create_project(self.name)
581597
except Exception:
582-
self.logger.error(
583-
f"LIMS interaction failed. Unable to read/write project {self.name}"
584-
)
598+
self.logger.error(f"LIMS interaction failed. Unable to read/write project {self.name}")
585599
# Writes the job creation sbatch
586600
if single_sample:
587601
try:
@@ -648,7 +662,9 @@ def project_job(self, single_sample=False):
648662

649663
def _write_mailjob(self, mailfile: str, report: str, custom_conf: str) -> None:
650664
"""Write the mailjob.sh script that runs `microsalt utils finish` after all jobs complete."""
651-
_ep = next(ep for ep in entry_points(group="console_scripts") if ep.value == "microSALT.cli:root")
665+
_ep = next(
666+
ep for ep in entry_points(group="console_scripts") if ep.value == "microSALT.cli:root"
667+
)
652668
microsalt_bin = Path(sys.executable).parent / _ep.name
653669
with open(mailfile, "w+") as mb:
654670
mb.write("#!/usr/bin/env bash\n\n")
@@ -788,9 +804,7 @@ def sample_job(self):
788804
except Exception:
789805
self.logger.error(f"Unable to access LIMS info for sample {self.name}")
790806
except Exception as e:
791-
self.logger.error(
792-
f"Unable to create job for sample {self.name}\nSource: {e!s}"
793-
)
807+
self.logger.error(f"Unable to create job for sample {self.name}\nSource: {e!s}")
794808
shutil.rmtree(self.finishdir, ignore_errors=True)
795809
raise
796810

@@ -824,9 +838,7 @@ def snp_job(self):
824838
batchfile = open(self.batchfile, "a+")
825839
batchfile.close()
826840

827-
headerline = (
828-
f"-A {self.slurm_header.project} -p {self.slurm_header.type} -n 1 -t 24:00:00 -J {self.slurm_header.job_prefix}_{self.name} --qos {self.slurm_header.qos} --output {self.finishdir}/slurm_{self.name}.log"
829-
)
841+
headerline = f"-A {self.slurm_header.project} -p {self.slurm_header.type} -n 1 -t 24:00:00 -J {self.slurm_header.job_prefix}_{self.name} --qos {self.slurm_header.qos} --output {self.finishdir}/slurm_{self.name}.log"
830842
outfile = self.get_sbatch()
831843
bash_cmd = f"sbatch {headerline} {outfile}"
832844
samproc = subprocess.Popen(bash_cmd.split(), stdout=subprocess.PIPE)

scripts/compare_typing_reports.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,6 @@ def parse_report(path: Path) -> Report:
139139
# Each sample detail section has an overview table containing "CG Prov ID".
140140
# We walk all tables and use context to pair MLST/resistance tables with
141141
# the sample they belong to.
142-
current_cg_id: str | None = None
143142
current_detail: SampleDetail | None = None
144143

145144
for table in soup.find_all("table"):
@@ -258,6 +257,11 @@ def compare_reports(r1: Report, r2: Report) -> int:
258257
# MLST allele comparison
259258
d1 = r1.details.get(cg_id)
260259
d2 = r2.details.get(cg_id)
260+
if d1 is None or d2 is None:
261+
missing = "A" if d1 is None else "B"
262+
sample_diffs.append(
263+
f" {_YELLOW}WARNING: per-sample detail section missing in report {missing}{_RESET}"
264+
)
261265
if d1 and d2:
262266
loci1 = {m.loci: m.allele for m in d1.mlst}
263267
loci2 = {m.loci: m.allele for m in d2.mlst}

tests/conftest.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import json
22
import logging
33
import pathlib
4-
import pytest
54
from importlib.resources import files as resource_files
65

6+
import pytest
7+
8+
from microSALT import setup_logger
79
from microSALT.config import (
810
Containers,
911
Database,
@@ -16,7 +18,6 @@
1618
SlurmHeader,
1719
Threshold,
1820
)
19-
from microSALT import setup_logger
2021
from microSALT.store.database import initialize_database
2122
from microSALT.store.db_manipulator import DB_Manipulator
2223

@@ -65,7 +66,6 @@ def config(tmp_path_factory: pytest.TempPathFactory) -> MicroSALTConfig:
6566
resistances=str(resistances),
6667
genomes=str(genomes),
6768
credentials=str(credentials),
68-
adapters="/path/to/trimmomatic/adapters/",
6969
),
7070
database=Database(
7171
SQLALCHEMY_DATABASE_URI=f"sqlite:///{db_path}",
@@ -78,9 +78,7 @@ def config(tmp_path_factory: pytest.TempPathFactory) -> MicroSALTConfig:
7878
singularity=Singularity(),
7979
containers=Containers(),
8080
)
81-
cfg.folders.expec = str(
82-
resource_files("microSALT").joinpath("unique_references", "ExPEC.fsa")
83-
)
81+
cfg.folders.expec = str(resource_files("microSALT").joinpath("unique_references", "ExPEC.fsa"))
8482
cfg.config_path = str(base / "config.json")
8583

8684
setup_logger(logging_level="INFO")

tests/test_cli.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ def setup_config(tmp_path: Path) -> MicroSALTConfig:
5858
resistances=str(base / "resistances"),
5959
genomes=str(base / "genomes"),
6060
credentials=str(base / "credentials"),
61-
adapters=str(base / "adapters"),
6261
),
6362
database=Database(SQLALCHEMY_DATABASE_URI=f"sqlite:///{base / 'microsalt.db'}"),
6463
threshold=Threshold(),

tests/test_config.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
from pydantic import ValidationError
55

66
from microSALT.config import (
7-
MicroSALTConfig,
8-
load_config,
7+
Database,
98
Folders,
9+
MicroSALTConfig,
1010
Regex,
1111
SlurmHeader,
1212
Threshold,
13-
Database,
13+
load_config,
1414
)
1515

1616
CONFIGEXAMPLE = str(pathlib.Path(__file__).parent.parent / "configExample.json")
@@ -53,7 +53,6 @@ def test_folders_fields():
5353
assert cfg.folders.resistances
5454
assert cfg.folders.genomes
5555
assert cfg.folders.credentials
56-
assert cfg.folders.adapters
5756

5857

5958
def test_expec_path_injected():

0 commit comments

Comments
 (0)