Skip to content

Commit fdf6e6d

Browse files
Merge pull request #542 from fritzsedlazeck/master-pub
Merged version 2.6.0
2 parents 0b36db9 + 28d6fa5 commit fdf6e6d

File tree

14 files changed

+539
-148
lines changed

14 files changed

+539
-148
lines changed

.github/workflows/release.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ jobs:
2020
- name: Build a binary wheel and a source tarball
2121
run: python3 -m build
2222
- name: Store the distribution packages
23-
uses: actions/upload-artifact@v3
23+
uses: actions/upload-artifact@v4
2424
with:
2525
name: python-package-distributions
2626
path: dist/
@@ -40,7 +40,7 @@ jobs:
4040

4141
steps:
4242
- name: Download all the dists
43-
uses: actions/download-artifact@v3
43+
uses: actions/download-artifact@v4
4444
with:
4545
name: python-package-distributions
4646
path: dist/

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,11 @@ You can install Sniffles2 using pip or conda using:
1717

1818
or
1919

20-
`conda install sniffles=2.5.3`
20+
`conda install sniffles=2.6.0`
2121

2222
If you previously installed Sniffles1 using conda and want to upgrade to Sniffles2, you can use:
2323

24-
`conda update sniffles=2.5.3`
24+
`conda update sniffles=2.6.0`
2525

2626
## Requirements
2727
* Python ==3.10.15

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = sniffles
3-
version = 2.5.3
3+
version = 2.6.0
44
author = Moritz Smolka, Hermann Romanek
55
author_email = moritz.g.smolka@gmail.com, sniffles@romanek.at
66
description = A fast structural variation caller for long-read sequencing data

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name='sniffles',
5-
version='2.5.3',
5+
version='2.6.0',
66
packages=find_packages(),
77
url='https://github.com/fritzsedlazeck/Sniffles',
88
license='MIT',

src/sniffles/config.py

Lines changed: 40 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
import sys
1414
import datetime
1515
import argparse
16+
import tempfile
1617
from collections import defaultdict
1718

1819
from typing import Union, Optional
@@ -21,7 +22,7 @@
2122
from sniffles.region import Region
2223

2324
VERSION = "Sniffles2"
24-
BUILD = "2.5.3"
25+
BUILD = "2.6.0"
2526
SNF_VERSION = "S2_rc4"
2627

2728

@@ -41,6 +42,8 @@ def tobool(v):
4142

4243

4344
class SnifflesConfig(argparse.Namespace):
45+
GLOBAL: 'SnifflesConfig'
46+
4447
header = f"Sniffles2: A fast structural variant (SV) caller for long-read sequencing data\n Version {BUILD}\n Contact: sniffles@romanek.at"
4548
example = """ Usage example A - Call SVs for a single sample:
4649
sniffles --input sorted_indexed_alignments.bam --vcf output.vcf
@@ -91,6 +94,7 @@ def sort(self):
9194
threads: int
9295
contig: Optional[str]
9396
run_id: str
97+
tmp_dir: str
9498

9599
@property
96100
def vcf_output_bgz(self) -> Optional[bool]:
@@ -101,8 +105,8 @@ def vcf_output_bgz(self) -> Optional[bool]:
101105
path, ext = os.path.splitext(self.vcf)
102106
return ext == ".gz" or ext == ".bgz"
103107

104-
105-
def add_main_args(self, parser):
108+
@staticmethod
109+
def add_main_args(parser):
106110
main_args = parser.add_argument_group("Common parameters")
107111
main_args.add_argument("-i", "--input", metavar="IN", type=str, help="For single-sample calling: A coordinate-sorted and indexed .bam/.cram (BAM/CRAM format) file containing aligned reads. - OR - For multi-sample calling: Multiple .snf files (generated before by running Sniffles2 for individual samples with --snf)", required=True, nargs="+")
108112
main_args.add_argument("-v", "--vcf", metavar="OUT.vcf", type=str, help="VCF output filename to write the called and refined SVs to. If the given filename ends with .gz, the VCF file will be automatically bgzipped and a .tbi index built for it.", required=False)
@@ -113,12 +117,15 @@ def add_main_args(self, parser):
113117
main_args.add_argument("-t", "--threads", metavar="N", type=int, help="Number of parallel threads to use (speed-up for multi-core CPUs)", default=4)
114118
main_args.add_argument("-c", "--contig", default=None, type=str, help="(Optional) Only process the specified contigs. May be given more than once.", action="append")
115119
main_args.add_argument("--regions", metavar="REGIONS.bed", type=str, help="(Optional) Only process the specified regions.", default=None)
120+
main_args.add_argument("--tmp-dir", type=str, help="(Optional) Directory where temporary files are written, must exist. If it doesn't, default path is used", default="")
116121

117122
minsupport: Union[str, int]
118123
minsvlen: int
119124
minsvlen_screen_ratio: float
125+
max_unknown_pct: float
120126

121-
def add_filter_args(self, parser):
127+
@staticmethod
128+
def add_filter_args(parser):
122129
filter_args = parser.add_argument_group("SV Filtering parameters")
123130
filter_args.add_argument("--minsupport", metavar="auto", type=str, help="Minimum number of supporting reads for a SV to be reported (default: automatically choose based on coverage)", default="auto")
124131
filter_args.add_argument("--minsupport-auto-mult", metavar="0.1/0.025", type=float, help="Coverage based minimum support multiplier for germline mode (only for auto minsupport) ", default=None)
@@ -143,12 +150,14 @@ def add_filter_args(self, parser):
143150
filter_args.add_argument("--min-alignment-length", metavar="N", type=int, help="Reads with alignments shorter than this length (in bp) will be ignored", default=argparse.SUPPRESS)
144151
filter_args.add_argument("--phase-conflict-threshold", metavar="F", type=float, help="Maximum fraction of conflicting reads permitted for SV phase information to be labelled as PASS (only for --phase)", default=0.1)
145152
filter_args.add_argument("--detect-large-ins", help="Infer insertions that are longer than most reads and therefore are spanned by few alignments only.", metavar="True", type=tobool, default=True)
153+
filter_args.add_argument("--max-unknown-pct", help="Maximum percentage of N for an SV to be emitted.", metavar="0.5", type=float, default=0.5)
146154
# filter_args.add_argument("--large-ins-threshold", metavar="N", type=int, help="Minimum clipping at read ends to be considered a potential large insertion (only with --detect-large-ins)", default=5000)
147155

148156
cluster_binsize: int
149157
cluster_binsize_combine_mult: int
150158

151-
def add_cluster_args(self, parser):
159+
@staticmethod
160+
def add_cluster_args(parser):
152161
cluster_args = parser.add_argument_group("SV Clustering parameters")
153162
cluster_args.add_argument("--cluster-binsize", metavar="N", type=int, help="Initial screening bin size in bp", default=100)
154163
cluster_args.add_argument("--cluster-r", metavar="R", type=float, help="Multiplier for SV start position standard deviation criterion in cluster merging", default=2.5)
@@ -161,7 +170,8 @@ def add_cluster_args(self, parser):
161170
genotype_ploidy: int
162171
genotype_vcf: str
163172

164-
def add_genotype_args(self, parser):
173+
@staticmethod
174+
def add_genotype_args(parser):
165175
genotype_args = parser.add_argument_group("SV Genotyping parameters")
166176
genotype_args.add_argument("--genotype-ploidy", metavar="N", type=int, help="Sample ploidy (currently fixed at value 2)", default=2)
167177
genotype_args.add_argument("--genotype-error", metavar="N", type=float, help="Estimated false positive rate for leads (relating to total coverage)", default=0.05)
@@ -182,6 +192,7 @@ def add_genotype_args(self, parser):
182192
combine_pctseq: float
183193
combine_max_inmemory_results: int
184194
combine_support_threshold: int
195+
combine_population: str
185196

186197
@classmethod
187198
def add_multi_args(cls, parser):
@@ -203,6 +214,7 @@ def add_multi_args(cls, parser):
203214
multi_args.add_argument("--combine-pctseq", default=0.7, type=float, help="Minimum alignment distance as percent of SV length to be merged. Set to 0 to disable alignments for merging.")
204215
multi_args.add_argument("--combine-max-inmemory-results", default=20, type=int, help=argparse.SUPPRESS)
205216
multi_args.add_argument("--combine-support-threshold", default=3, metavar="N", type=int, help="Minimum support for SVs to be considered for multi-sample calling.")
217+
multi_args.add_argument("--combine-population", metavar="population.snf", type=str, help="Name of a population SNF to enable population annotation.")
206218
multi_args.add_argument("--re-qc", metavar="auto", default="auto", type=str, help="Re-QC SVs from SNF files. Set to 0 to disable re-qc of SNF files. Set to 1 to force re-qc. Default of 'auto' will try to fix known errors in SNF files.")
207219

208220
# multi_args.add_argument("--combine-exhaustive", help="(DEV) Disable performance optimization in multi-calling", default=False, action="store_true")
@@ -211,7 +223,11 @@ def add_multi_args(cls, parser):
211223

212224
allow_overwrite: bool
213225

214-
def add_postprocess_args(self, parser):
226+
@staticmethod
227+
def add_postprocess_args(parser):
228+
"""
229+
Postprocessing arguments
230+
"""
215231
postprocess_args = parser.add_argument_group("SV Postprocessing, QC and output parameters")
216232
postprocess_args.add_argument("--output-rnames", help="Output names of all supporting reads for each SV in the RNAMEs info field", default=False, action="store_true")
217233
postprocess_args.add_argument("--no-consensus", help="Disable consensus sequence generation for insertion SV calls (may improve performance)", default=False, action="store_true")
@@ -228,7 +244,8 @@ def add_postprocess_args(self, parser):
228244
mosaic_min_reads: int = 3
229245
mosaic_use_strand_thresholds: int = 10
230246

231-
def add_mosaic_args(self, parser):
247+
@staticmethod
248+
def add_mosaic_args(parser):
232249
mosaic_args = parser.add_argument_group("Mosaic calling mode parameters")
233250
mosaic_args.add_argument("--mosaic", help="Set Sniffles run mode to detect rare, somatic and mosaic SVs", default=False, action="store_true")
234251
mosaic_args.add_argument("--mosaic-af-max", help="Maximum allele frequency for which SVs are considered mosaic", metavar="F", default=0.218, type=float)
@@ -243,8 +260,15 @@ def add_mosaic_args(self, parser):
243260
qc_nm: bool
244261
combine_consensus: bool
245262
low_memory: bool
263+
dev_population_snf: str
264+
dev_population_min_gt: float
265+
dev_debug: int
246266

247-
def add_developer_args(self, parser):
267+
@staticmethod
268+
def add_developer_args(parser):
269+
"""
270+
Developer arguments
271+
"""
248272
developer_args = parser.add_argument_group("Developer parameters")
249273

250274
developer_args.add_argument("--dev-emit-sv-lengths", default=False, action="store_true", help=argparse.SUPPRESS)
@@ -275,13 +299,16 @@ def add_developer_args(self, parser):
275299
developer_args.add_argument("--cluster-resplit-binsize", metavar="N", type=int, default=20, help=argparse.SUPPRESS)
276300
developer_args.add_argument("--dev-trace-read", default=False, metavar="read_id", type=str, help=argparse.SUPPRESS)
277301
developer_args.add_argument("--dev-split-max-query-distance-mult", metavar="N", type=int, default=5, help=argparse.SUPPRESS)
278-
developer_args.add_argument("--dev-no-qc", default=False, action="store_true", help=argparse.SUPPRESS) # noqc + mapq0 + minAlnLen0
302+
developer_args.add_argument("--dev-no-qc", default=False, action="store_true", help=argparse.SUPPRESS) # noqc + mapq0 + minAlnLen0
279303
developer_args.add_argument("--dev-disable-interblock-threads", default=False, help=argparse.SUPPRESS, action="store_true")
280304
developer_args.add_argument("--dev-combine-medians", default=False, help=argparse.SUPPRESS, action="store_true")
281305
developer_args.add_argument("--dev-monitor-memory", metavar="N", type=int, default=0, help=argparse.SUPPRESS)
282306
developer_args.add_argument("--dev-monitor-filename", metavar="memory.csv", type=str, help=argparse.SUPPRESS)
283307
developer_args.add_argument("--dev-debug-log", default=False, action="store_true", help=argparse.SUPPRESS)
284308
developer_args.add_argument("--dev-progress-log", default=False, action="store_true", help=argparse.SUPPRESS)
309+
developer_args.add_argument("--dev-population-snf", metavar="population.snf", type=str, help=argparse.SUPPRESS)
310+
developer_args.add_argument("--dev-population-min-gt", default=0.75, type=float, help=argparse.SUPPRESS) # min
311+
developer_args.add_argument("--dev-debug", default=0, type=int, help=argparse.SUPPRESS) # Enable debug connection on the given port
285312

286313
# developer_args.add_argument("--qc-strand", help="(DEV)", default=False, action="store_true")
287314

@@ -302,6 +329,9 @@ def __init__(self, *args, **kwargs):
302329

303330
parser.parse_args(args=args or None, namespace=self)
304331

332+
if not self.tmp_dir or not os.path.exists(self.tmp_dir):
333+
self.tmp_dir = tempfile.gettempdir()
334+
305335
if self.quiet:
306336
sys.stdout = open(os.devnull, "w")
307337

src/sniffles/genotyping.py

Lines changed: 37 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,14 @@
1212
Genotyping
1313
"""
1414
import math
15+
from dataclasses import dataclass
16+
from typing import Any
1517

1618
from sniffles.postprocessing import rescale_support
1719
from sniffles.sv import SVCall
1820

1921

20-
class UnknownGenotype(Exception):
22+
class UnknownGenotypeError(Exception):
2123
"""
2224
Unable to determine genotype
2325
"""
@@ -41,6 +43,23 @@ def likelihood_ratio(q1, q2):
4143
return 0
4244

4345

46+
class UnknownGenotype:
47+
...
48+
49+
50+
@dataclass
51+
class Genotype:
52+
a: int
53+
b: int
54+
qual: int # GQ, 0-60
55+
dr: int
56+
dv: int
57+
phase: Any
58+
59+
UNKNOWN = UnknownGenotype()
60+
61+
62+
4463
class Genotyper:
4564
"""
4665
Generic genotyping
@@ -85,9 +104,16 @@ def _get_coverage_from_list(self, coverage_list: list = None) -> int:
85104
coverage_list = [each_coverage for each_coverage in coverage_list if each_coverage != 0]
86105

87106
if len(coverage_list) > 0:
88-
return round(sum(coverage_list) / len(coverage_list))
107+
if None in coverage_list:
108+
new_coverage_list = [cov_value for cov_value in coverage_list if cov_value is not None]
109+
if len(new_coverage_list) > 0:
110+
return round(sum(new_coverage_list) / len(new_coverage_list))
111+
else:
112+
raise UnknownGenotypeError()
113+
else:
114+
return round(sum(coverage_list) / len(coverage_list))
89115
else:
90-
raise UnknownGenotype()
116+
raise UnknownGenotypeError()
91117

92118
def _filter_by_z_score(self, z_score: float) -> bool:
93119
"""
@@ -106,7 +132,7 @@ def calculate(self):
106132
support = self._calculate_support()
107133
try:
108134
coverage = self._calculate_coverage(support)
109-
except UnknownGenotype:
135+
except UnknownGenotypeError:
110136
return
111137

112138
if support > coverage:
@@ -191,7 +217,13 @@ def _calculate_coverage(self, support: int) -> int:
191217

192218

193219
class DeletionGenotyper(Genotyper):
194-
...
220+
221+
def _calculate_coverage(self, support: int) -> int:
222+
svcall = self.svcall
223+
if support_sa := svcall.get_info('SUPPORT_SA'):
224+
return self._get_coverage_from_list([svcall.coverage_start + support_sa, svcall.coverage_center + support_sa, svcall.coverage_end + support_sa])
225+
else:
226+
return super()._calculate_coverage(support)
195227

196228

197229
GENOTYPER_BY_TYPE = {

src/sniffles/leadprov.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828

2929
@dataclass
3030
class Lead:
31-
read_id: int = None
31+
read_id: int = None # or tuple[int, str, str] for phased reads, with (read_id, HP, PS)
3232
read_qname: str = None
3333
contig: str = None
3434
ref_start: int = None
@@ -440,7 +440,7 @@ def read_itersplits(read_id, read, contig, config, read_nm):
440440
if trace_read:
441441
print(f"[DEV_TRACE_READ] [0c/4] [LeadProvider.read_itersplits] [{read.query_name}] all_leads: {all_leads}")
442442

443-
sv.classify_splits(read, all_leads, config, contig)
443+
all_leads = sv.classify_splits(read, all_leads, config, contig)
444444

445445
if trace_read:
446446
print(f"[DEV_TRACE_READ] [0c/4] [LeadProvider.read_itersplits] [{read.query_name}] classify_splits(all_leads): {all_leads}")

src/sniffles/parallel.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
from sniffles import sv
3131
from sniffles.region import Region
3232
from sniffles.result import Result, ErrorResult, CallResult, GenotypeResult, CombineResult
33+
from sniffles.snfp import PopulationSNF
3334

3435

3536
@dataclass
@@ -93,7 +94,7 @@ def build_leadtab(self):
9394
externals = self.lead_provider.build_leadtab(self.regions if self.regions else [Region(self.contig, self.start, self.end)], self.bam)
9495
return externals, self.lead_provider.read_count
9596

96-
def call_candidates(self, keep_qc_fails, config):
97+
def call_candidates(self, keep_qc_fails, config) -> list[sv.SVCall]:
9798
candidates = []
9899
for svtype in sv.TYPES:
99100
for svcluster in cluster.resolve(svtype, self.lead_provider, config, self.tandem_repeats):
@@ -343,6 +344,9 @@ def execute(self, worker: 'SnifflesWorker' = None):
343344
if self.config.combine_close_handles:
344345
snf_in.close()
345346

347+
if self.config.combine_population:
348+
self.config.combine_population = PopulationSNF.open(self.config.combine_population)
349+
346350
result = self.result_class(self, [], 0)
347351

348352
# block_groups_keep_threshold=5000

0 commit comments

Comments
 (0)