Skip to content

Commit b94e596

Browse files
committed
- Handles tabs in headers and spaces in assembly paths.
- Switch from pathlib.Path to os.PathLike for more API flexibility.
- Pre-load locus gene count array and locus indices to reduce iteration.
- Better logging for warnings and errors.
- Better logging for no alignments.
1 parent 8c4516f commit b94e596

File tree

8 files changed

+241
-261
lines changed

8 files changed

+241
-261
lines changed

kaptive/__main__.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@
1919
import re
2020
import argparse
2121
from io import TextIOWrapper
22+
from os import path
23+
24+
from Bio import __version__ as biopython_version
2225

2326
from kaptive.version import __version__
2427
from kaptive.log import bold, quit_with_error, log
25-
from kaptive.misc import check_python_version, check_biopython_version, get_logo, check_out, check_file, check_cpus
28+
from kaptive.utils import get_logo, check_out, check_cpus
2629

2730
# Constants -----------------------------------------------------------------------------------------------------------
2831
_URL = 'https://kaptive.readthedocs.io/en/latest/'
@@ -71,7 +74,7 @@ def assembly_subparser(subparsers):
7174
help='In silico serotyping of assemblies', usage="kaptive assembly <db> <fasta> [<fasta> ...] [options]")
7275
opts = assembly_parser.add_argument_group(bold('Inputs'), "")
7376
opts.add_argument('db', metavar='db path/keyword', help='Kaptive database path or keyword')
74-
opts.add_argument('input', nargs='+', metavar='fasta', type=check_file, help='Assemblies in fasta format')
77+
opts.add_argument('input', nargs='+', metavar='fasta', help='Assemblies in fasta(.gz|.xz|.bz2) format')
7578
opts = assembly_parser.add_argument_group(bold('Output options'), "\nNote, text outputs accept '-' for stdout")
7679
# Note these are different to the convert output options as TSV is the main output and fna is the main fasta output
7780
opts.add_argument('-o', '--out', metavar='', default=sys.stdout, type=argparse.FileType('at'),
@@ -105,7 +108,7 @@ def assembly_subparser(subparsers):
105108
" 3: Proportion of genes found\n"
106109
" 4: blen (aligned bases of genes found)\n"
107110
" 5: q_len (query length of genes found)")
108-
opts.add_argument('--n-best', type=int, default=2, metavar='', choices=range(1, 51),
111+
opts.add_argument('--n-best', type=int, default=2, metavar='',
109112
help='Number of best loci from the 1st round of scoring to be\n'
110113
'fully aligned to the assembly (default: %(default)s)')
111114

@@ -215,8 +218,12 @@ def other_opts(opts: argparse.ArgumentParser):
215218

216219
# Main -----------------------------------------------------------------------------------------------------------------
217220
def main():
218-
check_python_version(3, 9) # Check the python version
219-
check_biopython_version(1, 83) # Check the biopython version
221+
if sys.version_info.major < 3 or sys.version_info.minor < 9:
222+
quit_with_error(f'Python version 3.9 or greater required')
223+
224+
if int(biopython_version.split('.')[0]) < 1 or int(biopython_version.split('.')[1]) < 83:
225+
quit_with_error('Biopython version 1.83 or greater required')
226+
220227
args = parse_args(sys.argv[1:]) # Parse the arguments
221228

222229
# Assembly mode ----------------------------------------------------------------------------------------------------
@@ -238,9 +245,8 @@ def main():
238245

239246
# Extract mode -----------------------------------------------------------------------------------------------------
240247
elif args.subparser_name == 'extract':
241-
from kaptive.database import parse_database, get_database
242-
243-
for locus in parse_database(get_database(args.db), args.filter, args.fna, args.faa, args.verbose,
248+
from kaptive.database import parse_database
249+
for locus in parse_database(args.db, args.filter, args.fna, args.faa, args.verbose,
244250
locus_regex=args.locus_regex, type_regex=args.type_regex):
245251
locus.write(args.fna, args.ffn, args.faa)
246252

kaptive/alignment.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
from __future__ import annotations
1414
from typing import Iterable, Generator
1515
from itertools import groupby
16-
from kaptive.misc import range_overlap
17-
from kaptive.log import warning
16+
from kaptive.utils import range_overlap
1817

1918

2019
# Classes -------------------------------------------------------------------------------------------------------------
@@ -95,19 +94,6 @@ def partial(self) -> bool:
9594

9695

9796
# Functions ------------------------------------------------------------------------------------------------------------
98-
def iter_alns(data: str) -> Generator[Alignment, None, None]:
99-
"""Iterate over alignments in a chunk of data"""
100-
# It's probably better to decode the data here rather than in the Alignment class
101-
if not data:
102-
return None
103-
for line in data.splitlines():
104-
try:
105-
yield Alignment.from_paf_line(line)
106-
except AlignmentError:
107-
warning(f"Skipping invalid alignment line: {line}")
108-
continue
109-
110-
11197
def group_alns(alignments: Iterable[Alignment], key: str = 'q') -> Generator[tuple[str, Generator[Alignment]]]:
11298
"""Group alignments by a key"""
11399
yield from groupby(sorted(alignments, key=lambda x: getattr(x, key)), key=lambda x: getattr(x, key))

0 commit comments

Comments
 (0)