Skip to content

Commit bbbe15c

Browse files
committed
v0.2.4
1 parent eddce72 commit bbbe15c

File tree

4 files changed

+23
-5
lines changed

4 files changed

+23
-5
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
# Changelog
2+
## [0.2.4] - 2024-12-08
3+
### Added
4+
- Add `-g` to control the minimum number of unique marker genes (default: 1) required for a species to report its genome copies. Increasing `-g` (1 -> 2) lowers recall (detection limit: 0.125 -> 0.25) but improves precision.
5+
6+
27
## [0.2.3] - 2024-12-01
38
### Fixed
49
- Fix a bug introduced in v0.2.2 that caused ties not to be resolved properly. Results should be identical to v0.2.1.

src/melon/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
__version__ = '0.2.3'
1+
__version__ = '0.2.4'
22

33
from .melon import GenomeProfiler

src/melon/cli.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,14 @@ def cli(argv=sys.argv):
104104
default=0.9,
105105
help='Min. secondary-to-primary score ratio to report secondary alignments (-p in minimap2). [0.9]')
106106

107+
additional.add_argument(
108+
'-g',
109+
metavar='INT',
110+
type=int,
111+
choices=range(1, 9),
112+
default=1,
113+
help='Min. number of unique marker genes required for a species to report its genome copies. [1]')
114+
107115
additional_em.add_argument(
108116
'-a',
109117
metavar='INT',
@@ -186,7 +194,7 @@ def run(opt):
186194
GenomeProfiler(file, opt.db, opt.output, opt.threads).run(
187195
db_kraken=opt.db_kraken, skip_profile=opt.skip_profile, skip_clean=opt.skip_clean,
188196
max_target_seqs=opt.m, evalue=opt.e, identity=opt.i, subject_cover=opt.s,
189-
secondary_num=opt.n, secondary_ratio=opt.p,
197+
secondary_num=opt.n, secondary_ratio=opt.p, min_markers=opt.g,
190198
max_iterations=opt.a, epsilon=opt.c)
191199

192200
if index == len(opt.FILE) - 1:

src/melon/melon.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def run_em(self, max_iterations=1000, epsilon=1e-10):
289289

290290
def run(self, db_kraken=None, skip_profile=False, skip_clean=False,
291291
max_target_seqs=25, evalue=1e-15, identity=0, subject_cover=75,
292-
secondary_num=2147483647, secondary_ratio=0.9,
292+
secondary_num=2147483647, secondary_ratio=0.9, min_markers=1,
293293
max_iterations=1000, epsilon=1e-10):
294294
'''
295295
Run the pipeline.
@@ -326,10 +326,15 @@ def run(self, db_kraken=None, skip_profile=False, skip_clean=False,
326326
) for kingdom, replacement in replacements.items()}
327327

328328
## count assigned taxonomic labels
329-
self.hits = [[*hit, self.assignments.get(hit[0], replacements.get(hit[1]))] for hit in self.hits]
330-
counts, total_counts, lineage2identity = defaultdict(lambda: 0), defaultdict(lambda: 0), defaultdict(list)
329+
lineage2rpg = defaultdict(set)
330+
for hit in self.hits:
331+
hit.append(self.assignments.get(hit[0], replacements.get(hit[1])))
332+
lineage2rpg[hit[-1]].add(hit[2])
331333

334+
counts, total_counts, lineage2identity = defaultdict(lambda: 0), defaultdict(lambda: 0), defaultdict(list)
332335
for hit in self.hits:
336+
if len(lineage2rpg.get(hit[-1])) < min_markers:
337+
hit[-1] = replacements.get(hit[1])
333338
total_counts[hit[1]] += 1
334339
counts[(hit[-1], hit[1])] += 1
335340
lineage2identity[hit[-1]].append(self.identities.get((hit[0], hit[-1]), (0, 0)))

0 commit comments

Comments
 (0)