Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ This library is provided under the [GNU General Public License v3.0](https://cho
The Prodigal code was written by [Doug Hyatt](https://github.com/hyattpd) and is distributed under the
terms of the GPLv3 as well. See `vendor/Prodigal/LICENSE` for more information.

*This project is in no way not affiliated, sponsored, or otherwise endorsed
*This project is in no way affiliated, sponsored, or otherwise endorsed
by the [original Prodigal authors](https://github.com/hyattpd). It was developed
by [Martin Larralde](https://github.com/althonos/) during his PhD project
at the [European Molecular Biology Laboratory](https://www.embl.de/) in
Expand Down
4 changes: 3 additions & 1 deletion docs/guide/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ exceptions:
however the stream cannot be compressed.
- The GenBank output of Pyrodigal is a full GenBank record including the
input sequence, unlike Prodigal which only outputs the features section.
- Start codons can now be enforced with ``-c start``. For backwards compatibility,
``-c`` alone is equivalent to ``-c both``, and omitting the flag is equivalent to ``-c none``.

Flags
-----
Expand All @@ -28,7 +30,7 @@ The Pyrodigal CLI has all the flags of the original CLI:
.. code-block:: text

-a trans_file Write protein translations to the selected file.
-c Closed ends. Do not allow genes to run off edges.
-c [mode] Closed ends. Do not allow genes to run off edges. Can be at start and/or stop site. Options are 'both', 'start', or 'none'. (default: none)
-d nuc_file Write nucleotide sequences of genes to the selected file.
-f output_type Select output format.
-g tr_table Specify a translation table to use.
Expand Down
1 change: 0 additions & 1 deletion docs/guide/publications.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ Pyrodigal is being used in the following research works:
- Sirén K., Millard, A., Petersen, B., Gilbert, M. T. P., Clokie, M. RJ., Sicheritz-Pontén, T. Rapid discovery of novel prophages using biological feature engineering and machine learning. bioRxiv 2020.08.09.243022; :doi:`10.1101/2020.08.09.243022`.
- Turkington, C. J. R., Nezam Abadi, N., Edwards, R. A., Grasis, J. A. hafeZ: Active prophage identification through read mapping. bioRxiv 2021.07.21.453177; :doi:`10.1101/2021.07.21.453177`.
- Reynolds, R., Hyun, S., Tully, B., Bien, J., Levine, N. M. Identification of microbial metabolic functional guilds from large genomic datasets. Front Microbiol. 2023 Jun 30;14:1197329. :doi:`10.3389/fmicb.2023.1197329`.
- Bouras, G., Grigson, S., Papudeshi, B., Mallawaarachchi, V., Roach, M. Dnaapler: A tool to reorient circular microbial genomes. Journal of Open Source Software, 9(93), 5968. :doi:`10.21105/joss.05968`.
- Mainguy J, Hoede C. Binette: a fast and accurate bin refinement tool to construct high quality Metagenome Assembled Genomes. bioRxiv 2024.04.20.585171; :doi:`10.1101/2024.04.20.585171`.
- Ridley, R. S. Jr, Conrad, R. E., Lindner, B. G., Woo, S., Konstantinidis, K. T. Potential routes of plastics biotransformation involving novel plastizymes revealed by global multi-omic analysis of plastic associated microbes. Sci Rep 14, 8798 (2024). :doi:`10.1038/s41598-024-59279-x`.
- Figueroa, J. L. III, Dhungel, E., Bellanger, M., Brouwer, C. R., White, R. A. III. MetaCerberus: distributed highly parallelized HMM-based processing for robust functional annotation across the tree of life. Bioinformatics, Volume 40, Issue 3, March 2024, btae119. :doi:`10.1093/bioinformatics/btae119`.
Expand Down
28 changes: 23 additions & 5 deletions src/pyrodigal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,22 @@ def argument_parser(
metavar="trans_file",
help="Write protein translations to the selected file.",
)
#parser.add_argument(
# "-c",
# required=False,
# action="store_true",
# help="Closed ends. Do not allow genes to run off edges.",
# default=False,
#)
parser.add_argument(
"-c",
required=False,
action="store_true",
help="Closed ends. Do not allow genes to run off edges.",
default=False,
metavar="mode",
help="Closed ends. Do not allow genes to run off edges. Can be at start and/or stop site. Options are 'both', 'start', or 'none'.",
choices={"both", "start", "none"},
default="none",
nargs='?',
const="both",
)
parser.add_argument(
"-d",
Expand Down Expand Up @@ -256,10 +266,18 @@ def main(
# open input (with support for compressed files)
input_file = stdin if args.i is None else ctx.enter_context(zopen(args.i))

# select closed type
if args.c == "both" or args.c == "":
closed = [True, True]
elif args.c == "start":
closed = [True, False]
else:
closed = [False, False]

# initialize the ORF finder
gene_finder = gene_finder_factory(
meta=args.p == "meta",
closed=args.c,
closed=closed,
mask=args.m,
training_info=training_info,
min_gene=args.min_gene,
Expand All @@ -268,7 +286,7 @@ def main(
)

# pre-train if in training mode
if args.p == "single":
if args.p == "single" and training_info is None:
# use the same interleaving logic as Prodigal
sequences = list(parse(input_file))
training_info = gene_finder.train(
Expand Down
10 changes: 7 additions & 3 deletions src/pyrodigal/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@ cdef class Nodes:
self,
Sequence sequence,
const int translation_table,
const bint closed,
const bint closed_start,
const bint closed_stop,
const int min_gene,
const int min_edge_gene
) except -1 nogil
Expand All @@ -261,7 +262,8 @@ cdef class Nodes:
self,
Sequence seq,
const _training* tinf,
const bint closed,
const bint closed_start,
const bint closed_stop,
const bint is_meta
) except -1 nogil
cdef int _sort(self) except 1 nogil
Expand Down Expand Up @@ -391,7 +393,9 @@ cdef _metagenomic_bin _METAGENOMIC_BINS[NUM_META]
cdef class GeneFinder:
cdef readonly size_t _num_seq
cdef readonly str backend
cdef readonly bint closed
cdef readonly object closed
cdef readonly bint closed_start
cdef readonly bint closed_stop
cdef readonly object lock
cdef readonly bint mask
cdef readonly int min_mask
Expand Down
76 changes: 62 additions & 14 deletions src/pyrodigal/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ import datetime
import threading
import typing
from typing import (
Dict,
FrozenSet,
Iterable,
Iterator,
List,
Dict,
Optional,
TextIO,
Tuple,
Expand Down Expand Up @@ -42,12 +42,50 @@ IDEAL_SINGLE_GENOME: int
PRODIGAL_VERSION: str

_TRANSLATION_TABLE = Literal[
1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15, 16, 21, 22, 23, 24, 25,
26, 29, 30, 32, 33
1,
2,
3,
4,
5,
6,
9,
10,
11,
12,
13,
14,
15,
16,
21,
22,
23,
24,
25,
26,
29,
30,
32,
33,
]
_DIVISION = Literal[
"PRI", "ROD", "MAM", "VRT", "INV", "PLN", "BCT", "VRL", "PHG", "SYN",
"UNA", "EST", "PAT", "STS", "GSS", "HTG", "HTC", "ENV"
"PRI",
"ROD",
"MAM",
"VRT",
"INV",
"PLN",
"BCT",
"VRL",
"PHG",
"SYN",
"UNA",
"EST",
"PAT",
"STS",
"GSS",
"HTG",
"HTC",
"ENV",
]

# --- Sequence mask ----------------------------------------------------------
Expand Down Expand Up @@ -158,7 +196,8 @@ class Nodes(typing.Sequence[Node]):
self,
sequence: Sequence,
*,
closed: bool = False,
closed_start: bool = False,
closed_stop: bool = False,
min_gene: int = 90,
min_edge_gene: int = 60,
translation_table: _TRANSLATION_TABLE = 11,
Expand All @@ -169,7 +208,8 @@ class Nodes(typing.Sequence[Node]):
sequence: Sequence,
training_info: TrainingInfo,
*,
closed: bool = False,
closed_start: bool = False,
closed_stop: bool = False,
is_meta: bool = False,
) -> None: ...
def sort(self) -> None: ...
Expand Down Expand Up @@ -347,11 +387,15 @@ class TrainingInfo:
@property
def upstream_compositions(self) -> memoryview: ...
@upstream_compositions.setter
def upstream_compositions(self, upstream_compositions: Iterable[Iterable[float]]) -> None: ...
def upstream_compositions(
self, upstream_compositions: Iterable[Iterable[float]]
) -> None: ...
@property
def motif_weights(self) -> memoryview: ...
@motif_weights.setter
def motif_weights(self, motif_weights: Iterable[Iterable[Iterable[float]]]) -> None: ...
def motif_weights(
self, motif_weights: Iterable[Iterable[Iterable[float]]]
) -> None: ...
@property
def missing_motif_weight(self) -> float: ...
@missing_motif_weight.setter
Expand All @@ -363,7 +407,6 @@ class TrainingInfo:
def to_dict(self) -> Dict[str, object]: ...
def dump(self, fp: typing.BinaryIO) -> None: ...


# --- Metagenomic Bins -------------------------------------------------------

class MetagenomicBin:
Expand All @@ -384,8 +427,9 @@ class MetagenomicBins(typing.Sequence[MetagenomicBin]):
@typing.overload
def __getitem__(self, index: slice) -> MetagenomicBins: ...
@typing.overload
def __getitem__(self, index: Union[int, slice]) -> Union[MetagenomicBin, MetagenomicBins]: ...

def __getitem__(
self, index: Union[int, slice]
) -> Union[MetagenomicBin, MetagenomicBins]: ...

# --- Pyrodigal --------------------------------------------------------------

Expand All @@ -398,7 +442,7 @@ class GeneFinder:
*,
meta: bool = False,
metagenomic_bins: Optional[MetagenomicBins] = None,
closed: bool = False,
closed: list = [False, False],
mask: bool = False,
min_mask: int = 50,
min_gene: int = 90,
Expand All @@ -413,7 +457,11 @@ class GeneFinder:
@property
def meta(self) -> bool: ...
@property
def closed(self) -> bool: ...
def closed(self) -> list: ...
@property
def closed_start(self) -> bool: ...
@property
def closed_stop(self) -> bool: ...
@property
def mask(self) -> bool: ...
@property
Expand Down
Loading