|
8 | 8 | from . import __version__ |
9 | 9 |
|
10 | 10 | def main(): |
11 | | - ap = argparse.ArgumentParser(prog="gffquant", formatter_class=argparse.RawTextHelpFormatter) |
12 | | - ap.add_argument( |
13 | | - "annotation_db", type=str, |
14 | | - help=textwrap.dedent("""\ |
15 | | - Path to a text file containing the reference annotation. |
16 | | - The required type of file is determined by the --mode argument (gff3 or tsv).""" |
17 | | - ) |
18 | | - ) |
19 | | - ap.add_argument( |
20 | | - "bam_file", type=str, |
21 | | - help=textwrap.dedent("""\ |
22 | | - Path to a position-sorted bam file. Ambiguous alignments need to be flagged as secondary |
23 | | - alignments with the same read id as their primary alignment. |
24 | | - (e.g. output from BWA mem -a). All alignments of an ambiguous group need to have MAPQ=0.""" |
25 | | - ) |
26 | | - ) |
27 | | - ap.add_argument( |
28 | | - "--mode", "-m", type=str, default="genome", choices=("genome", "genes", "gene", "domain"), |
29 | | - help=textwrap.dedent("""\ |
30 | | - Run mode:" |
31 | | - - 'genome' counts reads aligned against contigs, which are annotated with a gff3 file. |
32 | | - The gff3 needs to have been indexed with gffindex prior to the run. |
33 | | - - 'gene' counts reads aligned against gene sequences, which are annotated with a tab-separated file. |
34 | | - - 'genes' is an alias for the 'gene' mode |
35 | | - - 'domain' counts reads against domain annotations within gene sequences, which are annotated with a bed4 file.""" |
36 | | - ) |
37 | | - ) |
38 | | - ap.add_argument( |
39 | | - "--out_prefix", "-o", type=str, default="gffquant", |
40 | | - help="Prefix for output files." |
41 | | - ) |
42 | | - ap.add_argument( |
43 | | - "--ambig_mode", type=str, choices=("unique_only", "all1", "primary_only", "1overN"), default="unique_only", |
44 | | - help=textwrap.dedent("""\ |
45 | | - Setting how ambiguous alignments should be treated. This setting mimics NGLess' behaviour. |
46 | | - - 'unique_only' ignores any alignment flagged as ambiguous (MAPQ=0). This is the default setting. |
47 | | - - 'all1' treats each alignment as unique (each ambiguous alignment contributes 1 count to features it aligns to.) |
48 | | - - 'primary_only' takes the unique alignments and the primary and alignment of each ambiguous read group. |
49 | | - - '1overN' each alignment contributes 1/(n=number of ambiguous alignments of the same read) counts to features it aligns to.""" |
50 | | - ) |
51 | | - ) |
52 | | - ap.add_argument( |
53 | | - "--strand_specific", action="store_true", |
54 | | - help="Perform strand-specific counting for RNAseq reads. This currently only works for single-end data. This flag is ignored for paired-end data." |
55 | | - ) |
56 | | - ap.add_argument("--version", "-v", action="version", version="%(prog)s " + __version__) |
| 11 | + ap = argparse.ArgumentParser(prog="gffquant", formatter_class=argparse.RawTextHelpFormatter) |
| 12 | + ap.add_argument( |
| 13 | + "annotation_db", type=str, |
| 14 | + help=textwrap.dedent("""\ |
| 15 | + Path to a text file containing the reference annotation. |
| 16 | + The required type of file is determined by the --mode argument (gff3 or tsv).""" |
| 17 | + ) |
| 18 | + ) |
| 19 | + ap.add_argument( |
| 20 | + "bam_file", type=str, |
| 21 | + help=textwrap.dedent("""\ |
| 22 | + Path to a position-sorted bam file. Ambiguous alignments need to be flagged as secondary |
| 23 | + alignments with the same read id as their primary alignment. |
| 24 | + (e.g. output from BWA mem -a). All alignments of an ambiguous group need to have MAPQ=0.""" |
| 25 | + ) |
| 26 | + ) |
| 27 | + ap.add_argument( |
| 28 | + "--mode", "-m", type=str, default="genome", choices=("genome", "genes", "gene", "domain"), |
| 29 | + help=textwrap.dedent("""\ |
| 30 | + Run mode:" |
| 31 | + - 'genome' counts reads aligned against contigs, which are annotated with a gff3 file. |
| 32 | + The gff3 needs to have been indexed with gffindex prior to the run. |
| 33 | + - 'gene' counts reads aligned against gene sequences, which are annotated with a tab-separated file. |
| 34 | + - 'genes' is an alias for the 'gene' mode |
| 35 | + - 'domain' counts reads against domain annotations within gene sequences, which are annotated with a bed4 file.""" |
| 36 | + ) |
| 37 | + ) |
| 38 | + ap.add_argument( |
| 39 | + "--out_prefix", "-o", type=str, default="gffquant", |
| 40 | + help="Prefix for output files." |
| 41 | + ) |
| 42 | + ap.add_argument( |
| 43 | + "--ambig_mode", type=str, choices=("unique_only", "all1", "primary_only", "1overN"), default="unique_only", |
| 44 | + help=textwrap.dedent("""\ |
| 45 | + Setting how ambiguous alignments should be treated. This setting mimics NGLess' behaviour. |
| 46 | + - 'unique_only' ignores any alignment flagged as ambiguous (MAPQ=0). This is the default setting. |
| 47 | + - 'all1' treats each alignment as unique (each ambiguous alignment contributes 1 count to features it aligns to.) |
| 48 | + - 'primary_only' takes the unique alignments and the primary and alignment of each ambiguous read group. |
| 49 | + - '1overN' each alignment contributes 1/(n=number of ambiguous alignments of the same read) counts to features it aligns to.""" |
| 50 | + ) |
| 51 | + ) |
| 52 | + ap.add_argument( |
| 53 | + "--strand_specific", action="store_true", |
| 54 | + help="Perform strand-specific counting for RNAseq reads. This currently only works for single-end data. This flag is ignored for paired-end data." |
| 55 | + ) |
| 56 | + ap.add_argument("--version", "-v", action="version", version="%(prog)s " + __version__) |
57 | 57 |
|
58 | 58 |
|
59 | | - args = ap.parse_args() |
| 59 | + args = ap.parse_args() |
60 | 60 |
|
61 | | - print("Version:", __version__) |
62 | | - print("Command:", os.path.basename(sys.argv[0]), *sys.argv[1:]) |
| 61 | + print("Version:", __version__) |
| 62 | + print("Command:", os.path.basename(sys.argv[0]), *sys.argv[1:]) |
63 | 63 |
|
64 | | - if not os.path.exists(args.bam_file): |
65 | | - raise ValueError("bam file does not exist", args.bam_file) |
66 | | - if not os.path.exists(args.annotation_db): |
67 | | - raise ValueError("annotation database does not exist", args.annotation_db) |
| 64 | + if not os.path.exists(args.bam_file): |
| 65 | + raise ValueError("bam file does not exist", args.bam_file) |
| 66 | + if not os.path.exists(args.annotation_db): |
| 67 | + raise ValueError("annotation database does not exist", args.annotation_db) |
68 | 68 |
|
69 | | - db_index = None |
70 | | - if args.mode == "genome": |
71 | | - db_index = args.annotation_db + ".index" |
72 | | - if not os.path.exists(db_index): |
73 | | - raise ValueError("gff index '{}' does not exist (please generate index with 'gffindex {}')".format(db_index, args.annotation_db)) |
| 69 | + db_index = None |
| 70 | + if args.mode == "genome": |
| 71 | + db_index = args.annotation_db + ".index" |
| 72 | + if not os.path.exists(db_index): |
| 73 | + raise ValueError("gff index '{}' does not exist (please generate index with 'gffindex {}')".format(db_index, args.annotation_db)) |
74 | 74 |
|
75 | | - if os.path.dirname(args.out_prefix): |
76 | | - pathlib.Path(os.path.dirname(args.out_prefix)).mkdir(exist_ok=True, parents=True) |
| 75 | + if os.path.dirname(args.out_prefix): |
| 76 | + pathlib.Path(os.path.dirname(args.out_prefix)).mkdir(exist_ok=True, parents=True) |
77 | 77 |
|
78 | | - fq = FeatureQuantifier( |
79 | | - db=args.annotation_db, |
80 | | - db_index=db_index, |
81 | | - out_prefix=args.out_prefix, |
82 | | - ambig_mode=args.ambig_mode, |
83 | | - do_overlap_detection=args.mode in ("genome", "domain"), |
84 | | - strand_specific=args.strand_specific |
85 | | - ) |
| 78 | + fq = FeatureQuantifier( |
| 79 | + db=args.annotation_db, |
| 80 | + db_index=db_index, |
| 81 | + out_prefix=args.out_prefix, |
| 82 | + ambig_mode=args.ambig_mode, |
| 83 | + do_overlap_detection=args.mode in ("genome", "domain"), |
| 84 | + strand_specific=args.strand_specific |
| 85 | + ) |
86 | 86 |
|
87 | | - fq.process_bamfile(args.bam_file) |
| 87 | + fq.process_bamfile(args.bam_file) |
88 | 88 |
|
89 | 89 |
|
90 | 90 | if __name__ == "__main__": |
91 | | - main() |
| 91 | + main() |
0 commit comments