Skip to content

Commit 193bd46

Browse files
committed
refactor, fix bugs
1 parent f3a6915 commit 193bd46

File tree

14 files changed

+492
-365
lines changed

14 files changed

+492
-365
lines changed

apollo/annotations/__init__.py

Lines changed: 103 additions & 106 deletions
Large diffs are not rendered by default.

apollo/util.py

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -89,36 +89,36 @@ def AssertAdmin(user):
8989

9090

9191
def _tnType(feature):
92-
if feature.type in ('gene', 'mRNA', 'exon', 'CDS', 'terminator', 'tRNA'):
92+
if feature.type in ('gene', 'mRNA', 'exon', 'CDS', 'terminator', 'tRNA', 'snRNA', 'snoRNA', 'ncRNA', 'rRNA', 'miRNA', 'repeat_region', 'transposable_element', 'pseudogene', 'transcript'):
9393
return feature.type
9494
else:
9595
return 'exon'
9696

9797

9898
def _yieldGeneData(gene, disable_cds_recalculation=False, use_name=False):
9999
current = _yieldSubFeatureData(gene, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)
100-
sub_features = gene.sub_features
101100

102-
# TODO: is this handling multiple isoforms properly?
103-
if sub_features:
104-
# current['children'] = []
105-
# child_data = []
106-
for sf in sub_features:
101+
if gene.sub_features:
102+
current['children'] = []
103+
for sf in gene.sub_features:
107104
if _tnType(sf) in coding_transcript_types:
108-
# child_data.append(_yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name))
109-
return _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)
110-
if _tnType(sf) in noncoding_transcript_types:
111-
# child_data.append(_yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name))
112-
return _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)
113-
# return child_data
105+
current['children'].append(_yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name))
106+
elif _tnType(sf) in noncoding_transcript_types:
107+
current['children'].append(_yieldNonCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name))
114108

115109
# # TODO: handle comments
116110
# # TODO: handle dbxrefs
117111
# # TODO: handle attributes
118112
# # TODO: handle aliases
119113
# # TODO: handle description
120114
# # TODO: handle GO, Gene Product, Provenance
121-
return current
115+
116+
if 'children' in current and gene.type == 'gene':
117+
# Only sending mRNA level as apollo is more comfortable with orphan mRNAs
118+
return current['children']
119+
else:
120+
# No children, return a generic gene feature
121+
return current
122122

123123

124124
def _yieldSubFeatureData(f, disable_cds_recalculation=False, use_name=False):
@@ -135,14 +135,17 @@ def _yieldSubFeatureData(f, disable_cds_recalculation=False, use_name=False):
135135
}
136136
},
137137
}
138-
if disable_cds_recalculation is True:
138+
if disable_cds_recalculation:
139139
current['use_cds'] = 'true'
140140

141141
if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types
142142
+ single_level_feature_types):
143143
current['name'] = f.qualifiers.get('Name', [f.id])[0]
144144

145-
if use_name is True:
145+
if 'ID' in f.qualifiers:
146+
current['gff_id'] = f.qualifiers['ID'][0]
147+
148+
if use_name:
146149
current['use_name'] = True
147150

148151
# if OGS:
@@ -169,6 +172,14 @@ def _yieldCodingTranscriptData(f, disable_cds_recalculation=False, use_name=Fals
169172
}
170173
},
171174
}
175+
176+
if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types
177+
+ single_level_feature_types):
178+
current['name'] = f.qualifiers.get('Name', [f.id])[0]
179+
180+
if 'ID' in f.qualifiers:
181+
current['gff_id'] = f.qualifiers['ID'][0]
182+
172183
if len(f.sub_features) > 0:
173184
current['children'] = []
174185
for sf in f.sub_features:
@@ -178,16 +189,8 @@ def _yieldCodingTranscriptData(f, disable_cds_recalculation=False, use_name=Fals
178189
return current
179190

180191

181-
def print_file(path):
182-
with open(path) as file:
183-
print(file.read())
184-
file.close()
185-
186-
187-
# TODO: we may need specify something different here, but for now this works
188-
189-
# def _yieldNonCodingTranscriptData(features):
190-
# pass
192+
def _yieldNonCodingTranscriptData(features, disable_cds_recalculation=False, use_name=False):
193+
return _yieldCodingTranscriptData(features, disable_cds_recalculation, use_name)
191194

192195

193196
# def _yieldSingleLevelFeatureData(features):
@@ -198,13 +201,12 @@ def yieldApolloData(feature, use_name=False, disable_cds_recalculation=False):
198201
feature_type = _tnType(feature)
199202
if feature_type in gene_types:
200203
return _yieldGeneData(feature)
201-
if feature_type in pseudogenes_types:
204+
elif feature_type in pseudogenes_types:
202205
return _yieldGeneData(feature)
203206
elif feature_type in coding_transcript_types:
204207
return _yieldCodingTranscriptData(feature)
205208
elif feature_type in noncoding_transcript_types:
206-
return _yieldCodingTranscriptData(feature)
207-
# return _yieldNonCodingTranscriptData(current_feature)
209+
return _yieldNonCodingTranscriptData(feature)
208210
elif feature_type in single_level_feature_types:
209211
# return _yieldSingleLevelFeatureData(current_feature)
210212
return _yieldSubFeatureData(feature)
@@ -235,14 +237,17 @@ def _yieldFeatData(features, use_name=False, disable_cds_recalculation=False):
235237
}
236238
},
237239
}
238-
if disable_cds_recalculation is True:
240+
if disable_cds_recalculation:
239241
current['use_cds'] = 'true'
240242

241243
if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types
242244
+ single_level_feature_types):
243245
current['name'] = f.qualifiers.get('Name', [f.id])[0]
244246

245-
if use_name is True:
247+
if 'ID' in f.qualifiers:
248+
current['gff_id'] = f.qualifiers['ID'][0]
249+
250+
if use_name:
246251
current['use_name'] = True
247252

248253
# if OGS:

arrow/commands/annotations/load_gff3.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,6 @@
3333
help="Disable CDS recalculation and instead use the one provided",
3434
is_flag=True
3535
)
36-
@click.option(
37-
"--verbose",
38-
help="Verbose mode",
39-
is_flag=True
40-
)
4136
@click.option(
4237
"--timing",
4338
help="Output loading performance metrics",
@@ -46,11 +41,11 @@
4641
@pass_context
4742
@custom_exception
4843
@str_output
49-
def cli(ctx, organism, gff3, source="", batch_size=1, test=False, use_name=False, disable_cds_recalculation=False, verbose=False, timing=False):
44+
def cli(ctx, organism, gff3, source="", batch_size=1, test=False, use_name=False, disable_cds_recalculation=False, timing=False):
5045
"""Load a full GFF3 into annotation track
5146
5247
Output:
5348
5449
Loading report
5550
"""
56-
return ctx.gi.annotations.load_gff3(organism, gff3, source=source, batch_size=batch_size, test=test, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation, verbose=verbose, timing=timing)
51+
return ctx.gi.annotations.load_gff3(organism, gff3, source=source, batch_size=batch_size, test=test, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation, timing=timing)

docs/commands/annotations.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,6 @@ Load a full GFF3 into annotation track
531531
--use_name Use the given name instead of generating one.
532532
--disable_cds_recalculation Disable CDS recalculation and instead use the one
533533
provided
534-
--verbose Verbose mode
535534
--timing Output loading performance metrics
536535
-h, --help Show this message and exit.
537536

test-data/exported_cdna.fa

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
>c7ce0a38-beee-4aa3-8f34-5f35f549f287 (mRNA) 690 residues [Merlin:2-691 + strand] [cdna] name=Unknown
1+
>bbc20cfa-4286-4160-bfaf-545e3bf4ccee (mRNA) 690 residues [Merlin:2-691 + strand] [cdna] name=Merlin_1_mRNA-00001
22
CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG
33
CATAACGCAGAGAATAAGTTGTTCTATTTCAGAAACTACGTTTCAACTTCATTAAAGCCT
44
CTGATCTTTGGTGAATTTGGTCGTATGTTTATGGCACTAGATGACGATACTACAATTTAT
@@ -11,13 +11,13 @@ GGTTCAATCGTTCCGCCTTCTGAGCAAATTGATGAATCTGTTGAGATTTATGACGGTGAT
1111
CTGTTCATGGAAACAGGTGAAGTAGTAAAACTGTCCGGATTCATGCAGTTCGTCAACGAA
1212
TCTGCATACGATGAAGAGCAAAACCAGATGGCTGCTGAGATTCTGTCTGGATTCTTGGAC
1313
ATTGATGACATGCCACGTAAGTTCCGCTAG
14-
>74f8e03d-f003-490c-9eeb-15b3b68763c0 (mRNA) 288 residues [Merlin:752-1039 + strand] [cdna] name=Unknown
14+
>90903c55-1f5b-4699-9967-c21b46dbd005 (mRNA) 288 residues [Merlin:752-1039 + strand] [cdna] name=mrna-name-00001
1515
ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG
1616
TCTTATGAATTCAATGAAGTTGTTTTCAAAGAGCTTAAGAAAATTTTAGGCGATAAGAAG
1717
CTTCAAAGTACTCCAATTGGACGTTTTGGAATGAAAGAAAACGTTGATACTTATATTGAA
1818
AGTGTAGTGACAGGGCAGTTAGAAGGTGAATTTTCTGTAGCAGTTCAAACTGTAGAAAAT
1919
GATGAAGTTATTTTAACTTTACCAGCTTTCGTAATTTTCCGCAAATAA
20-
>5280a04b-53f0-4ae6-ae5c-2c358e5c5a93 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna] name=Unknown
20+
>d754c330-e43b-4aae-95ee-008c3d0c5ac0 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna] name=Merlin_3_mRNA-00001
2121
ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT
2222
ATGTTTAGTTGTGTATTTGCAACTACTCCGTCAGCAAAGTCTCAACAATTACTCGATCAA
2323
TTTGGCGGTATGCTCTTTAATAACCTTCCGTTGAATAATGACTGGCTTGGATTAACACAA
@@ -34,7 +34,7 @@ GATGTTACATTTGCTTACAGAGTAATGCAAACGGGTGCTGTTGGACGTCAAGCTGCTCTT
3434
GATTGGATTGAAGATAGAGCTGTTAATTCTATAACTGGAATTAATAGTGAAATGTCTCTT
3535
AATGGAAGTTTAAGTAGATTATCTAGACTTGGAGGAGCTGCTGGAGGGTTGTCTCACGTC
3636
ATTAATTCGACCCGAAACTCTACTTCGAAAATACTTGGATTGTAA
37-
>8d6e6288-a8d4-4b81-b7fe-766119917628 (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cdna] name=Unknown
37+
>296145b5-8924-4146-84aa-41c8a04d7bdc (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cdna] name=Merlin_4_mRNA-00001
3838
ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA
3939
ACCTCCGCTGGTCAAAGTTCACAATCAGCAAAAATAAAATCCACTATAACTGCGCAATAT
4040
CCGTCTGAACGTTCAGCTGGTAATGACACATCTGGTTCTTTACGAGTTCATGATCTTTAT
@@ -53,7 +53,7 @@ TTCGTCAGAAACTTTGGAACCACAAGTAAATTCGATGGACGTGCTGAAGTATTCGGTCCA
5353
TGTCAAATTCAGAGTATCCGTTTTGATAAAACTCCAAATGGAAACTTTAACGGTTTAGCT
5454
ATAGCTCCAAACCTGCCAAGTACATTCACATTAGAAATTACTATGCGTGAAATCTTGACA
5555
TTGAACCGAGCTTCAGTATATGCGGAAGGATTCTGA
56-
>154a6d4e-dc94-4de2-9403-63aa47a01d82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna] name=multiexongene
56+
>ac134b2b-dc09-4089-8032-83637c62fd82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna] name=Merlin_5_mRNA-00001
5757
ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT
5858
GAAAGAGATGCAGCCTCGACTGCATCTGATCAAGTAGACTCTTTAGAATTAATCGGCCTT
5959
AAACTTGATGATGTACAAAGCGCTAATGAACTAGTTGCTGAAGTAATTGAAGAAAAGGGC
@@ -82,7 +82,7 @@ ACTCCGAAGCCTGCGGCTCCAGCTACTTCGGAAGATAATCAACGAGTTCAAAATATTCAA
8282
AAAGCTGAAAATGCTAAAGAGCAATCTAAAAAATCAACCGGTGATATGAATGTTGCTAAC
8383
ACTCAGGTTAATAACGTAAATAATAGTAAGACTATTCACCAGGTTCAAACAGTCACGGCT
8484
ACTCCAGCTCCTGGAGTATTCGGGGCAACAGGAGTTAATTAA
85-
>ce047673-3c00-425c-862b-20fd004eca42 (mRNA) 1056 residues [Merlin:5011-6066 - strand] [cdna] name=cds-not-under-exon
85+
>065d1481-1403-4a2b-8566-8ee84216e885 (mRNA) 1056 residues [Merlin:5011-6066 - strand] [cdna] name=Merlin_42_mRNA-00001
8686
CTTTAATGACGCTGGTGAATCAATAAAAGAGATGATCGGTGCAATTTATGAATCAAAACC
8787
TCTTATAGCACCTGCGATGAACACAATCAACACATATGTTCCTCGAGTTCCATGGACGAG
8888
TAACATAACTGAATACAAGAAATATGTTCGAGATGTTGCATTAGCAGTAGATAATGACCA

test-data/exported_cds.fa

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
>c7ce0a38-beee-4aa3-8f34-5f35f549f287 (mRNA) 690 residues [Merlin:2-691 + strand] [cds] name=Unknown
1+
>bbc20cfa-4286-4160-bfaf-545e3bf4ccee (mRNA) 690 residues [Merlin:2-691 + strand] [cds] name=Merlin_1_mRNA-00001
22
CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG
33
CATAACGCAGAGAATAAGTTGTTCTATTTCAGAAACTACGTTTCAACTTCATTAAAGCCT
44
CTGATCTTTGGTGAATTTGGTCGTATGTTTATGGCACTAGATGACGATACTACAATTTAT
@@ -11,12 +11,30 @@ GGTTCAATCGTTCCGCCTTCTGAGCAAATTGATGAATCTGTTGAGATTTATGACGGTGAT
1111
CTGTTCATGGAAACAGGTGAAGTAGTAAAACTGTCCGGATTCATGCAGTTCGTCAACGAA
1212
TCTGCATACGATGAAGAGCAAAACCAGATGGCTGCTGAGATTCTGTCTGGATTCTTGGAC
1313
ATTGATGACATGCCACGTAAGTTCCGCTAG
14-
>74f8e03d-f003-490c-9eeb-15b3b68763c0 (mRNA) 9 residues [Merlin:752-1039 + strand] [cds] name=Unknown
15-
AAATTTTAG
16-
>5280a04b-53f0-4ae6-ae5c-2c358e5c5a93 (mRNA) 108 residues [Merlin:1067-2011 - strand] [cds] name=Unknown
17-
CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA
18-
TCTTATTGGAGCAATGAGCAATCGTGTTGTTCAGTCTTTATTAGGTGA
19-
>8d6e6288-a8d4-4b81-b7fe-766119917628 (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cds] name=Unknown
14+
>90903c55-1f5b-4699-9967-c21b46dbd005 (mRNA) 288 residues [Merlin:752-1039 + strand] [cds] name=mrna-name-00001
15+
ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG
16+
TCTTATGAATTCAATGAAGTTGTTTTCAAAGAGCTTAAGAAAATTTTAGGCGATAAGAAG
17+
CTTCAAAGTACTCCAATTGGACGTTTTGGAATGAAAGAAAACGTTGATACTTATATTGAA
18+
AGTGTAGTGACAGGGCAGTTAGAAGGTGAATTTTCTGTAGCAGTTCAAACTGTAGAAAAT
19+
GATGAAGTTATTTTAACTTTACCAGCTTTCGTAATTTTCCGCAAATAA
20+
>d754c330-e43b-4aae-95ee-008c3d0c5ac0 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cds] name=Merlin_3_mRNA-00001
21+
ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT
22+
ATGTTTAGTTGTGTATTTGCAACTACTCCGTCAGCAAAGTCTCAACAATTACTCGATCAA
23+
TTTGGCGGTATGCTCTTTAATAACCTTCCGTTGAATAATGACTGGCTTGGATTAACACAA
24+
GGTGAGTTCACATCAGGACTCACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGA
25+
AAGTCTGGTGTATCGAAATATCTTATTGGAGCAATGAGCAATCGTGTTGTTCAGTCTTTA
26+
TTAGGTGAATTTGAAGTCGGAACTTATTTGTTAGACTTCTTTAACATGGCTTATCCGCAA
27+
TCTGGATTGATGATTTATTCGGTCAAAATTCCAGAGAACAGATTGTCTCATGAAATGGAT
28+
TTCAACCATAACTCACCGAATATTAGAATAACTGGACGTGAACTCGATCCGTTAACTATA
29+
TCATTCAGAATGGATCCCGAAGCAAGTAACTATCGTGCAATGCAAGATTGGGTGAACTCC
30+
GTTCAAGACCCGGTTACTGGATTGCGAGCATTACCAACTGACGTCGAAGCTGACATTCAG
31+
GTTAACCTTCATGCTCGAAATGGATTACCTCATACTGTGATAATGTTCACAGGTTGTGTT
32+
CCTGTTGCGTGTGGAGCTCCTGAGCTTACATATGAAGGAGATAACCAAATTGCGGTTTTC
33+
GATGTTACATTTGCTTACAGAGTAATGCAAACGGGTGCTGTTGGACGTCAAGCTGCTCTT
34+
GATTGGATTGAAGATAGAGCTGTTAATTCTATAACTGGAATTAATAGTGAAATGTCTCTT
35+
AATGGAAGTTTAAGTAGATTATCTAGACTTGGAGGAGCTGCTGGAGGGTTGTCTCACGTC
36+
ATTAATTCGACCCGAAACTCTACTTCGAAAATACTTGGATTGTAA
37+
>296145b5-8924-4146-84aa-41c8a04d7bdc (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cds] name=Merlin_4_mRNA-00001
2038
ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA
2139
ACCTCCGCTGGTCAAAGTTCACAATCAGCAAAAATAAAATCCACTATAACTGCGCAATAT
2240
CCGTCTGAACGTTCAGCTGGTAATGACACATCTGGTTCTTTACGAGTTCATGATCTTTAT
@@ -35,7 +53,7 @@ TTCGTCAGAAACTTTGGAACCACAAGTAAATTCGATGGACGTGCTGAAGTATTCGGTCCA
3553
TGTCAAATTCAGAGTATCCGTTTTGATAAAACTCCAAATGGAAACTTTAACGGTTTAGCT
3654
ATAGCTCCAAACCTGCCAAGTACATTCACATTAGAAATTACTATGCGTGAAATCTTGACA
3755
TTGAACCGAGCTTCAGTATATGCGGAAGGATTCTGA
38-
>154a6d4e-dc94-4de2-9403-63aa47a01d82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds] name=multiexongene
56+
>ac134b2b-dc09-4089-8032-83637c62fd82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds] name=Merlin_5_mRNA-00001
3957
ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT
4058
GAAAGAGATGCAGCCTCGACTGCATCTGATCAAGTAGACTCTTTAGAATTAATCGGCCTT
4159
AAACTTGATGATGTACAAAGCGCTAATGAACTAGTTGCTGAAGTAATTGAAGAAAAGGGC
@@ -64,5 +82,17 @@ ACTCCGAAGCCTGCGGCTCCAGCTACTTCGGAAGATAATCAACGAGTTCAAAATATTCAA
6482
AAAGCTGAAAATGCTAAAGAGCAATCTAAAAAATCAACCGGTGATATGAATGTTGCTAAC
6583
ACTCAGGTTAATAACGTAAATAATAGTAAGACTATTCACCAGGTTCAAACAGTCACGGCT
6684
ACTCCAGCTCCTGGAGTATTCGGGGCAACAGGAGTTAATTAA
67-
>ce047673-3c00-425c-862b-20fd004eca42 (mRNA) 6 residues [Merlin:5011-6066 - strand] [cds] name=cds-not-under-exon
68-
CTTTAA
85+
>065d1481-1403-4a2b-8566-8ee84216e885 (mRNA) 777 residues [Merlin:5011-6066 - strand] [cds] name=Merlin_42_mRNA-00001
86+
TTTAATGACGCTGGTGAATCAATAAAAGAGATGATCGGTGCAATTTATGAATCAAAACCT
87+
CTTATAGCACCTGCGATGAACACAATCAACACATATGTTCCTCGAGTTCCATGGACGAGT
88+
AACATAACTGAATACAAGAAATATGTTCGAGATGTTGCATTAGCAGTAGATAATGACCAA
89+
TTCGTTTTTGTATGGGAAGATATCTATGGCTTGAACATGATGGATTATGACGCAATGATT
90+
AACCAAGAATCAATCAAGGTTATTGTCGGTGAACCACGCACAATAGGTCAATTTGTCGGT
91+
GAGCTGGAATATAATCTCGCTTATGACTTCCAGTGGTTAACGAAGGCTAATGCCCATACA
92+
CGCGATCCTATTTTTAACGCTACAATCTATTCACACTCATTCTTGGATAATAACCTTCCT
93+
AGAATAGTAACAGGTGATGGACAGAATAGCATCTTCGTTTCTCGCTCGGGTGCATATTCT
94+
GAAATGACTTATCGAAATGGATATGAAGAAGCTATCAGGCTTCAGACTATGGCACAATAC
95+
GACGGTTATGCAACTTGTAAAATGGTTGGAGACTTTGAAATGACTCCTGGAGATAAGATT
96+
AATTTCTTTGATCCAAAGAAACAATTCAAAGCTGATTTTTACATTGATGAAGTAATTCAT
97+
GAAGTAAGTAATAACCAAAGCATAACTACACTTTATATGTTTACTAACTCTCGTAAGTTG
98+
GAAACAGTAGAACCAATAAAGGTTAAAAATGAACTTAAATCTGATACTACCACTTAA

test-data/exported_peptide.fa

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,26 @@
1-
>c7ce0a38-beee-4aa3-8f34-5f35f549f287 (mRNA) 229 residues [Merlin:2-691 + strand] [peptide] name=Unknown
1+
>bbc20cfa-4286-4160-bfaf-545e3bf4ccee (mRNA) 229 residues [Merlin:2-691 + strand] [peptide] name=Merlin_1_mRNA-00001
22
RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY
33
TAETPDDYNRFANPEDIIDIGATQKDSFDDNNNDGTSINIGKQVNLGFVISGAENVRVIV
44
PGSLTEYPEEAEVILPRGTLLKINKITTQVDKRSNKFMVEGSIVPPSEQIDESVEIYDGD
55
LFMETGEVVKLSGFMQFVNESAYDEEQNQMAAEILSGFLDIDDMPRKFR
6-
>74f8e03d-f003-490c-9eeb-15b3b68763c0 (mRNA) 2 residues [Merlin:752-1039 + strand] [peptide] name=Unknown
7-
KF
8-
>5280a04b-53f0-4ae6-ae5c-2c358e5c5a93 (mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide] name=Unknown
9-
HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR
10-
>8d6e6288-a8d4-4b81-b7fe-766119917628 (mRNA) 351 residues [Merlin:2011-3066 - strand] [peptide] name=Unknown
6+
>90903c55-1f5b-4699-9967-c21b46dbd005 (mRNA) 95 residues [Merlin:752-1039 + strand] [peptide] name=mrna-name-00001
7+
MKSIFRINGVEIVVEDVVPMSYEFNEVVFKELKKILGDKKLQSTPIGRFGMKENVDTYIE
8+
SVVTGQLEGEFSVAVQTVENDEVILTLPAFVIFRK
9+
>d754c330-e43b-4aae-95ee-008c3d0c5ac0 (mRNA) 314 residues [Merlin:1067-2011 - strand] [peptide] name=Merlin_3_mRNA-00001
10+
MLTLDEFKNQAGNIDFQRTNMFSCVFATTPSAKSQQLLDQFGGMLFNNLPLNNDWLGLTQ
11+
GEFTSGLTSIITAGTQQLVRKSGVSKYLIGAMSNRVVQSLLGEFEVGTYLLDFFNMAYPQ
12+
SGLMIYSVKIPENRLSHEMDFNHNSPNIRITGRELDPLTISFRMDPEASNYRAMQDWVNS
13+
VQDPVTGLRALPTDVEADIQVNLHARNGLPHTVIMFTGCVPVACGAPELTYEGDNQIAVF
14+
DVTFAYRVMQTGAVGRQAALDWIEDRAVNSITGINSEMSLNGSLSRLSRLGGAAGGLSHV
15+
INSTRNSTSKILGL
16+
>296145b5-8924-4146-84aa-41c8a04d7bdc (mRNA) 351 residues [Merlin:2011-3066 - strand] [peptide] name=Merlin_4_mRNA-00001
1117
MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY
1218
KNGLLFTAYDMNSRTTGDMRSMRLGEMKRTANSVVKSITGTNTNKVDKIPVVNILLPRSK
1319
SDVESVSHKFNDVGDSLISRGGGTATGVLSNVASTAVFGGLESLTQGLMADHNEQIYNTA
1420
RSMYGGADNRTKVFTWDLTPRSVQDLIAIIEIYEYFNYYSYGETGTSTYAKEVKSQLDEW
1521
YKSTFLDTLTPDEANKNDTVFEKITSFLSNVIVVSNPTVWFVRNFGTTSKFDGRAEVFGP
1622
CQIQSIRFDKTPNGNFNGLAIAPNLPSTFTLEITMREILTLNRASVYAEGF
17-
>154a6d4e-dc94-4de2-9403-63aa47a01d82 (mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide] name=multiexongene
23+
>ac134b2b-dc09-4089-8032-83637c62fd82 (mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide] name=Merlin_5_mRNA-00001
1824
MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG
1925
NNLIDSVDNVAEGTELAAEASERTTESIKTLTGVASTISDKLSKLASMLESKVQAVEQKV
2026
QESGASASTGLSVIEDKLPDPDEPFFPPVPQEPENNKKDQKKDDKKPTDMLGDLLKTTKG
@@ -25,5 +31,9 @@ GEFKTRAFDWVLGRENKIDSTQASDRDQETQNLKAMAPEKREETLIKQNEARAAVQRLEK
2531
YIGDVDPENPTNMQSLEKAYNSAKKSISDSAISDQPATKKELDKRFQRVESKYQKLKEDN
2632
TPKPAAPATSEDNQRVQNIQKAENAKEQSKKSTGDMNVANTQVNNVNNSKTIHQVQTVTA
2733
TPAPGVFGATGVN
28-
>ce047673-3c00-425c-862b-20fd004eca42 (mRNA) 1 residues [Merlin:5011-6066 - strand] [peptide] name=cds-not-under-exon
29-
L
34+
>065d1481-1403-4a2b-8566-8ee84216e885 (mRNA) 258 residues [Merlin:5011-6066 - strand] [peptide] name=Merlin_42_mRNA-00001
35+
FNDAGESIKEMIGAIYESKPLIAPAMNTINTYVPRVPWTSNITEYKKYVRDVALAVDNDQ
36+
FVFVWEDIYGLNMMDYDAMINQESIKVIVGEPRTIGQFVGELEYNLAYDFQWLTKANAHT
37+
RDPIFNATIYSHSFLDNNLPRIVTGDGQNSIFVSRSGAYSEMTYRNGYEEAIRLQTMAQY
38+
DGYATCKMVGDFEMTPGDKINFFDPKKQFKADFYIDEVIHEVSNNQSITTLYMFTNSRKL
39+
ETVEPIKVKNELKSDTTT

test-data/gene-top.gff

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
##gff-version 3
22
##sequence-region Merlin 1 172788
3-
Merlin GeneMark.hmm gene 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin
4-
Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00
5-
Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin
6-
Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_mRNA;seqid=Merlin
3+
Merlin GeneMark.hmm gene 2 691 -856.563659 + . ID=Merlin_123;seqid=Merlin
4+
Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_123_mRNA;Parent=Merlin_123;seqid=Merlin;color=#00ff00
5+
Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_123_exon;Parent=Merlin_123_mRNA;seqid=Merlin
6+
Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_123_CDS;Parent=Merlin_123_mRNA;seqid=Merlin

0 commit comments

Comments
 (0)