@@ -89,36 +89,36 @@ def AssertAdmin(user):
8989
9090
9191def _tnType (feature ):
92- if feature .type in ('gene' , 'mRNA' , 'exon' , 'CDS' , 'terminator' , 'tRNA' ):
92+ if feature .type in ('gene' , 'mRNA' , 'exon' , 'CDS' , 'terminator' , 'tRNA' , 'snRNA' , 'snoRNA' , 'ncRNA' , 'rRNA' , 'miRNA' , 'repeat_region' , 'transposable_element' , 'pseudogene' , 'transcript' ):
9393 return feature .type
9494 else :
9595 return 'exon'
9696
9797
def _yieldGeneData(gene, disable_cds_recalculation=False, use_name=False):
    """Build the data for a gene-level feature and its transcripts.

    The gene itself is converted with ``_yieldSubFeatureData``. Every
    sub-feature whose ``_tnType`` is in ``coding_transcript_types`` or
    ``noncoding_transcript_types`` is converted with the matching transcript
    helper and collected under ``'children'``; other sub-features are ignored.

    Returns the list of converted transcripts when a ``'children'`` entry is
    present and ``gene.type == 'gene'``; otherwise returns the gene-level dict.
    """
    current = _yieldSubFeatureData(
        gene,
        disable_cds_recalculation=disable_cds_recalculation,
        use_name=use_name,
    )

    if gene.sub_features:
        current['children'] = []
        for sf in gene.sub_features:
            sf_type = _tnType(sf)
            if sf_type in coding_transcript_types:
                current['children'].append(
                    _yieldCodingTranscriptData(
                        sf,
                        disable_cds_recalculation=disable_cds_recalculation,
                        use_name=use_name,
                    )
                )
            elif sf_type in noncoding_transcript_types:
                current['children'].append(
                    _yieldNonCodingTranscriptData(
                        sf,
                        disable_cds_recalculation=disable_cds_recalculation,
                        use_name=use_name,
                    )
                )

    # # TODO: handle comments
    # # TODO: handle dbxrefs
    # # TODO: handle attributes
    # # TODO: handle aliases
    # # TODO: handle description
    # # TODO: handle GO, Gene Product, Provenance

    # NOTE(review): when the gene has sub-features but none of them is a
    # transcript type, 'children' is an empty list and an empty list is
    # returned here -- confirm that dropping the gene is intended.
    if 'children' in current and gene.type == 'gene':
        # Only sending mRNA level as apollo is more comfortable with orphan mRNAs
        return current['children']

    # No children, return a generic gene feature
    return current
122122
123123
124124def _yieldSubFeatureData (f , disable_cds_recalculation = False , use_name = False ):
@@ -135,14 +135,17 @@ def _yieldSubFeatureData(f, disable_cds_recalculation=False, use_name=False):
135135 }
136136 },
137137 }
138- if disable_cds_recalculation is True :
138+ if disable_cds_recalculation :
139139 current ['use_cds' ] = 'true'
140140
141141 if f .type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types
142142 + single_level_feature_types ):
143143 current ['name' ] = f .qualifiers .get ('Name' , [f .id ])[0 ]
144144
145- if use_name is True :
145+ if 'ID' in f .qualifiers :
146+ current ['gff_id' ] = f .qualifiers ['ID' ][0 ]
147+
148+ if use_name :
146149 current ['use_name' ] = True
147150
148151 # if OGS:
@@ -169,6 +172,14 @@ def _yieldCodingTranscriptData(f, disable_cds_recalculation=False, use_name=Fals
169172 }
170173 },
171174 }
175+
176+ if f .type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types
177+ + single_level_feature_types ):
178+ current ['name' ] = f .qualifiers .get ('Name' , [f .id ])[0 ]
179+
180+ if 'ID' in f .qualifiers :
181+ current ['gff_id' ] = f .qualifiers ['ID' ][0 ]
182+
172183 if len (f .sub_features ) > 0 :
173184 current ['children' ] = []
174185 for sf in f .sub_features :
@@ -178,16 +189,8 @@ def _yieldCodingTranscriptData(f, disable_cds_recalculation=False, use_name=Fals
178189 return current
179190
180191
181- def print_file (path ):
182- with open (path ) as file :
183- print (file .read ())
184- file .close ()
185-
186-
187- # TODO: we may need specify something different here, but for now this works
188-
189- # def _yieldNonCodingTranscriptData(features):
190- # pass
def _yieldNonCodingTranscriptData(features, disable_cds_recalculation=False, use_name=False):
    """Build the data for a non-coding transcript.

    Currently a thin wrapper: the work is delegated unchanged to
    ``_yieldCodingTranscriptData`` and its result is returned as-is.
    """
    return _yieldCodingTranscriptData(
        features,
        disable_cds_recalculation=disable_cds_recalculation,
        use_name=use_name,
    )
191194
192195
193196# def _yieldSingleLevelFeatureData(features):
@@ -198,13 +201,12 @@ def yieldApolloData(feature, use_name=False, disable_cds_recalculation=False):
198201 feature_type = _tnType (feature )
199202 if feature_type in gene_types :
200203 return _yieldGeneData (feature )
201- if feature_type in pseudogenes_types :
204+ elif feature_type in pseudogenes_types :
202205 return _yieldGeneData (feature )
203206 elif feature_type in coding_transcript_types :
204207 return _yieldCodingTranscriptData (feature )
205208 elif feature_type in noncoding_transcript_types :
206- return _yieldCodingTranscriptData (feature )
207- # return _yieldNonCodingTranscriptData(current_feature)
209+ return _yieldNonCodingTranscriptData (feature )
208210 elif feature_type in single_level_feature_types :
209211 # return _yieldSingleLevelFeatureData(current_feature)
210212 return _yieldSubFeatureData (feature )
@@ -235,14 +237,17 @@ def _yieldFeatData(features, use_name=False, disable_cds_recalculation=False):
235237 }
236238 },
237239 }
238- if disable_cds_recalculation is True :
240+ if disable_cds_recalculation :
239241 current ['use_cds' ] = 'true'
240242
241243 if f .type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types
242244 + single_level_feature_types ):
243245 current ['name' ] = f .qualifiers .get ('Name' , [f .id ])[0 ]
244246
245- if use_name is True :
247+ if 'ID' in f .qualifiers :
248+ current ['gff_id' ] = f .qualifiers ['ID' ][0 ]
249+
250+ if use_name :
246251 current ['use_name' ] = True
247252
248253 # if OGS:
0 commit comments