@@ -89,12 +89,139 @@ def AssertAdmin(user):
8989
9090
def _tnType(feature):
    """
    Map a feature to the type name used in the generated feature data.

    :param feature: object exposing a ``type`` attribute
    :return: ``feature.type`` when it is one of the supported types,
             otherwise ``'exon'``
    """
    supported_types = (
        'gene', 'mRNA', 'exon', 'CDS', 'terminator', 'tRNA', 'snRNA',
        'snoRNA', 'ncRNA', 'rRNA', 'miRNA', 'repeat_region',
        'transposable_element', 'pseudogene', 'transcript',
    )
    # Anything outside the supported list is reported as an exon.
    return feature.type if feature.type in supported_types else 'exon'
9696
9797
def _yieldGeneData(gene, disable_cds_recalculation=False, use_name=False):
    """
    Build the data for a gene-level feature and its transcripts.

    :param gene: feature exposing ``type`` and ``sub_features``
    :param disable_cds_recalculation: forwarded to every helper call
    :param use_name: forwarded to every helper call
    :return: the list of transcript dicts when ``gene.type == 'gene'`` and the
             gene has sub-features; otherwise the gene's own dict
    """
    options = {
        'disable_cds_recalculation': disable_cds_recalculation,
        'use_name': use_name,
    }
    gene_data = _yieldSubFeatureData(gene, **options)

    if gene.sub_features:
        transcripts = []
        for child in gene.sub_features:
            child_type = _tnType(child)
            if child_type in coding_transcript_types:
                transcripts.append(_yieldCodingTranscriptData(child, **options))
            elif child_type in noncoding_transcript_types:
                transcripts.append(_yieldNonCodingTranscriptData(child, **options))
            # Sub-features of any other type are not included.
        gene_data['children'] = transcripts

    # TODO: handle comments, dbxrefs, attributes, aliases, description,
    # GO, Gene Product, Provenance

    # NOTE(review): a 'gene' whose sub-features are all of non-transcript
    # types yields an empty list here, not the gene record - confirm intended.
    if 'children' not in gene_data or gene.type != 'gene':
        # No children (or not a plain gene): return the generic feature itself
        return gene_data

    # Only sending mRNA level as apollo is more comfortable with orphan mRNAs
    return gene_data['children']
122+
123+
def _yieldSubFeatureData(f, disable_cds_recalculation=False, use_name=False):
    """
    Build the data dict for a single feature, without descending into children.

    :param f: feature exposing ``strand``, ``location``, ``type``, ``id`` and
              ``qualifiers``
    :param disable_cds_recalculation: when truthy, adds ``use_cds = 'true'``
    :param use_name: when truthy, adds ``use_name = True``
    :return: dict with ``location`` and ``type`` plus the optional keys
             ``use_cds``, ``name``, ``gff_id`` and ``use_name``
    """
    location = {
        'strand': f.strand,
        'fmin': int(f.location.start),
        'fmax': int(f.location.end),
    }
    type_info = {
        'name': _tnType(f),
        'cv': {
            'name': 'sequence',
        },
    }
    data = {'location': location, 'type': type_info}

    if disable_cds_recalculation:
        data['use_cds'] = 'true'

    # Only these feature categories carry a name; fall back to the feature id
    # when there is no Name qualifier.
    named_types = (
        coding_transcript_types
        + noncoding_transcript_types
        + gene_types
        + pseudogenes_types
        + single_level_feature_types
    )
    if f.type in named_types:
        data['name'] = f.qualifiers.get('Name', [f.id])[0]

    if 'ID' in f.qualifiers:
        data['gff_id'] = f.qualifiers['ID'][0]

    if use_name:
        data['use_name'] = True

    # if OGS:
    # TODO: handle comments, dbxrefs, attributes, aliases, description,
    # GO, Gene Product, Provenance
    return data
159+
160+
def _yieldCodingTranscriptData(f, disable_cds_recalculation=False, use_name=False):
    """
    Build the data dict for a transcript and its direct sub-features.

    The transcript's own record is produced by ``_yieldSubFeatureData`` so that
    ``disable_cds_recalculation`` and ``use_name`` are applied to the
    transcript itself as well as to its children. Previously the two options
    only reached the children.

    :param f: transcript feature exposing ``sub_features``
    :param disable_cds_recalculation: when truthy, ``use_cds`` is set on the
                                      transcript and on each child
    :param use_name: when truthy, ``use_name`` is set on the transcript and on
                     each child
    :return: the transcript dict, with a ``children`` list when the transcript
             has sub-features
    """
    options = {
        'disable_cds_recalculation': disable_cds_recalculation,
        'use_name': use_name,
    }
    current = _yieldSubFeatureData(f, **options)

    if f.sub_features:
        current['children'] = [
            _yieldSubFeatureData(sf, **options) for sf in f.sub_features
        ]

    return current
190+
191+
def _yieldNonCodingTranscriptData(features, disable_cds_recalculation=False, use_name=False):
    """
    Build the data dict for a non-coding transcript.

    Non-coding transcripts are serialised exactly like coding ones, so this
    simply delegates to ``_yieldCodingTranscriptData``.

    :param features: the transcript feature (a single feature, despite the name)
    :param disable_cds_recalculation: forwarded unchanged
    :param use_name: forwarded unchanged
    :return: whatever ``_yieldCodingTranscriptData`` returns
    """
    return _yieldCodingTranscriptData(
        features,
        disable_cds_recalculation=disable_cds_recalculation,
        use_name=use_name,
    )
194+
195+
196+ # def _yieldSingleLevelFeatureData(features):
197+ # return _yieldSubFeatureData(features[0])
198+
199+
def yieldApolloData(feature, use_name=False, disable_cds_recalculation=False):
    """
    Convert one feature into feature data, dispatching on its type.

    ``use_name`` and ``disable_cds_recalculation`` are forwarded to the
    selected helper; they were previously accepted but never passed on, so
    both options were silently ignored.

    :param feature: the feature to convert
    :param use_name: forwarded to the selected helper
    :param disable_cds_recalculation: forwarded to the selected helper
    :return: the selected helper's result; for gene and pseudogene types this
             may be a list of transcript dicts instead of a single dict
    """
    options = {
        'disable_cds_recalculation': disable_cds_recalculation,
        'use_name': use_name,
    }
    feature_type = _tnType(feature)

    # Genes and pseudogenes share the same handling.
    if feature_type in gene_types or feature_type in pseudogenes_types:
        return _yieldGeneData(feature, **options)
    if feature_type in coding_transcript_types:
        return _yieldCodingTranscriptData(feature, **options)
    if feature_type in noncoding_transcript_types:
        return _yieldNonCodingTranscriptData(feature, **options)

    # Single-level features and any remaining type are emitted as a flat
    # record with no children.
    return _yieldSubFeatureData(feature, **options)
215+
216+ # # if OGS:
217+ # # TODO: handle comments
218+ # # TODO: handle dbxrefs
219+ # # TODO: handle attributes
220+ # # TODO: handle aliases
221+ # # TODO: handle description
222+ # # TODO: handle GO, Gene Product, Provenance
223+
224+
98225def _yieldFeatData (features , use_name = False , disable_cds_recalculation = False ):
99226 for f in features :
100227 current = {
@@ -110,14 +237,17 @@ def _yieldFeatData(features, use_name=False, disable_cds_recalculation=False):
110237 }
111238 },
112239 }
113- if disable_cds_recalculation is True :
240+ if disable_cds_recalculation :
114241 current ['use_cds' ] = 'true'
115242
116243 if f .type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types
117244 + single_level_feature_types ):
118245 current ['name' ] = f .qualifiers .get ('Name' , [f .id ])[0 ]
119246
120- if use_name is True :
247+ if 'ID' in f .qualifiers :
248+ current ['gff_id' ] = f .qualifiers ['ID' ][0 ]
249+
250+ if use_name :
121251 current ['use_name' ] = True
122252
123253 # if OGS:
@@ -150,6 +280,20 @@ def add_property_to_feature(feature, property_key, property_value):
150280 return feature
151281
152282
def features_to_apollo_schema(features, use_name=False, disable_cds_recalculation=False):
    """
    Convert an iterable of features by calling ``yieldApolloData`` on each.

    :param features: iterable of features to convert
    :param use_name: passed through to ``yieldApolloData``
    :param disable_cds_recalculation: passed through to ``yieldApolloData``
    :return: list with one entry per input feature, in input order; each entry
             is whatever ``yieldApolloData`` returned for that feature (a dict,
             or a list of dicts for genes that have sub-features)
    """
    return [
        yieldApolloData(
            feature,
            use_name=use_name,
            disable_cds_recalculation=disable_cds_recalculation,
        )
        for feature in features
    ]
295+
296+
153297def features_to_feature_schema (features , use_name = False , disable_cds_recalculation = False ):
154298 """
155299
0 commit comments