@@ -117,9 +117,14 @@ def _create_gene(feature, gene_accession):
117117 if biotype :
118118 biotype_set .add (biotype )
119119
120+ source_set = set ()
121+ if feature .source :
122+ source_set .add (feature .source )
123+
120124 return {
121125 "gene_symbol" : gene_name ,
122126 "biotype" : biotype_set ,
127+ "source" : source_set ,
123128 "id" : gene_accession ,
124129 "description" : description
125130 }
@@ -132,6 +137,7 @@ def _create_transcript(feature, transcript_accession, gene_data):
132137 "gene_version" : gene_data .get ("id" ),
133138 "exons" : [],
134139 "biotype" : set (),
140+ "source" : set (),
135141 CONTIG : feature .iv .chrom ,
136142 STRAND : feature .iv .strand ,
137143 }
@@ -169,7 +175,6 @@ def _add_transcript_data(self, transcript_accession, transcript, feature):
169175 if note := feature .attr .get ("Note" ):
170176 transcript ["note" ] = note
171177
172-
173178 def _finish_process_features (self ):
174179 for transcript_accession , transcript_data in self .transcript_data_by_accession .items ():
175180 features_by_type = self .transcript_features_by_type .get (transcript_accession , {})
@@ -458,6 +463,10 @@ def handle_feature(self, feature):
458463 gene_data ["biotype" ].add (biotype )
459464 transcript ["biotype" ].add (biotype )
460465
466+ if feature .source :
467+ gene_data ["source" ].add (feature .source )
468+ transcript ["source" ].add (feature .source )
469+
461470 self ._handle_protein_version (transcript_accession , feature )
462471 self ._add_tags_to_transcript_data (transcript , feature )
463472
@@ -511,6 +520,9 @@ def handle_feature(self, feature):
511520 if m := self .hgnc_pattern .match (description ):
512521 gene_data ["hgnc" ] = m .group (2 )
513522
523+ if feature .source :
524+ gene_data ["source" ].add (feature .source )
525+
514526 self .gene_accession_by_feature_id [feature .attr ["ID" ]] = gene_accession
515527 else :
516528 transcript_accession = None
@@ -564,6 +576,8 @@ def handle_feature(self, feature):
564576 elif feature .type not in EXCLUDE_BIOTYPES :
565577 transcript ["biotype" ].add (feature .type )
566578
579+ if feature .source :
580+ transcript ["source" ].add (feature .source )
567581
568582
569583 @staticmethod
0 commit comments