@@ -499,6 +499,7 @@ def set_gene_attributes(self):
499499 # assigns an ordered list of all known exons and introns to self.exons and self.introns
500500 # returns 2 maps, isoform id -> intron / exon list
501501 def set_introns_and_exons (self ):
502+ used_cds = False
502503 # dictionary: isoform id -> ordered list of intron coordinates
503504 all_isoforms_introns = {}
504505 # dictionary: isoform id -> ordered list of exon coordinates
@@ -507,18 +508,35 @@ def set_introns_and_exons(self):
507508 for gene_db in self .gene_db_list :
508509 for t in self .db .children (gene_db , featuretype = ('transcript' , 'mRNA' ), order_by = 'start' ):
509510 exons = []
510- for e in self .db .children (t , order_by = 'start' ):
511- if e .featuretype == 'exon' :
512- exons .append ((e .start , e .end ))
511+ all_features = []
512+ for e in self .db .children (t , featuretype = ('exon' , 'CDS' ), order_by = 'start' ):
513+ all_features .append ((e .featuretype , e .start , e .end ))
514+
515+ for i , e in enumerate (all_features ):
516+ if e [0 ] == 'exon' :
517+ exons .append ((e [1 ], e [2 ]))
518+ continue
519+
520+ cds = (e [1 ], e [2 ])
521+ prev_exon = None if i == 0 else (all_features [i - 1 ][1 ], all_features [i - 1 ][2 ])
522+ next_exon = None if i == len (all_features )- 1 else (all_features [i + 1 ][1 ], all_features [i + 1 ][2 ])
523+ if (prev_exon and overlaps (cds , prev_exon )) or (next_exon and overlaps (cds , next_exon )):
524+ continue
525+ exons .append (cds )
526+ used_cds = True
527+
513528 if not exons :
514- logger .warning ("Malformed transcript %s has no exons" % t .id )
529+ logger .warning ("Malformed transcript %s has no exons or CDS " % t .id )
515530 continue
516531 all_isoforms_exons [t .id ] = exons
517532 all_isoforms_introns [t .id ] = junctions_from_blocks (all_isoforms_exons [t .id ])
518533
519534 if self .db and not all_isoforms_exons :
520535 logger .warning ("Gene %s has no exons / transcripts, check your input annotation" % self .gene_db_list [0 ].id )
521536
537+ if used_cds :
538+ logger .warning ("Gene %s has only CDS features for some transcripts, will be treating those as exons" % self .gene_db_list [0 ].id )
539+
522540 introns = set ()
523541 exons = set ()
524542 for i in all_isoforms_introns .keys ():
0 commit comments