Skip to content

Commit bb4e729

Browse files
committed
use CDS features when exons are absent #309
1 parent 4be3322 commit bb4e729

File tree

1 file changed

+22
-4
lines changed

1 file changed

+22
-4
lines changed

src/gene_info.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,7 @@ def set_gene_attributes(self):
499499
# assigns an ordered list of all known exons and introns to self.exons and self.introns
500500
# returns 2 maps, isoform id -> intron / exon list
501501
def set_introns_and_exons(self):
502+
used_cds = False
502503
# dictionary: isoform id -> ordered list of intron coordinates
503504
all_isoforms_introns = {}
504505
# dictionary: isoform id -> ordered list of exon coordinates
@@ -507,18 +508,35 @@ def set_introns_and_exons(self):
507508
for gene_db in self.gene_db_list:
508509
for t in self.db.children(gene_db, featuretype=('transcript', 'mRNA'), order_by='start'):
509510
exons = []
510-
for e in self.db.children(t, order_by='start'):
511-
if e.featuretype == 'exon':
512-
exons.append((e.start, e.end))
511+
all_features = []
512+
for e in self.db.children(t, featuretype=('exon', 'CDS'), order_by='start'):
513+
all_features.append((e.featuretype, e.start, e.end))
514+
515+
for i, e in enumerate(all_features):
516+
if e[0] == 'exon':
517+
exons.append((e[1], e[2]))
518+
continue
519+
520+
cds = (e[1], e[2])
521+
prev_exon = None if i == 0 else (all_features[i-1][1], all_features[i-1][2])
522+
next_exon = None if i == len(all_features)-1 else (all_features[i+1][1], all_features[i+1][2])
523+
if (prev_exon and overlaps(cds, prev_exon)) or (next_exon and overlaps(cds, next_exon)):
524+
continue
525+
exons.append(cds)
526+
used_cds = True
527+
513528
if not exons:
514-
logger.warning("Malformed transcript %s has no exons" % t.id)
529+
logger.warning("Malformed transcript %s has no exons or CDS" % t.id)
515530
continue
516531
all_isoforms_exons[t.id] = exons
517532
all_isoforms_introns[t.id] = junctions_from_blocks(all_isoforms_exons[t.id])
518533

519534
if self.db and not all_isoforms_exons:
520535
logger.warning("Gene %s has no exons / transcripts, check your input annotation" % self.gene_db_list[0].id)
521536

537+
if used_cds:
538+
logger.warning("Gene %s has only CDS features for some transcripts, will be treating those as exons" % self.gene_db_list[0].id)
539+
522540
introns = set()
523541
exons = set()
524542
for i in all_isoforms_introns.keys():

0 commit comments

Comments
 (0)