Skip to content

Commit 20390bd

Browse files
committed
issue #73 - missing gene symbols
1 parent c65ad96 commit 20390bd

File tree

2 files changed

+4
-11
lines changed

2 files changed

+4
-11
lines changed

generate_transcript_data/gff_parser.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -108,17 +108,10 @@ def _finish(self):
108108

109109
@staticmethod
110110
def _create_gene(feature, gene_accession):
111-
biotype_set = set()
112-
description = None
113-
114-
# Non mandatory - Ensembl doesn't have some stuff on some RNAs
115-
if feature.type in {"gene", "pseudogene", "ncRNA_gene", "mt_gene"}:
116-
gene_name = feature.attr.get("Name")
117-
description = feature.attr.get("description")
118-
else:
119-
gene_name = feature.attr.get("gene_name")
111+
gene_name = feature.attr.get("gene_name") or feature.attr.get("Name")
112+
description = feature.attr.get("description")
120113

121-
# RefSeq GRCh38 has gene.gene_biotype
114+
biotype_set = set()
122115
biotype = feature.attr.get("gene_biotype") or feature.attr.get("biotype")
123116
if biotype:
124117
biotype_set.add(biotype)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# After 0.2.22 we split version into separate code (pip) and data schema versions
22
# The cdot client will use its own major/minor to determine whether it can read these data files
3-
JSON_SCHEMA_VERSION = "0.2.24"
3+
JSON_SCHEMA_VERSION = "0.2.25"

0 commit comments

Comments
 (0)