Skip to content

Commit 6f36b6c

Browse files
committed
Use common JSON writing method
1 parent caf50a2 commit 6f36b6c

File tree

1 file changed

+4
-13
lines changed

1 file changed

+4
-13
lines changed

generate_transcript_data/cdot_json.py

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -434,7 +434,7 @@ def combine_builds(args):
434434

435435
urls_different_coding = defaultdict(list)
436436
genes = {}
437-
transcripts = {}
437+
transcript_versions = {}
438438
for genome_build, f in genome_build_file.items():
439439
# TODO: Check cdot versions
440440
json_builds = next(ijson.items(f, "genome_builds"))
@@ -444,7 +444,7 @@ def combine_builds(args):
444444
f.seek(0) # Reset for next ijson call
445445
for transcript_id, build_transcript in ijson.kvitems(f, "transcripts"):
446446
genome_builds = {}
447-
existing_transcript = transcripts.get(transcript_id)
447+
existing_transcript = transcript_versions.get(transcript_id)
448448
if existing_transcript:
449449
genome_builds = existing_transcript["genome_builds"]
450450
# Latest always used, but check existing - if codons are different old versions are wrong so remove
@@ -460,24 +460,15 @@ def combine_builds(args):
460460
genome_builds[genome_build] = build_transcript["genome_builds"][genome_build]
461461
# Use latest (with merged genome builds)
462462
build_transcript["genome_builds"] = genome_builds
463-
transcripts[transcript_id] = build_transcript
463+
transcript_versions[transcript_id] = build_transcript
464464

465465
f.seek(0) # Reset for next ijson call
466466
for gene_id, gene_data in ijson.kvitems(f, "genes"):
467467
genes[gene_id] = gene_data
468468

469469
f.close()
470470

471-
print("Writing cdot data")
472-
with gzip.open(args.output, 'wt') as outfile:
473-
data = {
474-
"transcripts": transcripts,
475-
"cdot_version": JSON_SCHEMA_VERSION,
476-
"genome_builds": list(genome_build_file.keys()),
477-
}
478-
if genes:
479-
data["genes"] = genes
480-
json.dump(data, outfile)
471+
write_cdot_json(args.output, genes, transcript_versions, list(genome_build_file.keys()))
481472

482473
if urls_different_coding:
483474
print("Some transcripts were removed as they had different coding coordinates from latest")

0 commit comments

Comments
 (0)