@@ -434,7 +434,7 @@ def combine_builds(args):
434434
435435 urls_different_coding = defaultdict (list )
436436 genes = {}
437- transcripts = {}
437+ transcript_versions = {}
438438 for genome_build , f in genome_build_file .items ():
439439 # TODO: Check cdot versions
440440 json_builds = next (ijson .items (f , "genome_builds" ))
@@ -444,7 +444,7 @@ def combine_builds(args):
444444 f .seek (0 ) # Reset for next ijson call
445445 for transcript_id , build_transcript in ijson .kvitems (f , "transcripts" ):
446446 genome_builds = {}
447- existing_transcript = transcripts .get (transcript_id )
447+ existing_transcript = transcript_versions .get (transcript_id )
448448 if existing_transcript :
449449 genome_builds = existing_transcript ["genome_builds" ]
450450 # Latest always used, but check existing - if codons are different old versions are wrong so remove
@@ -460,24 +460,15 @@ def combine_builds(args):
460460 genome_builds [genome_build ] = build_transcript ["genome_builds" ][genome_build ]
461461 # Use latest (with merged genome builds)
462462 build_transcript ["genome_builds" ] = genome_builds
463- transcripts [transcript_id ] = build_transcript
463+ transcript_versions [transcript_id ] = build_transcript
464464
465465 f .seek (0 ) # Reset for next ijson call
466466 for gene_id , gene_data in ijson .kvitems (f , "genes" ):
467467 genes [gene_id ] = gene_data
468468
469469 f .close ()
470470
471- print ("Writing cdot data" )
472- with gzip .open (args .output , 'wt' ) as outfile :
473- data = {
474- "transcripts" : transcripts ,
475- "cdot_version" : JSON_SCHEMA_VERSION ,
476- "genome_builds" : list (genome_build_file .keys ()),
477- }
478- if genes :
479- data ["genes" ] = genes
480- json .dump (data , outfile )
471+ write_cdot_json (args .output , genes , transcript_versions , list (genome_build_file .keys ()))
481472
482473 if urls_different_coding :
483474 print ("Some transcripts were removed as they had different coding coordinates from latest" )
0 commit comments