@@ -469,12 +469,14 @@ def _get_computed_reference_sequence(
469469
470470
471471def _get_mapped_reference_sequence (
472+ metadata : TargetGene ,
472473 layer : AnnotationLayer ,
473474 tx_output : TxSelectResult | TxSelectError | None = None ,
474475 align_result : AlignmentResult | None = None ,
475476) -> MappedReferenceSequence | None :
476477 """Report the mapped reference sequence for a score set
477478
479+ :param metadata: Target gene metadata from MaveDB API
478480 :param layer: AnnotationLayer
479481 :param tx_output: Transcript data for a score set
480482 :return A MappedReferenceSequence object
@@ -500,13 +502,21 @@ def _get_mapped_reference_sequence(
500502 sequence_id = vrs_id ,
501503 sequence_accessions = [tx_output .np ],
502504 )
503- seq_id = get_chromosome_identifier (align_result .chrom )
505+ # accession-based score sets with genomic accession do not have alignment results
506+ if (
507+ align_result is None
508+ and metadata .target_accession_id
509+ and metadata .target_accession_id .startswith ("NC" )
510+ ):
511+ seq_id = metadata .target_accession_id
512+ else :
513+ seq_id = get_chromosome_identifier (align_result .chrom )
504514 vrs_id = get_vrs_id_from_identifier (seq_id )
505515 if vrs_id is None :
506516 # TODO catch this error, don't fail whole job for one target
507- # msg = "ID could not be acquired from Seqrepo for chromosome identifier"
508- # raise ValueError(msg)
509- return None
517+ msg = "ID could not be acquired from Seqrepo for chromosome identifier"
518+ raise ValueError (msg )
519+ # return None
510520 return MappedReferenceSequence (
511521 sequence_type = TargetSequenceType .DNA ,
512522 sequence_id = vrs_id ,
@@ -593,9 +603,11 @@ def save_mapped_output_json(
593603 "computed_reference_sequence" : None ,
594604 "mapped_reference_sequence" : None ,
595605 }
596- for layer in preferred_layers
606+ # TODO change this back after reimplementing multi-target mapping
607+ for layer in AnnotationLayer
597608 }
598-
609+ # sometimes None shows up in the preferred_layers set; discard it before iterating
610+ preferred_layers .discard (None )
599611 for layer in preferred_layers :
600612 reference_sequences [target_gene ][layer ][
601613 "computed_reference_sequence"
@@ -605,7 +617,10 @@ def save_mapped_output_json(
605617 reference_sequences [target_gene ][layer ][
606618 "mapped_reference_sequence"
607619 ] = _get_mapped_reference_sequence (
608- layer , tx_output [target_gene ], align_results [target_gene ]
620+ metadata .target_genes [target_gene ],
621+ layer ,
622+ tx_output [target_gene ],
623+ align_results [target_gene ],
609624 )
610625
611626 for m in mappings [target_gene ]:
@@ -615,21 +630,43 @@ def save_mapped_output_json(
615630 # drop annotation layer from mapping object
616631 mapped_scores .append (ScoreAnnotation (** m .model_dump ()))
617632
618- # drop Nonetype reference sequences
619- for target_gene in reference_sequences :
620- for layer in list (reference_sequences [target_gene ].keys ()):
621- if (
622- reference_sequences [target_gene ][layer ]["mapped_reference_sequence" ]
623- is None
624- and reference_sequences [target_gene ][layer ][
625- "computed_reference_sequence"
626- ]
627- is None
628- ) or layer is None :
629- del reference_sequences [target_gene ][layer ]
630-
633+ # TODO drop this "continue" after reimplementing multi-target mapping
634+ continue
635+
636+ # TODO add this back after reimplementing multi-target mapping
637+ # drop Nonetype reference sequences
638+ # for target_gene in reference_sequences:
639+ # for layer in list(reference_sequences[target_gene].keys()):
640+ # if (
641+ # reference_sequences[target_gene][layer]["mapped_reference_sequence"]
642+ # is None
643+ # and reference_sequences[target_gene][layer][
644+ # "computed_reference_sequence"
645+ # ]
646+ # is None
647+ # ) or layer is None:
648+ # del reference_sequences[target_gene][layer]
649+
650+ # TODO drop this "continue" after reimplementing multi-target mapping
651+ continue
652+ # TODO drop this after reimplementing multi-target mapping
653+ reference_sequences = reference_sequences .popitem ()[1 ] # get only value in dict
654+ # TODO change this back after reimplementing multi-target mapping
655+ # this only works for --prefer_genomic right now, which is fine because we're going to change it back after reimplementing multi-target mapping
631656 output = ScoresetMapping (
632657 metadata = metadata .model_dump (),
658+ computed_protein_reference_sequence = reference_sequences [
659+ AnnotationLayer .PROTEIN
660+ ]["computed_reference_sequence" ],
661+ mapped_protein_reference_sequence = reference_sequences [AnnotationLayer .PROTEIN ][
662+ "mapped_reference_sequence"
663+ ],
664+ computed_genomic_reference_sequence = reference_sequences [
665+ AnnotationLayer .GENOMIC
666+ ]["computed_reference_sequence" ],
667+ mapped_genomic_reference_sequence = reference_sequences [AnnotationLayer .GENOMIC ][
668+ "mapped_reference_sequence"
669+ ],
633670 reference_sequences = reference_sequences ,
634671 mapped_scores = mapped_scores ,
635672 )
0 commit comments