1818)
1919from ga4gh .vrs .normalize import normalize
2020from mavehgvs .util import parse_variant_strings
21- from mavehgvs .variant import Variant
21+ from mavehgvs .variant import AA_CODES , Variant
2222
2323from dcd_mapping .lookup import (
2424 cdot_rest ,
@@ -112,6 +112,9 @@ def _create_pre_mapped_hgvs_strings(
112112 msg = f"Alignment result or accession ID must be provided for { layer } annotations (Alignment was `{ alignment } `)."
113113 raise ValueError (msg )
114114
115+ if raw_description == "p.=" and tx is not None :
116+ raw_description = _construct_transcript_identical_hgvs_string (tx )
117+
115118 raw_variant_strings = _parse_raw_variant_str (raw_description )
116119 variants , errors = parse_variant_strings (raw_variant_strings )
117120
@@ -123,7 +126,7 @@ def _create_pre_mapped_hgvs_strings(
123126
124127 # ga4gh hgvs_tools does not support intronic variants, so they will err out when vrs allele translator is called
125128 # therefore skip them there
126- if is_intronic_variant (variant ):
129+ if layer != AnnotationLayer . PROTEIN and is_intronic_variant (variant ):
127130 msg = f"Variant is intronic and cannot be processed: { variant } "
128131 raise ValueError (msg )
129132
@@ -174,6 +177,9 @@ def _create_post_mapped_hgvs_strings(
174177 msg = f"Alignment result must be provided for { layer } annotations (Alignment was `{ alignment } `)."
175178 raise ValueError (msg )
176179
180+ if raw_description == "p.=" and tx is not None :
181+ raw_description = _construct_transcript_identical_hgvs_string (tx )
182+
177183 raw_variants = _parse_raw_variant_str (raw_description )
178184 variants , errors = parse_variant_strings (raw_variants )
179185
@@ -185,7 +191,7 @@ def _create_post_mapped_hgvs_strings(
185191
186192 # ga4gh hgvs_tools does not support intronic variants, so they will err out when vrs allele translator is called
187193 # therefore skip them there
188- if is_intronic_variant (variant ):
194+ if layer != AnnotationLayer . PROTEIN and is_intronic_variant (variant ):
189195 msg = f"Variant is intronic and cannot be processed: { variant } "
190196 raise ValueError (msg )
191197
@@ -326,6 +332,23 @@ def _parse_raw_variant_str(raw_description: str) -> list[str]:
326332 return [raw_description ]
327333
328334
335+ def _construct_transcript_identical_hgvs_string (tx : TxSelectResult ) -> str :
336+ """Construct an HGVS string for a transcript identical variant.
337+
338+ :param tx: TxSelectResult object containing the transcript sequence and start position
339+ :return: The constructed HGVS string
340+ """
341+ description = "p.["
342+
343+ for i , code in enumerate (tx .sequence , start = 1 ):
344+ if i == 1 :
345+ description += f"{ AA_CODES [code ]} { tx .start + i } ="
346+ else :
347+ description += f";{ AA_CODES [code ]} { tx .start + i } ="
348+
349+ return description + "]"
350+
351+
329352def _map_protein_coding_pro (
330353 row : ScoreRow ,
331354 sequence_id : str ,
@@ -340,7 +363,7 @@ def _map_protein_coding_pro(
340363 :param transcript: The transcript selection information for a score set
341364 :return: VRS mapping object if mapping succeeds
342365 """
343- if row .hgvs_pro in {"_wt" , "_sy" , "NA" } or len ( row . hgvs_pro ) == 3 :
366+ if row .hgvs_pro in {"_wt" , "_sy" , "NA" }:
344367 _logger .warning (
345368 "Can't process variant syntax %s for %s" , row .hgvs_pro , row .accession
346369 )
@@ -677,11 +700,7 @@ def _hgvs_pro_is_valid(hgvs_pro: str) -> bool:
677700 :param hgvs_nt: MAVE_HGVS protein expression
678701 :return: True if expression appears populated and valid
679702 """
680- return (
681- (hgvs_pro not in {"_wt" , "_sy" , "NA" })
682- and (len (hgvs_pro ) != 3 )
683- and ("fs" not in hgvs_pro )
684- )
703+ return (hgvs_pro not in {"_wt" , "_sy" , "NA" }) and ("fs" not in hgvs_pro )
685704
686705
687706def _map_protein_coding (
@@ -815,14 +834,16 @@ def _map_accession(
815834 hgvs_nt_mappings = _map_genomic (row , sequence_id , align_result )
816835 variations .append (hgvs_nt_mappings )
817836 else :
818- [
819- MappedScore (
820- accession_id = row .accession ,
821- score = row .score ,
822- error_message = f"Unrecognized accession prefix for accession id { metadata .target_accession_id } " ,
823- )
824- for row in records
825- ]
837+ variations .append (
838+ [
839+ MappedScore (
840+ accession_id = row .accession ,
841+ score = row .score ,
842+ error_message = f"Unrecognized accession prefix for accession id { metadata .target_accession_id } " ,
843+ )
844+ for row in records
845+ ]
846+ )
826847
827848 return variations
828849
0 commit comments