@@ -413,6 +413,7 @@ async def genomic_to_tx_segment(
413413 transcript : str | None = None ,
414414 gene : str | None = None ,
415415 coordinate_type : CoordinateType = CoordinateType .INTER_RESIDUE ,
416+ assembly : Assembly = Assembly .GRCH38 ,
416417 ) -> GenomicTxSegService :
417418 """Get transcript segment data for genomic data, lifted over to GRCh38.
418419
@@ -452,6 +453,8 @@ async def genomic_to_tx_segment(
452453 value is provided.
453454 :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
454455 ``seg_end_genomic``. Expects inter-residue coordinates by default
456+ :param assembly: The assembly that the supplied coordinate comes from. Set to
457+ GRCh38 by default
455458 :return: Genomic data (inter-residue coordinates)
456459 """
457460 errors = []
@@ -477,6 +480,7 @@ async def genomic_to_tx_segment(
477480 gene = gene ,
478481 is_seg_start = True ,
479482 coordinate_type = coordinate_type ,
483+ assembly = assembly ,
480484 )
481485 if start_tx_seg_data .errors :
482486 return _return_service_errors (start_tx_seg_data .errors )
@@ -497,6 +501,7 @@ async def genomic_to_tx_segment(
497501 gene = gene ,
498502 is_seg_start = False ,
499503 coordinate_type = coordinate_type ,
504+ assembly = assembly ,
500505 )
501506 if end_tx_seg_data .errors :
502507 return _return_service_errors (end_tx_seg_data .errors )
@@ -727,6 +732,7 @@ async def _genomic_to_tx_segment(
727732 gene : str | None = None ,
728733 is_seg_start : bool = True ,
729734 coordinate_type : CoordinateType = CoordinateType .INTER_RESIDUE ,
735+ assembly : Assembly = Assembly .GRCH38 ,
730736 ) -> GenomicTxSeg :
731737 """Given genomic data, generate a boundary for a transcript segment.
732738
@@ -749,6 +755,8 @@ async def _genomic_to_tx_segment(
749755 ``False`` if ``genomic_pos`` is where the transcript segment ends.
750756 :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
751757 ``seg_end_genomic``. Expects inter-residue coordinates by default
758+ :param assembly: The assembly that the supplied coordinate comes from. Set to
759+ GRCh38 by default
752760 :return: Data for a transcript segment boundary (inter-residue coordinates)
753761 """
754762 params = {key : None for key in GenomicTxSeg .model_fields }
@@ -771,6 +779,9 @@ async def _genomic_to_tx_segment(
771779 genomic_ac_validation = await self .uta_db .validate_genomic_ac (genomic_ac )
772780 if not genomic_ac_validation :
773781 return GenomicTxSeg (errors = [f"{ genomic_ac } does not exist in UTA" ])
782+ if assembly == Assembly .GRCH37 :
783+ grch38_ac = await self .uta_db .get_newest_assembly_ac (genomic_ac )
784+ genomic_ac = grch38_ac [0 ]
774785 else :
775786 genomic_acs , err_msg = self .seqrepo_access .chromosome_to_acs (chromosome )
776787
@@ -780,13 +791,15 @@ async def _genomic_to_tx_segment(
780791 )
781792 genomic_ac = genomic_acs [0 ]
782793
783- # Always liftover to GRCh38
784- genomic_ac , genomic_pos , err_msg = await self ._get_grch38_ac_pos (
785- genomic_ac ,
786- genomic_pos ,
787- )
788- if err_msg :
789- return GenomicTxSeg (errors = [err_msg ])
794+ # Liftover to GRCh38 if the provided assembly is GRCh37
795+ if assembly == Assembly .GRCH37 :
796+ genomic_pos = await self ._get_grch38_pos (genomic_pos , genomic_ac )
797+ if not genomic_pos :
798+ return GenomicTxSeg (
799+ errors = [
800+ f"Lifting over { genomic_pos } on { genomic_ac } from { Assembly .GRCH37 .value } to { Assembly .GRCH38 .value } was unsuccessful."
801+ ]
802+ )
790803
791804 # Select a transcript if not provided
792805 if not transcript :
@@ -896,59 +909,23 @@ async def _genomic_to_tx_segment(
896909 tx_ac = transcript ,
897910 )
898911
899- async def _get_grch38_ac_pos (
900- self ,
901- genomic_ac : str ,
902- genomic_pos : int ,
903- grch38_ac : str | None = None ,
904- ) -> tuple [str | None , int | None , str | None ]:
905- """Get GRCh38 genomic representation for accession and position
906-
907- :param genomic_ac: RefSeq genomic accession (GRCh37 or GRCh38 assembly)
908- :param genomic_pos: Genomic position on ``genomic_ac``
909- :param grch38_ac: A valid GRCh38 genomic accession for ``genomic_ac``. If not
910- provided, will attempt to retrieve associated GRCh38 accession from UTA.
911- :return: Tuple containing GRCh38 accession, GRCh38 position, and error message
912- if unable to get GRCh38 representation
913- """
914- # Validate accession exists
915- if not grch38_ac :
916- grch38_ac = await self .uta_db .get_newest_assembly_ac (genomic_ac )
917- if not grch38_ac :
918- return None , None , f"Unrecognized genomic accession: { genomic_ac } ."
919-
920- grch38_ac = grch38_ac [0 ]
921-
922- if grch38_ac != genomic_ac :
923- # Ensure genomic_ac is GRCh37
924- chromosome , _ = self .seqrepo_access .translate_identifier (
925- genomic_ac , Assembly .GRCH37 .value
926- )
927- if not chromosome :
928- _logger .warning (
929- "SeqRepo could not find associated %s assembly for genomic accession %s." ,
930- Assembly .GRCH37 .value ,
931- genomic_ac ,
932- )
933- return (
934- None ,
935- None ,
936- f"`genomic_ac` must use { Assembly .GRCH37 .value } or { Assembly .GRCH38 .value } assembly." ,
937- )
938- chromosome = chromosome [- 1 ].split (":" )[- 1 ]
939- liftover_data = self .liftover .get_liftover (
940- chromosome , genomic_pos , Assembly .GRCH38
941- )
942- if liftover_data is None :
943- return (
944- None ,
945- None ,
946- f"Lifting over { genomic_pos } on { genomic_ac } from { Assembly .GRCH37 .value } to { Assembly .GRCH38 .value } was unsuccessful." ,
947- )
948- genomic_pos = liftover_data [1 ]
949- genomic_ac = grch38_ac
async def _get_grch38_pos(self, genomic_pos: int, genomic_ac: str) -> int | None:
    """Liftover a GRCh37 coordinate to GRCh38

    :param genomic_pos: A genomic coordinate in GRCh37
    :param genomic_ac: The genomic accession in GRCh38
    :return: The genomic coordinate in GRCh38, or ``None`` if ``genomic_ac`` could
        not be translated to a GRCh38 identifier or the liftover was unsuccessful
    """
    identifiers, _ = self.seqrepo_access.translate_identifier(
        genomic_ac, target_namespaces=Assembly.GRCH38.value
    )
    if not identifiers:
        # Nothing to index into: return ``None`` so the caller can report the
        # failed liftover instead of this method raising ``IndexError``
        return None

    # Use the last identifier returned and keep only the text after its final
    # ``:`` separator
    chromosome = identifiers[-1].split(":")[-1]
    liftover_data = self.liftover.get_liftover(
        chromosome, genomic_pos, Assembly.GRCH38
    )
    if liftover_data is None:
        return None
    # The second element of the liftover result is the lifted-over position
    return liftover_data[1]
952929
953930 async def _validate_gene_coordinates (
954931 self ,
0 commit comments