Skip to content

Commit bfb5e98

Browse files
authored
feat!: Add support for LinkerElement in Arriba translator (#241)
1 parent 33d341b commit bfb5e98

File tree

3 files changed

+33
-0
lines changed

3 files changed

+33
-0
lines changed

src/fusor/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
model_validator,
2424
)
2525

26+
LINKER_REGEX = r"\|([atcg]+)\|"
27+
2628

2729
class BaseModelForbidExtra(BaseModel, extra="forbid"):
2830
"""Base Pydantic model class with extra values forbidden."""

src/fusor/translator.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
"""
44

55
import logging
6+
import re
67

78
import polars as pl
89
from civicpy.civic import ExonCoordinate, MolecularProfile
910
from cool_seq_tool.schemas import Assembly, CoordinateType
1011
from ga4gh.core.models import MappableConcept
12+
from ga4gh.vrs.models import LiteralSequenceExpression
1113
from pydantic import BaseModel
1214

1315
from fusor.fusion_caller_models import (
@@ -24,6 +26,7 @@
2426
)
2527
from fusor.fusor import FUSOR
2628
from fusor.models import (
29+
LINKER_REGEX,
2730
AnchoredReads,
2831
Assay,
2932
AssayedFusion,
@@ -33,6 +36,7 @@
3336
ContigSequence,
3437
EventType,
3538
GeneElement,
39+
LinkerElement,
3640
MultiplePossibleGenesElement,
3741
ReadData,
3842
SpanningReads,
@@ -76,6 +80,7 @@ def _format_fusion(
7680
rf: bool | None = None,
7781
assay: Assay | None = None,
7882
contig: ContigSequence | None = None,
83+
linker_sequence: LinkerElement | None = None,
7984
reads: ReadData | None = None,
8085
molecular_profiles: list[MolecularProfile] | None = None,
8186
) -> AssayedFusion | CategoricalFusion:
@@ -90,6 +95,7 @@ def _format_fusion(
9095
:param rf: A boolean indicating if the reading frame is preserved
9196
:param assay: Assay
9297
:param contig: The contig sequence
98+
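:param linker_sequence: Optional linker element holding the non-template sequence; when provided it is inserted between the 5' and 3' partners in the structure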
9399
:param reads: The read data
94100
:return: AssayedFusion or CategoricalFusion object
95101
"""
@@ -109,6 +115,8 @@ def _format_fusion(
109115
params["structure"] = [gene_5prime, tr_3prime]
110116
else:
111117
params["structure"] = [tr_5prime, tr_3prime]
118+
if linker_sequence:
119+
params["structure"].insert(1, linker_sequence)
112120
return fusion_type(**params)
113121

114122
def _get_causative_event(
@@ -585,10 +593,22 @@ async def from_arriba(
585593
)
586594
)
587595
rf = bool(arriba.rf == "in-frame") if arriba.rf != "." else None
596+
597+
# Build read data, then derive the contig and optional linker element from fusion_transcript
588598
read_data = ReadData(
589599
spanning=SpanningReads(spanningReads=arriba.discordant_mates)
590600
)
591601
contig = ContigSequence(contig=arriba.fusion_transcript)
602+
linker_sequence = re.search(LINKER_REGEX, arriba.fusion_transcript)
603+
linker_sequence = (
604+
LinkerElement(
605+
linkerSequence=LiteralSequenceExpression(
606+
sequence=linker_sequence.group(1).upper()
607+
)
608+
)
609+
if linker_sequence
610+
else None
611+
)
592612

593613
return self._format_fusion(
594614
AssayedFusion,
@@ -604,6 +624,7 @@ async def from_arriba(
604624
rf,
605625
contig=contig,
606626
reads=read_data,
627+
linker_sequence=linker_sequence,
607628
)
608629

609630
async def from_cicero(

tests/test_translators.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,16 @@ async def test_arriba(
616616
assert arriba_fusor_nonexonic.readData == fusion_data_example_nonexonic.readData
617617
assert arriba_fusor_nonexonic.contig == fusion_data_example_nonexonic.contig
618618

619+
# Test Linker Sequence
620+
arriba_linker = arriba.model_copy(deep=True)
621+
arriba_linker.fusion_transcript = "ATAGAT|atatacgat|TATGAT"
622+
arriba_fusor_linker = await translator_instance.from_arriba(
623+
arriba_linker, CoordinateType.RESIDUE.value, Assembly.GRCH38.value
624+
)
625+
linker_element = arriba_fusor_linker.structure[1]
626+
assert linker_element
627+
assert linker_element.linkerSequence.sequence.root == "ATATACGAT"
628+
619629
# Test unknown partners
620630
arriba.gene1 = "NA"
621631
arriba_fusor_unknown = await translator_instance.from_arriba(

0 commit comments

Comments
 (0)