Skip to content

Commit ea1d6b5

Browse files
authored
feat: Add transcript information in output (#339)
closes #337
1 parent: 42594d9 · commit: ea1d6b5

File tree

14 files changed, with 69 additions (+69) and 17 deletions (-17).

notebooks/evidence_matching/fusion_evidence_matching.ipynb

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -207,111 +207,111 @@
207207
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
208208
" cat_fusion = await self.translator.translate(civic=fusion)\n",
209209
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
210-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
210+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
211211
" raise ValueError(msg)\n",
212212
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
213213
"ERROR:fusor.harvester:Cannot translate fusion: SQSTM1(entrez:8878)::NTRK1(entrez:4914) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
214214
"Traceback (most recent call last):\n",
215215
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
216216
" cat_fusion = await self.translator.translate(civic=fusion)\n",
217217
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
218-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
218+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
219219
" raise ValueError(msg)\n",
220220
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
221221
"ERROR:fusor.harvester:Cannot translate fusion: v::NTRK3(entrez:4916) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
222222
"Traceback (most recent call last):\n",
223223
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
224224
" cat_fusion = await self.translator.translate(civic=fusion)\n",
225225
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
226-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
226+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
227227
" raise ValueError(msg)\n",
228228
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
229229
"ERROR:fusor.harvester:Cannot translate fusion: v::RET(entrez:5979) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
230230
"Traceback (most recent call last):\n",
231231
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
232232
" cat_fusion = await self.translator.translate(civic=fusion)\n",
233233
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
234-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
234+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
235235
" raise ValueError(msg)\n",
236236
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
237237
"ERROR:fusor.harvester:Cannot translate fusion: ATP1B1(entrez:481)::NRG1(entrez:3084) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
238238
"Traceback (most recent call last):\n",
239239
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
240240
" cat_fusion = await self.translator.translate(civic=fusion)\n",
241241
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
242-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
242+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
243243
" raise ValueError(msg)\n",
244244
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
245245
"ERROR:fusor.harvester:Cannot translate fusion: SDC4(entrez:6385)::NRG1(entrez:3084) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
246246
"Traceback (most recent call last):\n",
247247
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
248248
" cat_fusion = await self.translator.translate(civic=fusion)\n",
249249
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
250-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
250+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
251251
" raise ValueError(msg)\n",
252252
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
253253
"ERROR:fusor.harvester:Cannot translate fusion: FGFR1OP2(entrez:26127)::FGFR1(entrez:2260) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
254254
"Traceback (most recent call last):\n",
255255
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
256256
" cat_fusion = await self.translator.translate(civic=fusion)\n",
257257
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
258-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
258+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
259259
" raise ValueError(msg)\n",
260260
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
261261
"ERROR:fusor.harvester:Cannot translate fusion: GOPC(entrez:57120)::ROS1(entrez:6098) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
262262
"Traceback (most recent call last):\n",
263263
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
264264
" cat_fusion = await self.translator.translate(civic=fusion)\n",
265265
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
266-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
266+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
267267
" raise ValueError(msg)\n",
268268
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
269269
"ERROR:fusor.harvester:Cannot translate fusion: TPM3(entrez:7170)::NTRK1(entrez:4914) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
270270
"Traceback (most recent call last):\n",
271271
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
272272
" cat_fusion = await self.translator.translate(civic=fusion)\n",
273273
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
274-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
274+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
275275
" raise ValueError(msg)\n",
276276
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
277277
"ERROR:fusor.harvester:Cannot translate fusion: RCSD1(entrez:92241)::ABL2(entrez:27) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
278278
"Traceback (most recent call last):\n",
279279
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
280280
" cat_fusion = await self.translator.translate(civic=fusion)\n",
281281
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
282-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
282+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
283283
" raise ValueError(msg)\n",
284284
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
285285
"ERROR:fusor.harvester:Cannot translate fusion: v::TFE3(entrez:7030) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
286286
"Traceback (most recent call last):\n",
287287
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
288288
" cat_fusion = await self.translator.translate(civic=fusion)\n",
289289
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
290-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
290+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
291291
" raise ValueError(msg)\n",
292292
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
293293
"ERROR:fusor.harvester:Cannot translate fusion: TCF3(entrez:6929)::PBX1(entrez:5087) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
294294
"Traceback (most recent call last):\n",
295295
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
296296
" cat_fusion = await self.translator.translate(civic=fusion)\n",
297297
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
298-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
298+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
299299
" raise ValueError(msg)\n",
300300
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
301301
"ERROR:fusor.harvester:Cannot translate fusion: v::NUTM1(entrez:256646) due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
302302
"Traceback (most recent call last):\n",
303303
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
304304
" cat_fusion = await self.translator.translate(civic=fusion)\n",
305305
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
306-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
306+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
307307
" raise ValueError(msg)\n",
308308
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
309309
"ERROR:fusor.harvester:Cannot translate fusion: ENST00000275493.7(EGFR):e.24::ENST00000267868.8(RAD51):e.4 due to the following reason: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
310310
"Traceback (most recent call last):\n",
311311
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 412, in load_records\n",
312312
" cat_fusion = await self.translator.translate(civic=fusion)\n",
313313
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
314-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 984, in translate\n",
314+
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 993, in translate\n",
315315
" raise ValueError(msg)\n",
316316
"ValueError: Translation cannot proceed as GRCh37 transcripts and exons lacks genomic breakpoints\n",
317317
"WARNING:fusor.harvester:15 fusion(s) were dropped during translation\n",
@@ -405,6 +405,7 @@
405405
"text/plain": [
406406
"{'structure': [{'type': <FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT: 'TranscriptSegmentElement'>,\n",
407407
" 'transcript': 'refseq:NM_019063.5',\n",
408+
" 'transcriptStatus': <TranscriptPriority.MANE_SELECT: 'mane_select'>,\n",
408409
" 'strand': <Strand.POSITIVE: 1>,\n",
409410
" 'exonEnd': 13,\n",
410411
" 'exonEndOffset': 0,\n",
@@ -423,6 +424,7 @@
423424
" 'end': 42295516}},\n",
424425
" {'type': <FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT: 'TranscriptSegmentElement'>,\n",
425426
" 'transcript': 'refseq:NM_004304.5',\n",
427+
" 'transcriptStatus': <TranscriptPriority.MANE_SELECT: 'mane_select'>,\n",
426428
" 'strand': <Strand.NEGATIVE: -1>,\n",
427429
" 'exonStart': 20,\n",
428430
" 'exonStartOffset': 0,\n",
@@ -480,6 +482,7 @@
480482
"text/plain": [
481483
"{'structure': [{'type': <FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT: 'TranscriptSegmentElement'>,\n",
482484
" 'transcript': 'refseq:NM_019063.5',\n",
485+
" 'transcriptStatus': <TranscriptPriority.MANE_SELECT: 'mane_select'>,\n",
483486
" 'strand': <Strand.POSITIVE: 1>,\n",
484487
" 'exonEnd': 20,\n",
485488
" 'exonEndOffset': 0,\n",
@@ -498,6 +501,7 @@
498501
" 'end': 42325554}},\n",
499502
" {'type': <FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT: 'TranscriptSegmentElement'>,\n",
500503
" 'transcript': 'refseq:NM_004304.5',\n",
504+
" 'transcriptStatus': <TranscriptPriority.MANE_SELECT: 'mane_select'>,\n",
501505
" 'strand': <Strand.NEGATIVE: -1>,\n",
502506
" 'exonStart': 20,\n",
503507
" 'exonStartOffset': 0,\n",

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ dependencies = [
3030
"biocommons.seqrepo",
3131
"gene-normalizer ~=0.10.0",
3232
"civicpy ~=5.0",
33-
"cool-seq-tool ~=0.14.5"
33+
"cool-seq-tool ~=0.15.0"
3434
]
3535
dynamic=["version"]
3636

src/fusor/examples/bcr_abl1.json

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
{
55
"type": "TranscriptSegmentElement",
66
"transcript": "refseq:NM_004327.3",
7+
"transcriptStatus": "longest_compatible_remaining",
78
"strand": 1,
89
"exonEnd": 2,
910
"exonEndOffset": 182,
@@ -40,6 +41,7 @@
4041
{
4142
"type": "TranscriptSegmentElement",
4243
"transcript": "refseq:NM_005157.5",
44+
"transcriptStatus": "longest_compatible_remaining",
4345
"strand": 1,
4446
"exonStart": 2,
4547
"exonStartOffset": -173,

src/fusor/examples/bcr_abl1_expanded.json

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
{
55
"type": "TranscriptSegmentElement",
66
"transcript": "refseq:NM_004327.3",
7+
"transcriptStatus": "longest_compatible_remaining",
78
"strand": 1,
89
"exonEnd": 2,
910
"exonEndOffset": 182,
@@ -218,6 +219,7 @@
218219
{
219220
"type": "TranscriptSegmentElement",
220221
"transcript": "refseq:NM_005157.5",
222+
"transcriptStatus": "longest_compatible_remaining",
221223
"strand": 1,
222224
"exonStart": 2,
223225
"exonStartOffset": -173,

src/fusor/examples/tpm3_itd.json

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
{
55
"type": "TranscriptSegmentElement",
66
"transcript": "refseq:NM_152263.3",
7+
"transcriptStatus": "longest_compatible_remaining",
78
"strand": -1,
89
"exonEnd": 8,
910
"exonEndOffset": 0,
@@ -32,6 +33,7 @@
3233
{
3334
"type": "TranscriptSegmentElement",
3435
"transcript": "refseq:NM_152263.3",
36+
"transcriptStatus": "longest_compatible_remaining",
3537
"strand": -1,
3638
"exonEnd": 8,
3739
"exonEndOffset": 0,

src/fusor/examples/tpm3_ntrk1.json

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
{
55
"type": "TranscriptSegmentElement",
66
"transcript": "refseq:NM_152263.3",
7+
"transcriptStatus": "longest_compatible_remaining",
78
"strand": -1,
89
"exonEnd": 8,
910
"exonEndOffset": 0,
@@ -32,6 +33,7 @@
3233
{
3334
"type": "TranscriptSegmentElement",
3435
"transcript": "refseq:NM_002529.3",
36+
"transcriptStatus": "longest_compatible_remaining",
3537
"strand": 1,
3638
"exonStart": 10,
3739
"exonStartOffset": 0,

src/fusor/examples/tpm3_pdgfrb.json

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
{
55
"type": "TranscriptSegmentElement",
66
"transcript": "refseq:NM_152263.3",
7+
"transcriptStatus": "longest_compatible_remaining",
78
"strand": -1,
89
"exonStart": 1,
910
"exonStartOffset": 0,
@@ -46,6 +47,7 @@
4647
{
4748
"type": "TranscriptSegmentElement",
4849
"transcript": "refseq:NM_002609.3",
50+
"transcriptStatus": "longest_compatible_remaining",
4951
"strand": -1,
5052
"exonStart": 11,
5153
"exonStartOffset": 0,

src/fusor/fusor.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -342,6 +342,7 @@ async def transcript_segment_element(
342342
return (
343343
TranscriptSegmentElement(
344344
transcript=data.tx_ac,
345+
transcriptStatus=data.tx_status,
345346
strand=data.strand,
346347
# offset by 1 because in CST exons are 0-based
347348
exonStart=seg_start.exon_ord + 1 if seg_start else None,

src/fusor/models.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from pathlib import Path
88
from typing import Annotated, Any, Literal, Self
99

10-
from cool_seq_tool.schemas import Strand
10+
from cool_seq_tool.schemas import Strand, TranscriptPriority
1111
from ga4gh.core.models import Extension, MappableConcept
1212
from ga4gh.vrs.models import (
1313
LiteralSequenceExpression,
@@ -281,6 +281,7 @@ class TranscriptSegmentElement(BaseStructuralElement):
281281
FUSORTypes.TRANSCRIPT_SEGMENT_ELEMENT
282282
)
283283
transcript: Annotated[str, StringConstraints(pattern=CURIE_REGEX)]
284+
transcriptStatus: TranscriptPriority
284285
strand: Strand
285286
exonStart: StrictInt | None = None
286287
exonStartOffset: StrictInt | None = 0
@@ -325,6 +326,7 @@ def check_exons(self) -> Self:
325326
"example": {
326327
"type": "TranscriptSegmentElement",
327328
"transcript": "refseq:NM_152263.3",
329+
"transcriptStatus": "longest_compatible_remaining",
328330
"strand": -1,
329331
"exonStart": 1,
330332
"exonStartOffset": 0,
@@ -948,6 +950,7 @@ class CategoricalFusion(AbstractFusion):
948950
{
949951
"type": "TranscriptSegmentElement",
950952
"transcript": "refseq:NM_152263.3",
953+
"transcriptStatus": "longest_compatible_remaining",
951954
"strand": -1,
952955
"exonStart": 1,
953956
"exonStartOffset": 0,
@@ -1094,6 +1097,7 @@ def enforce_itd_element_quantities(self) -> Self:
10941097
{
10951098
"type": "TranscriptSegmentElement",
10961099
"transcript": "refseq:NM_152263.3",
1100+
"transcriptStatus": "longest_compatible_remaining",
10971101
"strand": -1,
10981102
"exonStart": 1,
10991103
"exonStartOffset": 0,
@@ -1136,6 +1140,7 @@ def enforce_itd_element_quantities(self) -> Self:
11361140
{
11371141
"type": "TranscriptSegmentElement",
11381142
"transcript": "refseq:NM_152263.3",
1143+
"transcriptStatus": "longest_compatible_remaining",
11391144
"strand": -1,
11401145
"exonStart": 1,
11411146
"exonStartOffset": 0,

tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -690,6 +690,7 @@ def exhaustive_example(alk_gene, braf_gene, tpm3_gene):
690690
{
691691
"type": "TranscriptSegmentElement",
692692
"transcript": "refseq:NM_152263.3",
693+
"transcriptStatus": "longest_compatible_remaining",
693694
"strand": -1,
694695
"exonStart": 1,
695696
"exonStartOffset": 0,
@@ -804,6 +805,7 @@ def fusion_example():
804805
{
805806
"type": "TranscriptSegmentElement",
806807
"transcript": "refseq:NM_152263.3",
808+
"transcriptStatus": "longest_compatible_remaining",
807809
"strand": -1,
808810
"exonStart": 1,
809811
"exonStartOffset": 0,

0 commit comments

Comments
 (0)