Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add these macOS-specific files (.DS_Store) to .gitignore.

Binary file not shown.
766 changes: 766 additions & 0 deletions notebooks/examples/uorf_workflow_example.ipynb

Large diffs are not rendered by default.

15 changes: 14 additions & 1 deletion src/utrfx/variant_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pysam

from utrfx.genome import VariantCoordinates, Strand, Contig
from utrfx.genome import VariantCoordinates, Strand, Contig, Region
from utrfx.model import FiveUTRCoordinates

class AltAlleleSeq:
Expand Down Expand Up @@ -205,6 +205,7 @@ class VariantClassifier:
"""
def __init__(
self,
canonical_uorfs_coordinates_list: typing.Collection[Region],
canonical_uorfs_lengths_list: typing.Collection[int],
variant_uorfs_lengths_list: typing.Collection[int],
canonical_uorfs_ouorf_list: typing.Collection[bool],
Expand All @@ -220,13 +221,22 @@ def __init__(
:param uorf_end_pos: integer corresponding to the end of the uORF.
:param variant_cdna_pos: integer corresponding to the variant position within the cDNA sequence.
"""
self._canonical_uorf_coordinates_list = canonical_uorfs_coordinates_list
self._canonical_uorfs_lengths_list = canonical_uorfs_lengths_list
self._variant_uorfs_lengths_list = variant_uorfs_lengths_list
self._canonical_uorfs_ouorf_list = canonical_uorfs_ouorf_list
self._variant_uorfs_ouorf_list = variant_uorfs_ouorf_list
self._uorf_end_pos_list = uorf_end_pos_list
self._variant_cdna_pos = variant_cdna_pos

def _check_if_mutation_in_uorf(self) -> typing.Optional[str]:
    """
    Check whether the variant falls within at least one canonical uORF.

    The variant cDNA position is compared with the `start` and `end` of every canonical uORF region;
    both bounds are treated as inclusive.

    :return: `None` when the variant lies within at least one canonical uORF, otherwise the message
        "Variant does not affect any canonical uORF". An empty collection of regions also yields the message.
    """
    # `any` stops at the first matching region instead of scanning all of them.
    variant_in_uorf = any(
        uorf_region.start <= self._variant_cdna_pos <= uorf_region.end
        for uorf_region in self._canonical_uorf_coordinates_list
    )
    if variant_in_uorf:
        # Explicit `None`: `perform_mutation_analysis` only stops early on a truthy (non-empty) message.
        return None
    return "Variant does not affect any canonical uORF"

Comment on lines +232 to +239

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without understanding the details, I see no return statement for the case where variant_in_uorf is True.

This means the method _check_if_mutation_in_uorf will implicitly return None, which is falsy, unlike a non-empty string. This is correct, but one could avoid the variant_in_uorf flag completely and write:

        for uorf_region in self._canonical_uorf_coordinates_list:
            if uorf_region.start <= self._variant_cdna_pos <= uorf_region.end:
                return None
        return "Variant does not affect any canonical uORF"

def _mutation_classifier_start_codon(self) -> typing.Optional[str]:
"""
Compare the number of uORFs of a variant with the one of its canonical transcript.
Expand Down Expand Up @@ -272,6 +282,9 @@ def perform_mutation_analysis(self) -> str:
"""
Check which kind of variant is it.
"""
variant_not_in_uorf = self._check_if_mutation_in_uorf()
if variant_not_in_uorf:
return variant_not_in_uorf
start_codon_mutation = self._mutation_classifier_start_codon()
if start_codon_mutation:
return start_codon_mutation
Expand Down
Binary file modified tests/.DS_Store
Binary file not shown.
40 changes: 40 additions & 0 deletions tests/data/HR_gene_results.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
tx_id Pathogenicity proved AF Type mutation NuORFs 0uORF_length 0Is_ouORF 0GC 0GCplus 0ID 0Cap_distance 0Kozak 1uORF_length 1Is_ouORF 1GC 1GCplus 1ID 1Cap_distance 1Kozak 2uORF_length 2Is_ouORF 2GC 2GCplus 2ID 2Cap_distance 2Kozak 3uORF_length 3Is_ouORF 3GC 3GCplus 3ID 3Cap_distance 3Kozak
ENST00000381418-8 canonical canonical 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.7047619047619048 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130605-T-C Yes nan Stop codon loss mutation 4 51 False 0.6274509803921569 0.9 556 16 2 321 True 0.7632398753894081 0 0 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130611-C-T No 1.3145099728717469e-05 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130620-G-C No 6.573579867108492e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.7047619047619048 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130625-G-A No 6.570990080945194e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130628-G-T No 5.9090798458782956e-05 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130634-C-T No 6.57262989989249e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130640-C-A No 6.570910045411438e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130640-C-T No 1.3141800081939436e-05 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130643-C-T No 6.569780089193955e-05 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130650-C-A No 6.571769972651964e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130651-G-A No 6.569609922735253e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130652-G-A No 1.9712999346666038e-05 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130658-G-A No 6.569960078195436e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130663-G-A No 6.570299774466548e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130666-T-C No 2.6310599423595704e-05 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.7142857142857143 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130669-G-A No 5.2526498620864004e-05 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130670-C-T No 1.970959965547081e-05 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130678-G-A No 6.56495012663072e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130688-C-T No 6.570820005435962e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130692-G-A No 6.570040113729192e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130693-C-T No 6.570559889951255e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130695-G-C No 6.56987003821996e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.7047619047619048 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130699-G-T No 6.56909014651319e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130703-C-A No 6.570220193680143e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130703-C-T No 1.971060009964276e-05 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-chr8-22130704-G-T No 6.570990080945194e-06 SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130606-A-G Yes nan Insertion 4 51 False 0.6274509803921569 0.9 556 16 2 321 True 0.7632398753894081 0 0 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130627-C-G Yes nan SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.7047619047619048 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130633-C-T Yes nan SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130635-G-A Yes nan SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.6952380952380952 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130636-G-C Yes nan SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.7047619047619048 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130638-A-T Yes nan SNV or MNV 4 51 False 0.6274509803921569 0.9 556 16 2 105 False 0.7047619047619048 0.9 216 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130689-G-T Yes nan Stop codon gain mutation 4 51 False 0.6274509803921569 0.9 556 16 2 21 False 0.6190476190476191 0.7 300 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130702-G-A Yes nan Stop codon gain mutation 4 51 False 0.6274509803921569 0.9 556 16 2 9 False 0.4444444444444444 0.7 312 302 1 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0
ENST00000381418-8-22130706-C-T Yes nan Start codon loss mutation 3 51 False 0.6274509803921569 0.9 556 16 2 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0 nan nan nan nan nan nan nan
ENST00000381418-8-22130707-A-T Yes nan Start codon loss mutation 3 51 False 0.6274509803921569 0.9 556 16 2 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0 nan nan nan nan nan nan nan
ENST00000381418-8-22130707-A-G Yes nan Start codon loss mutation 3 51 False 0.6274509803921569 0.9 556 16 2 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0 nan nan nan nan nan nan nan
ENST00000381418-8-22130708-T-C Yes nan Start codon loss mutation 3 51 False 0.6274509803921569 0.9 556 16 2 66 False 0.8181818181818182 0.7 47 510 1 17 True 0.5882352941176471 0 0 606 0 nan nan nan nan nan nan nan
Binary file added tests/data/Variants_gene_HR.xlsx
Binary file not shown.
24 changes: 13 additions & 11 deletions tests/test_variant_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from utrfx.variant_util import AltAlleleSeq, VCFfile, VariantClassifier
from utrfx.model import FiveUTRCoordinates
from utrfx.genome import Contig, GenomicRegion, Strand, VariantCoordinates, GenomeBuild
from utrfx.genome import Contig, GenomicRegion, Strand, VariantCoordinates, GenomeBuild, Region

class TestPrepareAltSeq:
"""
Expand Down Expand Up @@ -384,18 +384,20 @@ def test_raises_if_not_used_as_a_context_manager(
assert e.value.args == ("VCFfile must be used as a context manager",)

@pytest.mark.parametrize(
"canonical_lengths, variant_lengths, canonical_ouorf, variant_ouorf, uorf_end_pos_list, variant_cdna_pos, expected",
"canonical_uorfs_coordinates_list, canonical_lengths, variant_lengths, canonical_ouorf, variant_ouorf, uorf_end_pos_list, variant_cdna_pos, expected",
[
([9, 9], [9, 9, 9], [False, False], [False, False, False], [0, 0], 0, "Start codon gain mutation"),
([9, 9], [9], [False, False], [False], [0, 0], 0, "Start codon loss mutation"),
([9, 9], [9, 12], [False, False], [False, True], [0, 9], 8, "Stop codon loss mutation"),
([9, 9], [9, 6], [False, False], [False, False], [0, 0], 0, "Stop codon gain mutation"),
([9, 9], [9, 3], [False, False], [False, True], [0, 9], 6, "Deletion"),
([9, 9], [9, 12], [False, False], [False, True], [0, 9], 3, "Insertion"),
([9, 9], [9, 9], [False, False], [False, False], [0, 9], 0, "SNV or MNV"),
([Region(10,11), Region(12,13)], [9, 9], [9], [False, False], [False], [0, 0], 0, "Variant does not affect any canonical uORF"),
([Region(0,3), Region(12,13)], [9, 9], [9, 9, 9], [False, False], [False, False, False], [0, 0], 2, "Start codon gain mutation"),
([Region(0,3), Region(12,13)], [9, 9], [9], [False, False], [False], [0, 0], 2, "Start codon loss mutation"),
([Region(5,10), Region(12,13)], [9, 9], [9, 12], [False, False], [False, True], [0, 9], 8, "Stop codon loss mutation"),
([Region(0,3), Region(12,13)], [9, 9], [9, 6], [False, False], [False, False], [0, 0], 2, "Stop codon gain mutation"),
([Region(0,9), Region(12,13)], [9, 9], [9, 3], [False, False], [False, True], [0, 9], 6, "Deletion"),
([Region(0,4), Region(12,13)], [9, 9], [9, 12], [False, False], [False, True], [0, 9], 3, "Insertion"),
([Region(0,3), Region(12,13)], [9, 9], [9, 9], [False, False], [False, False], [0, 9], 2, "SNV or MNV"),
]
)
def test_variant_classifier(
canonical_uorfs_coordinates_list: typing.Collection[Region],
canonical_lengths: typing.Collection[int],
variant_lengths: typing.Collection[int],
canonical_ouorf: typing.Collection[bool],
Expand All @@ -405,6 +407,7 @@ def test_variant_classifier(
expected: str,
):
mut_class = VariantClassifier(
canonical_uorfs_coordinates_list,
canonical_lengths,
variant_lengths,
canonical_ouorf,
Expand All @@ -414,5 +417,4 @@ def test_variant_classifier(
)

type_mut = mut_class.perform_mutation_analysis()
assert type_mut == expected

assert type_mut == expected