Skip to content

Commit 0e146a7

Browse files
authored
feat: Add additional parameters and validator for regulatory element (#336)
closes #335
1 parent 2b47ba4 commit 0e146a7

File tree

5 files changed

+228
-9
lines changed

5 files changed

+228
-9
lines changed

src/fusor/fusor.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
Fusion,
3939
FusionType,
4040
GeneElement,
41+
GenomicLocation,
4142
InternalTandemDuplication,
4243
InternalTandemDuplicationElements,
4344
LinkerElement,
@@ -524,26 +525,75 @@ def regulatory_element(
524525
self,
525526
regulatory_class: RegulatoryClass,
526527
gene: str,
528+
feature_id: str | None = None,
529+
sequence_id: str | None = None,
530+
start: int | None = None,
531+
end: int | None = None,
532+
seq_id_target_namespace: str | None = None,
533+
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
527534
use_minimal_gene: bool = True,
528535
) -> tuple[RegulatoryElement | None, str | None]:
529536
"""Create RegulatoryElement
530537
531538
:param regulatory_class: one of {"promoter", "enhancer"}
532539
:param gene: gene term to fetch normalized gene object for
540+
:param feature_id: The feature ID for the regulatory element
541+
:param sequence_id: Genomic sequence on which provided coordinates exist
542+
:param start: Start position on sequence
543+
:param end: End position on sequence
544+
:param seq_id_target_namespace: If you want to use a digest for
545+
``sequence_id``, set this to the namespace you want the digest for.
546+
Otherwise, leave as ``None``.
547+
:param coordinate_type: The coordinate type that is being supplied
548+
for ``start`` and ``end``. This is set to residue coordinates
549+
by default
533550
:param use_minimal_gene: whether to use the minimal gene object
534-
:return: Tuple with RegulatoryElement instance and None value for warnings if
535-
successful, or a None value and warning message if unsuccessful
551+
:return: Tuple with RegulatoryElement instance and None value for
552+
warnings if successful, or a None value and warning message if
553+
unsuccessful
536554
"""
537555
gene_descr, warning = self._normalized_gene(
538556
gene, use_minimal_gene=use_minimal_gene
539557
)
540558
if not gene_descr:
541559
return None, warning
542560

561+
if coordinate_type == CoordinateType.RESIDUE:
562+
if start == 0:
563+
return (
564+
None,
565+
"start must exceed 0 if using residue coordinates to construct the feature_location",
566+
)
567+
if end == 0:
568+
return (
569+
None,
570+
"end must exceed 0 if using residue coordinates to construct the feature_location",
571+
)
572+
573+
use_feat_location = any(loc_var for loc_var in (sequence_id, start, end))
574+
if use_feat_location:
575+
if not sequence_id or not start or not end:
576+
return (
577+
None,
578+
"sequence_id, start, and end must all be provided to construct the feature_location",
579+
)
580+
feat_location = self._sequence_location(
581+
start - 1 if coordinate_type == CoordinateType.RESIDUE else start,
582+
end,
583+
sequence_id,
584+
seq_id_target_namespace=seq_id_target_namespace,
585+
)
586+
feat_location = GenomicLocation(
587+
**feat_location.model_dump(exclude="name"), name=sequence_id
588+
)
589+
543590
try:
544591
return (
545592
RegulatoryElement(
546-
regulatoryClass=regulatory_class, associatedGene=gene_descr
593+
regulatoryClass=regulatory_class,
594+
associatedGene=gene_descr,
595+
featureId=feature_id,
596+
featureLocation=feat_location if use_feat_location else None,
547597
),
548598
None,
549599
)

src/fusor/models.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
StrictInt,
2323
StrictStr,
2424
StringConstraints,
25+
field_validator,
2526
model_validator,
2627
)
2728

@@ -59,6 +60,41 @@ class FUSORTypes(str, Enum):
5960
CAUSATIVE_EVENT = "CausativeEvent"
6061

6162

63+
class GenomicLocation(SequenceLocation):
64+
"""Define GenomicLocation class"""
65+
66+
name: str
67+
68+
@field_validator("name")
69+
def validate_genomic_location(cls, value: str):
70+
"""Validate that featureLocation only describes genomic coordinates
71+
if provided
72+
73+
:param value: The value for `name`
74+
:raises ValueError: If a non-chromosomal accession is provided to
75+
`name`
76+
"""
77+
if not value.startswith("NC_"):
78+
msg = "`name` must be a RefSeq chromosomal accession that starts with `NC_`"
79+
raise ValueError(msg)
80+
return value
81+
82+
model_config = ConfigDict(
83+
json_schema_extra={
84+
"example": {
85+
"id": "ga4gh:SL.9hqdPDfXC-m_t_bDH75FZHfaM6OKDtRw",
86+
"name": "NC_000001.11",
87+
"type": "SequenceLocation",
88+
"sequenceReference": {
89+
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
90+
},
91+
"start": 155593,
92+
"end": 155610,
93+
}
94+
},
95+
)
96+
97+
6298
class AdditionalFields(str, Enum):
6399
"""Define possible fields that can be added to Fusion object."""
64100

@@ -500,7 +536,7 @@ class RegulatoryElement(BaseModel):
500536
regulatoryClass: RegulatoryClass
501537
featureId: str | None = None
502538
associatedGene: MappableConcept | None = None
503-
featureLocation: SequenceLocation | None = None
539+
featureLocation: GenomicLocation | None = None
504540

505541
@model_validator(mode="after")
506542
def ensure_min_values(cls, values):
@@ -523,6 +559,7 @@ def ensure_min_values(cls, values):
523559
"regulatoryClass": "promoter",
524560
"featureLocation": {
525561
"id": "ga4gh:SL.9hqdPDfXC-m_t_bDH75FZHfaM6OKDtRw",
562+
"name": "NC_000001.11",
526563
"type": "SequenceLocation",
527564
"sequenceReference": {
528565
"id": "refseq:NC_000001.11",

tests/test_fusor.py

Lines changed: 122 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
CategoricalFusion,
1414
FunctionalDomain,
1515
GeneElement,
16+
GenomicLocation,
1617
InternalTandemDuplication,
1718
LinkerElement,
1819
MultiplePossibleGenesElement,
@@ -38,6 +39,20 @@ def braf_gene_obj_min():
3839
)
3940

4041

42+
@pytest.fixture(scope="module")
43+
def tpm3_gene_obj_min():
44+
"""Create minimal gene object for TPM3"""
45+
return MappableConcept(
46+
primaryCoding=Coding(
47+
id="hgnc:12012",
48+
code="HGNC:12012",
49+
system="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/",
50+
),
51+
name="TPM3",
52+
conceptType="Gene",
53+
)
54+
55+
4156
@pytest.fixture(scope="module")
4257
def braf_gene_obj(braf_gene):
4358
"""Create gene object for braf"""
@@ -142,6 +157,38 @@ def regulatory_element(braf_gene_obj):
142157
return RegulatoryElement(**params)
143158

144159

160+
@pytest.fixture(scope="module")
161+
def genomic_location_feature_location():
162+
"""Create test genomic location for feature location. Adapted from models.py"""
163+
params = {
164+
"id": "ga4gh:SL.-xC3omZDIKZEuotbbHWQMTC8sS3nOxTb",
165+
"name": "NC_000001.11",
166+
"type": "SequenceLocation",
167+
"sequenceReference": {
168+
"id": "refseq:NC_000001.11",
169+
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
170+
"type": "SequenceReference",
171+
},
172+
"start": 15455,
173+
"end": 15456,
174+
"extensions": [{"name": "is_exonic", "value": True}],
175+
}
176+
return GenomicLocation(**params)
177+
178+
179+
@pytest.fixture(scope="module")
180+
def regulatory_element_full(tpm3_gene_obj_min, genomic_location_feature_location):
181+
"""Create full regulatory element test fixture"""
182+
params = {
183+
"type": "RegulatoryElement",
184+
"regulatoryClass": "enhancer",
185+
"associatedGene": tpm3_gene_obj_min,
186+
"featureID": "EH12345",
187+
"featureLocation": genomic_location_feature_location,
188+
}
189+
return RegulatoryElement(**params)
190+
191+
145192
@pytest.fixture(scope="module")
146193
def regulatory_element_min(braf_gene_obj_min):
147194
"""Create regulatory element test fixture with minimal gene object."""
@@ -848,7 +895,9 @@ def compare_domains(actual, expected):
848895
)
849896

850897

851-
def test_regulatory_element(fusor_instance, regulatory_element, regulatory_element_min):
898+
def test_regulatory_element(
899+
fusor_instance, regulatory_element, regulatory_element_min, regulatory_element_full
900+
):
852901
"""Test regulatory_element method."""
853902

854903
def compare_re(actual, expected):
@@ -860,9 +909,78 @@ def compare_re(actual, expected):
860909
assert actual.keys() == expected.keys()
861910
assert actual["type"] == expected["type"]
862911
compare_gene_obj(actual["associatedGene"], expected["associatedGene"])
863-
864-
re = fusor_instance.regulatory_element(RegulatoryClass.PROMOTER, "BRAF")
912+
if actual.get("featureID"):
913+
assert actual["featureID"] == expected["featureID"]
914+
if actual.get("featureLocation"):
915+
assert actual["featureLocation"]["id"] == expected["featureLocation"]["id"]
916+
assert (
917+
actual["featureLocation"]["start"]
918+
== expected["featureLocation"]["start"]
919+
)
920+
assert (
921+
actual["featureLocation"]["end"] == expected["featureLocation"]["end"]
922+
)
923+
924+
re = fusor_instance.regulatory_element(
925+
regulatory_class=RegulatoryClass.PROMOTER,
926+
gene="BRAF",
927+
)
865928
compare_re(re, regulatory_element_min)
866929

867-
re = fusor_instance.regulatory_element(RegulatoryClass.PROMOTER, "BRAF", False)
930+
re = fusor_instance.regulatory_element(
931+
regulatory_class=RegulatoryClass.PROMOTER, gene="BRAF", use_minimal_gene=False
932+
)
868933
compare_re(re, regulatory_element)
934+
935+
re = fusor_instance.regulatory_element(
936+
regulatory_class=RegulatoryClass.ENHANCER,
937+
gene="TPM3",
938+
feature_id="EH12345",
939+
sequence_id="NC_000001.11",
940+
start=15455,
941+
end=15456,
942+
coordinate_type=CoordinateType.INTER_RESIDUE,
943+
)
944+
compare_re(re, regulatory_element_full)
945+
946+
re = fusor_instance.regulatory_element(
947+
regulatory_class=RegulatoryClass.ENHANCER,
948+
gene="TPM3",
949+
feature_id="EH12345",
950+
sequence_id="NC_000001.11",
951+
start=15455,
952+
coordinate_type=CoordinateType.INTER_RESIDUE,
953+
)
954+
assert re[0] is None
955+
assert (
956+
re[1]
957+
== "sequence_id, start, and end must all be provided to construct the feature_location"
958+
)
959+
960+
re = fusor_instance.regulatory_element(
961+
regulatory_class=RegulatoryClass.ENHANCER,
962+
gene="TPM3",
963+
feature_id="EH12345",
964+
sequence_id="NC_000001.11",
965+
start=0,
966+
coordinate_type=CoordinateType.RESIDUE,
967+
)
968+
assert re[0] is None
969+
assert (
970+
re[1]
971+
== "start must exceed 0 if using residue coordinates to construct the feature_location"
972+
)
973+
974+
re = fusor_instance.regulatory_element(
975+
regulatory_class=RegulatoryClass.ENHANCER,
976+
gene="TPM3",
977+
feature_id="EH12345",
978+
sequence_id="NC_000001.11",
979+
end=0,
980+
coordinate_type=CoordinateType.RESIDUE,
981+
)
982+
assert re[0] is None
983+
assert (
984+
re[1]
985+
== "end must exceed 0 if using residue coordinates to construct the feature_location"
986+
)

tests/test_models.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ def sequence_locations():
176176
{
177177
"id": "ga4gh:SL.VJLxl42yYoa-0ZMa8dfakhZfcP0nWgpl",
178178
"type": "SequenceLocation",
179+
"name": "NP_001123617.1",
179180
"sequenceReference": {
180181
"id": "refseq:NP_001123617.1",
181182
"refgetAccession": "SQ.sv5egNzqN5koJQH6w0M4tIK9tEDEfJl7",
@@ -776,7 +777,7 @@ def test_event():
776777
CausativeEvent(eventType="combination")
777778

778779

779-
def test_regulatory_element(regulatory_elements, gene_examples):
780+
def test_regulatory_element(regulatory_elements, gene_examples, sequence_locations):
780781
"""Test RegulatoryElement object initializes correctly"""
781782
test_reg_elmt = RegulatoryElement(**regulatory_elements[0])
782783
assert test_reg_elmt.regulatoryClass.value == "promoter"
@@ -798,6 +799,18 @@ def test_regulatory_element(regulatory_elements, gene_examples):
798799
== "Value error, Must set 1 of {`featureId`, `associatedGene`} and/or `featureLocation`"
799800
)
800801

802+
# Require chromosomal build
803+
with pytest.raises(ValidationError) as exc_info:
804+
RegulatoryElement(
805+
regulatoryClass="enhancer",
806+
associatedGene=gene_examples[0],
807+
featureLocation=sequence_locations[6],
808+
)
809+
assert (
810+
exc_info.value.errors()[0]["msg"]
811+
== "Value error, `name` must be a RefSeq chromosomal accession that starts with `NC_`"
812+
)
813+
801814

802815
def test_fusion_itd(
803816
functional_domains,

tests/test_nomenclature.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def reg_location_example():
7373
},
7474
"featureLocation": {
7575
"type": "SequenceLocation",
76+
"name": "NC_000023.11",
7677
"id": "ga4gh:SL.KMHXvX8m5fD8PcGlQu2Vja3m7bt2iqfK",
7778
"sequenceReference": {
7879
"id": "refseq:NC_000023.11",

0 commit comments

Comments
 (0)