Skip to content

Commit 99246b4

Browse files
committed
create AnyVlmCohortAlleleFrequencyResult
1 parent 510c1e1 commit 99246b4

File tree

15 files changed

+123
-100
lines changed

15 files changed

+123
-100
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ classifiers = [
1818
]
1919
dependencies = [
2020
"ga4gh.vrs>=2.2.0,<3.0",
21-
"ga4gh.va_spec~=0.4.2",
21+
"ga4gh.va_spec~=0.4.3",
2222
"biocommons.anyvar@git+https://github.com/biocommons/anyvar.git@0d3ab56fe936b27235a1ce136da4641ea81c0bbf",
2323
"fastapi>=0.95.0",
2424
"python-multipart", # required for fastapi file uploads

src/anyvlm/anyvar/http_client.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,15 +100,15 @@ def _make_allele_expression_request(
100100
def get_registered_allele_expression(
101101
self, expression: str, assembly: ReferenceAssembly = ReferenceAssembly.GRCH38
102102
) -> models.Allele | None:
103-
"""Retrieve VRS Allele for given allele expression
103+
"""Retrieve registered VRS Allele for given allele expression
104104
105105
Currently, only expressions supported by the VRS-Python translator are supported.
106106
This could change depending on the AnyVar implementation, though, and probably
107107
can't be validated on the AnyVLM side.
108108
109-
:param expression: variation expression to translate
109+
:param expression: variation expression to get VRS Allele for
110110
:param assembly: reference assembly used in expression
111-
:return: VRS Allele if translation succeeds, else `None`
111+
:return: VRS Allele if translation succeeds and VRS Allele has already been registered, else `None`
112112
"""
113113
response = self._make_allele_expression_request(
114114
expression, assembly, HTTPMethod.POST

src/anyvlm/anyvar/python_client.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def __init__(self, translator: Translator, storage: Storage) -> None:
3030
def _translate_allele_expression(
3131
self, expression: str, assembly: ReferenceAssembly = ReferenceAssembly.GRCH38
3232
) -> Allele | None:
33-
"""Translate a single allele expression to a VRS Allele ID
33+
"""Translate a single allele expression to a VRS Allele
3434
3535
Currently, only expressions supported by the VRS-Python translator are supported.
3636
This could change depending on the AnyVar implementation, though, and probably
@@ -67,14 +67,15 @@ def get_registered_allele_expression(
6767
:return: VRS Allele if translation succeeds and VRS Allele has already been registered, else `None`
6868
"""
6969
translated_variation = self._translate_allele_expression(expression, assembly)
70-
if translated_variation:
71-
try:
72-
return self.av.get_object(translated_variation.id, Allele) # type: ignore
73-
except KeyError:
74-
_logger.exception(
75-
"VRS Allele with ID %s not found", translated_variation.id
76-
)
77-
return None
70+
if not translated_variation:
71+
return None
72+
73+
try:
74+
return self.av.get_object(translated_variation.id, Allele) # type: ignore
75+
except KeyError:
76+
_logger.exception(
77+
"VRS Allele with ID %s not found", translated_variation.id
78+
)
7879

7980
def put_allele_expressions(
8081
self,

src/anyvlm/functions/build_vlm_response.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
"""Craft a VlmResponse object from a list of CohortAlleleFrequencyStudyResults"""
22

3-
from ga4gh.va_spec.base.core import CohortAlleleFrequencyStudyResult
3+
from ga4gh.va_spec.base.core import AnyVlmCohortAlleleFrequencyResult
44

55
from anyvlm.schemas.vlm import (
66
VlmResponse,
77
)
88

99

1010
def build_vlm_response_from_caf_data(
11-
caf_data: list[CohortAlleleFrequencyStudyResult],
11+
caf_data: list[AnyVlmCohortAlleleFrequencyResult],
1212
) -> VlmResponse:
1313
"""Craft a VlmResponse object from a list of CohortAlleleFrequencyStudyResults.
1414
15-
:param caf_data: A list of `CohortAlleleFrequencyStudyResult` objects that will be used to build the VlmResponse
15+
:param caf_data: A list of `AnyVlmCohortAlleleFrequencyResult` objects that will be used to build the VlmResponse
1616
:return: A `VlmResponse` object.
1717
"""
1818
raise NotImplementedError # TODO: Implement this during/after Issue #16

src/anyvlm/functions/get_caf.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""Perform search against variant(s) contained by an AnyVar node, and construct cohort allele frequency model(s)"""
22

33
from ga4gh.core.models import iriReference
4-
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult
54

65
from anyvlm.anyvar.base_client import BaseAnyVarClient
76
from anyvlm.storage.base_storage import Storage
87
from anyvlm.utils.types import (
98
ASSEMBLY_MAP,
9+
AnyVlmCohortAlleleFrequencyResult,
1010
ChromosomeName,
1111
GrcAssemblyId,
1212
NucleotideSequence,
@@ -26,7 +26,7 @@ def get_caf(
2626
start: int,
2727
reference_bases: NucleotideSequence,
2828
alternate_bases: NucleotideSequence,
29-
) -> list[CohortAlleleFrequencyStudyResult]:
29+
) -> list[AnyVlmCohortAlleleFrequencyResult]:
3030
"""Retrieve Cohort Allele Frequency data for all known registered variants matching
3131
provided search params
3232
@@ -41,7 +41,7 @@ def get_caf(
4141
:param alternate_bases: Genomic bases ('T', 'AC', etc.)
4242
:raises ValueError: if unsupported assembly ID is provided
4343
:raises VariantNotRegisteredError: if variant is not registered in AnyVar
44-
:return: list of CohortAlleleFrequencyStudyResult objects for the provided variant
44+
:return: list of AnyVlmCohortAlleleFrequencyResult objects for the provided variant
4545
"""
4646
gnomad_vcf: str = f"{reference_name}-{start}-{reference_bases}-{alternate_bases}"
4747
try:
@@ -55,7 +55,7 @@ def get_caf(
5555
msg = f"Variant {assembly.value} {gnomad_vcf} is not registered in AnyVar"
5656
raise VariantNotRegisteredError(msg)
5757

58-
cafs: list[CohortAlleleFrequencyStudyResult] = (
58+
cafs: list[AnyVlmCohortAlleleFrequencyResult] = (
5959
anyvlm_storage.get_caf_by_vrs_allele_id(vrs_variation.id) # type: ignore
6060
)
6161

src/anyvlm/restapi/vlm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import Annotated
55

66
from fastapi import Query, Request
7-
from ga4gh.va_spec.base.core import CohortAlleleFrequencyStudyResult
7+
from ga4gh.va_spec.base.core import AnyVlmCohortAlleleFrequencyResult
88

99
from anyvlm.anyvar.base_client import BaseAnyVarClient
1010
from anyvlm.functions.build_vlm_response import build_vlm_response_from_caf_data
@@ -57,7 +57,7 @@ def variant_counts(
5757
) -> VlmResponse:
5858
anyvar_client: BaseAnyVarClient = request.app.state.anyvar_client
5959
anyvlm_storage: Storage = request.app.state.anyvlm_storage
60-
caf_data: list[CohortAlleleFrequencyStudyResult] = get_caf(
60+
caf_data: list[AnyVlmCohortAlleleFrequencyResult] = get_caf(
6161
anyvar_client,
6262
anyvlm_storage,
6363
assemblyId,

src/anyvlm/storage/base_storage.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from abc import ABC, abstractmethod
44

5-
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult
5+
from anyvlm.utils.types import AnyVlmCohortAlleleFrequencyResult
66

77

88
class StorageError(Exception):
@@ -30,7 +30,7 @@ def sanitized_url(self) -> str:
3030
"""Return a sanitized URL (password masked) of the database connection string."""
3131

3232
@abstractmethod
33-
def add_allele_frequencies(self, caf: CohortAlleleFrequencyStudyResult) -> None:
33+
def add_allele_frequencies(self, caf: AnyVlmCohortAlleleFrequencyResult) -> None:
3434
"""Add allele frequency data to the database. Will skip conflicts.
3535
3636
NOTE: For now, this will only insert a single caf record into the database.
@@ -43,10 +43,10 @@ def add_allele_frequencies(self, caf: CohortAlleleFrequencyStudyResult) -> None:
4343
@abstractmethod
4444
def get_caf_by_vrs_allele_id(
4545
self, vrs_allele_id: str
46-
) -> list[CohortAlleleFrequencyStudyResult]:
46+
) -> list[AnyVlmCohortAlleleFrequencyResult]:
4747
"""Retrieve cohort allele frequency study results by VRS Allele ID
4848
49-
:param vrs_allele_id: VRS Allele ID
50-
:return: List of cohort allele frequency study results matching given VRS
51-
Allele IDs. Will use iriReference for focusAllele
49+
:param vrs_allele_id: VRS Allele ID to filter by
50+
:return: List of cohort allele frequency study results matching given VRS Allele
51+
ID. Will use iriReference for focusAllele
5252
"""

src/anyvlm/storage/mapper_registry.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
from types import MappingProxyType
44
from typing import TypeVar
55

6-
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult
7-
86
from anyvlm.storage import orm
97
from anyvlm.storage.mappers import AlleleFrequencyMapper, BaseMapper
8+
from anyvlm.utils.types import AnyVlmCohortAlleleFrequencyResult
109

1110
T = TypeVar("T")
1211

@@ -15,7 +14,7 @@ class MapperRegistry:
1514
"""Central registry for all object mappers."""
1615

1716
va_model_to_db_mapping: MappingProxyType = MappingProxyType(
18-
{CohortAlleleFrequencyStudyResult: orm.AlleleFrequencyData}
17+
{AnyVlmCohortAlleleFrequencyResult: orm.AlleleFrequencyData}
1918
)
2019

2120
_mappers: MappingProxyType[type, BaseMapper] = MappingProxyType(

src/anyvlm/storage/mappers.py

Lines changed: 46 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@
44
from typing import Generic, TypeVar
55

66
from ga4gh.core.models import iriReference
7-
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult, StudyGroup
8-
from pydantic import ValidationError
7+
from ga4gh.va_spec.base import StudyGroup
98

109
from anyvlm.storage import orm
11-
from anyvlm.utils.types import AncillaryResults, QualityMeasures
10+
from anyvlm.utils.types import (
11+
AncillaryResults,
12+
AnyVlmCohortAlleleFrequencyResult,
13+
QualityMeasures,
14+
)
1215

13-
V = TypeVar("V") # VA-Spec entity type
16+
V = TypeVar("V") # VA-Spec compliant entity type
1417
D = TypeVar("D") # DB entity type
1518

1619

@@ -19,73 +22,77 @@ class BaseMapper(Generic[V, D], ABC):
1922

2023
@abstractmethod
2124
def from_db_entity(self, db_entity: D) -> V:
22-
"""Convert DB entity to VA-Spec model."""
25+
"""Convert DB entity to VA-Spec compliant model."""
2326

2427
@abstractmethod
2528
def to_db_entity(self, va_model: V) -> D:
26-
"""Convert VA-Spec model to DB entity."""
29+
"""Convert VA-Spec compliant model to DB entity."""
2730

2831

2932
class AlleleFrequencyMapper(
30-
BaseMapper[CohortAlleleFrequencyStudyResult, orm.AlleleFrequencyData]
33+
BaseMapper[AnyVlmCohortAlleleFrequencyResult, orm.AlleleFrequencyData]
3134
):
3235
"""Maps between Allele Frequency Entities"""
3336

3437
def from_db_entity(
3538
self, db_entity: orm.AlleleFrequencyData
36-
) -> CohortAlleleFrequencyStudyResult:
39+
) -> AnyVlmCohortAlleleFrequencyResult:
3740
"""Convert DB Allele Frequency Data to VA-Spec Cohort Allele Frequency Study Result model
3841
3942
:param db_entity: An ORM Allele Frequency Data instance
40-
:return: VA-Spec Cohort Allele Frequency Study Result instance. Will use
41-
iriReference for focusAllele
43+
:return: VA-Spec compliant Cohort Allele Frequency Study Result instance. Will
44+
use iriReference for focusAllele
4245
"""
4346
homozygotes = db_entity.ac_hom
4447
heterozygotes = db_entity.ac_het
4548
hemizygotes = db_entity.ac_hemi
46-
ac = sum(x or 0 for x in (homozygotes, heterozygotes, hemizygotes))
47-
an = db_entity.an
4849

49-
if filter_ := db_entity.filter:
50-
quality_measures = QualityMeasures(qcFilters=filter_).model_dump()
50+
if any(x is not None for x in (homozygotes, heterozygotes, hemizygotes)):
51+
ancillary_results = AncillaryResults(
52+
homozygotes=homozygotes,
53+
heterozygotes=heterozygotes,
54+
hemizygotes=hemizygotes,
55+
)
5156
else:
52-
quality_measures = None
57+
ancillary_results = None
5358

54-
return CohortAlleleFrequencyStudyResult(
59+
ac = sum(x or 0 for x in (homozygotes, heterozygotes, hemizygotes))
60+
an = db_entity.an
61+
62+
return AnyVlmCohortAlleleFrequencyResult(
5563
focusAllele=iriReference(db_entity.vrs_id),
5664
focusAlleleCount=ac,
5765
locusAlleleCount=an,
5866
focusAlleleFrequency=round(ac / an, 9),
59-
qualityMeasures=quality_measures,
60-
ancillaryResults=AncillaryResults(
61-
homozygotes=homozygotes,
62-
heterozygotes=heterozygotes,
63-
hemizygotes=hemizygotes,
64-
).model_dump(),
67+
qualityMeasures=QualityMeasures(qcFilters=db_entity.filter)
68+
if db_entity.filter
69+
else None,
70+
ancillaryResults=ancillary_results,
6571
cohort=StudyGroup(name=db_entity.cohort), # type: ignore
6672
)
6773

6874
def to_db_entity(
69-
self, va_model: CohortAlleleFrequencyStudyResult
75+
self, va_model: AnyVlmCohortAlleleFrequencyResult
7076
) -> orm.AlleleFrequencyData:
71-
"""Convert VA-Spec Cohort Allele Frequency Study Result model to DB Allele Frequency Data
77+
"""Convert VA-Spec compliant Cohort Allele Frequency Study Result model to DB
78+
Allele Frequency Data
7279
73-
:param va_model: VA-Spec Cohort Allele Frequency Study Result instance
80+
:param va_model: VA-Spec compliant Cohort Allele Frequency Study Result instance
7481
:return: ORM Allele Frequency Data instance
75-
:raises ValueError: if ancillaryResults or qualityMeasures are invalid
7682
"""
77-
try:
78-
ancillary_results = AncillaryResults(**va_model.ancillaryResults or {})
79-
except ValidationError as e:
80-
raise ValueError("Invalid ancillaryResults data") from e
83+
ancillary_results = va_model.ancillaryResults
84+
if ancillary_results:
85+
ac_het = ancillary_results.heterozygotes
86+
ac_hom = ancillary_results.homozygotes
87+
ac_hemi = ancillary_results.hemizygotes
88+
else:
89+
ac_het = None
90+
ac_hom = None
91+
ac_hemi = None
8192

82-
try:
83-
quality_measures = QualityMeasures(**va_model.qualityMeasures or {})
84-
except ValidationError as e:
85-
raise ValueError("Invalid qualityMeasures data") from e
93+
quality_measures = va_model.qualityMeasures
8694

8795
focus_allele = va_model.focusAllele
88-
8996
if isinstance(focus_allele, iriReference):
9097
vrs_id = focus_allele.root
9198
else:
@@ -95,9 +102,9 @@ def to_db_entity(
95102
vrs_id=vrs_id,
96103
an=va_model.locusAlleleCount,
97104
ac=va_model.focusAlleleCount,
98-
ac_het=ancillary_results.heterozygotes,
99-
ac_hom=ancillary_results.homozygotes,
100-
ac_hemi=ancillary_results.hemizygotes,
101-
filter=quality_measures.qcFilters,
105+
ac_het=ac_het,
106+
ac_hom=ac_hom,
107+
ac_hemi=ac_hemi,
108+
filter=quality_measures.qcFilters if quality_measures else None,
102109
cohort=va_model.cohort.name,
103110
)

src/anyvlm/storage/postgres.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from urllib.parse import urlparse
44

5-
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult
65
from sqlalchemy import create_engine, delete, select
76
from sqlalchemy.dialects.postgresql import insert
87
from sqlalchemy.orm import sessionmaker
@@ -13,6 +12,7 @@
1312
)
1413
from anyvlm.storage.mapper_registry import mapper_registry
1514
from anyvlm.storage.orm import create_tables
15+
from anyvlm.utils.types import AnyVlmCohortAlleleFrequencyResult
1616

1717

1818
class PostgresObjectStore(Storage):
@@ -54,7 +54,7 @@ def sanitized_url(self) -> str:
5454
netloc += f":{parsed.port}"
5555
return f"{parsed.scheme}://{netloc}{parsed.path}"
5656

57-
def add_allele_frequencies(self, caf: CohortAlleleFrequencyStudyResult) -> None:
57+
def add_allele_frequencies(self, caf: AnyVlmCohortAlleleFrequencyResult) -> None:
5858
"""Add allele frequency data to the database. Will skip conflicts.
5959
6060
NOTE: For now, this will only insert a single caf record into the database.
@@ -71,14 +71,14 @@ def add_allele_frequencies(self, caf: CohortAlleleFrequencyStudyResult) -> None:
7171

7272
def get_caf_by_vrs_allele_id(
7373
self, vrs_allele_id: str
74-
) -> list[CohortAlleleFrequencyStudyResult]:
74+
) -> list[AnyVlmCohortAlleleFrequencyResult]:
7575
"""Retrieve cohort allele frequency study results by VRS Allele ID
7676
77-
:param vrs_allele_id: VRS Allele ID
78-
:return: List of cohort allele frequency study results matching given VRS
79-
Allele ID. Will use iriReference for focusAllele
77+
:param vrs_allele_id: VRS Allele ID to filter by
78+
:return: List of cohort allele frequency study results matching given VRS Allele
79+
ID. Will use iriReference for focusAllele
8080
"""
81-
cafs: list[CohortAlleleFrequencyStudyResult] = []
81+
cafs: list[AnyVlmCohortAlleleFrequencyResult] = []
8282
with self.session_factory() as session:
8383
stmt = (
8484
select(orm.AlleleFrequencyData)

0 commit comments

Comments
 (0)