Skip to content

Commit 7e69de4

Browse files
committed
feat: add anyvlm client for caf retrieval
close #16
1 parent 70e4508 commit 7e69de4

File tree

11 files changed

+270
-51
lines changed

11 files changed

+270
-51
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ test = [
3838
"jsonschema",
3939
"pyyaml",
4040
"pytest-recording",
41+
"deepdiff",
4142
]
4243
dev = [
4344
"ruff==0.12.8",

src/anyvlm/anyvlm.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Client for cohort allele frequency (CAF) retrieval"""
2+
3+
from anyvar.utils.types import VrsVariation
4+
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult
5+
6+
from anyvlm.storage.base_storage import Storage
7+
8+
9+
class AnyVLM:
    """Client for cohort allele frequency (CAF) operations"""

    def __init__(self, storage: Storage) -> None:
        """Initialize AnyVLM client

        :param storage: AnyVLM storage
        """
        self.storage = storage

    def get_caf_for_variations(
        self, vrs_variations: list[VrsVariation]
    ) -> list[CohortAlleleFrequencyStudyResult]:
        """Retrieve Cohort Allele Frequency data for given VRS Variations

        The storage layer is queried by variation ID; each returned CAF has its
        ``focusAllele`` (looked up via ``focusAllele.root``) replaced in place by
        the matching VRS Variation object from ``vrs_variations``.

        :param vrs_variations: List of VRS Variations to get CAF data for
        :return: list of CAFs for VRS Variations. Will use VRS Variation for focusAllele
        """
        # Nothing to look up: skip the storage round trip entirely.
        if not vrs_variations:
            return []

        # Index the variations by ID so each CAF can be re-linked in O(1).
        vrs_variations_map = {
            vrs_variation.id: vrs_variation for vrs_variation in vrs_variations
        }

        cafs = self.storage.get_caf_by_vrs_ids(list(vrs_variations_map))

        for caf in cafs:
            caf.focusAllele = vrs_variations_map[caf.focusAllele.root]

        return cafs

src/anyvlm/functions/get_caf.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,29 @@
11
"""Perform search against variant(s) contained by an AnyVar node, and construct cohort allele frequency model(s)"""
22

3+
from anyvar.utils.types import VrsVariation
34
from ga4gh.va_spec.base.core import CohortAlleleFrequencyStudyResult
45

56
from anyvlm.anyvar.base_client import BaseAnyVarClient
7+
from anyvlm.anyvlm import AnyVLM
68

79

810
def get_caf(
    anyvar: BaseAnyVarClient,
    anyvlm: AnyVLM,
    accession_id: str,
    start: int,
    end: int,
) -> list[CohortAlleleFrequencyStudyResult]:
    """Retrieve Cohort Allele Frequency data for all known variants matching provided search params

    :param anyvar: AnyVar client (variant lookup)
    :param anyvlm: AnyVLM client (caf lookup)
    :param accession_id: ID for sequence to search upon
    :param start: start of range search
    :param end: end of range to search
    :return: list of CAFs contained in search interval
    """
    # Step 1: ask AnyVar which variations fall in the requested interval.
    vrs_variations: list[VrsVariation] = anyvar.search_by_interval(
        accession_id, start, end
    )
    # Step 2: hand those variations to AnyVLM to fetch their CAF records.
    return anyvlm.get_caf_for_variations(vrs_variations)

src/anyvlm/storage/base_storage.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,14 @@ def add_allele_frequency(self, caf: CohortAlleleFrequencyStudyResult) -> None:
3434
3535
:param caf: Cohort allele frequency study result object to insert into the DB
3636
"""
37+
38+
@abstractmethod
def get_caf_by_vrs_ids(
    self, vrs_ids: list[str]
) -> list[CohortAlleleFrequencyStudyResult]:
    """Retrieve cohort allele frequency study results by VRS IDs

    Abstract hook: concrete storage backends supply the implementation.

    :param vrs_ids: List of VRS variation IDs
    :return: List of cohort allele frequency study results matching given VRS
        variation IDs. Will use iriReference for focusAllele
    """

src/anyvlm/storage/postgres.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Provide PostgreSQL-based storage implementation."""
22

33
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult
4-
from sqlalchemy import create_engine, delete
4+
from sqlalchemy import create_engine, delete, select
55
from sqlalchemy.dialects.postgresql import insert
66
from sqlalchemy.orm import sessionmaker
77

@@ -46,3 +46,26 @@ def add_allele_frequency(self, caf: CohortAlleleFrequencyStudyResult) -> None:
4646

4747
with self.session_factory() as session, session.begin():
4848
session.execute(stmt, db_entity.to_dict())
49+
50+
def get_caf_by_vrs_ids(
    self, vrs_ids: list[str]
) -> list[CohortAlleleFrequencyStudyResult]:
    """Retrieve cohort allele frequency study results by VRS IDs

    The query is capped at ``self.MAX_ROWS`` rows, so at most that many results
    are returned even if more rows match.

    :param vrs_ids: List of VRS variation IDs
    :return: List of cohort allele frequency study results matching given VRS
        variation IDs. Will use iriReference for focusAllele
    """
    # An empty ID list cannot match any row; skip opening a session for it.
    if not vrs_ids:
        return []

    stmt = (
        select(orm.AlleleFrequencyData)
        .where(orm.AlleleFrequencyData.vrs_id.in_(vrs_ids))
        .limit(self.MAX_ROWS)
    )
    with self.session_factory() as session:
        # Map rows to CAF objects while the session is still open.
        # NOTE(review): presumably this also avoids touching detached ORM
        # instances -- confirm against the mapper implementation.
        return [
            mapper_registry.from_db_entity(db_object)
            for db_object in session.scalars(stmt).all()
        ]

src/anyvlm/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Provide utilities."""

src/anyvlm/utils/types.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""Provide helpful type definitions, references, and type-based operations."""
2+
3+
from pydantic import BaseModel
4+
5+
6+
class AncillaryResults(BaseModel):
    """Define model for Ancillary Results"""

    # Three integer fields, one per zygosity category named by the field.
    homozygotes: int
    heterozygotes: int
    hemizygotes: int
    # Free-text field. NOTE(review): allowed values are not visible here -- confirm.
    consequence: str
13+
14+
15+
class QualityMeasures(BaseModel):
    """Define model for Quality Measures"""

    # mixedCase name is deliberate (lint rule N815 is suppressed).
    # NOTE(review): presumably mirrors an external schema key -- confirm.
    qcFilters: list[str]  # noqa: N815

tests/conftest.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
from pathlib import Path
44

55
import pytest
6+
from anyvar.anyvar import create_storage, create_translator
67
from dotenv import load_dotenv
78
from ga4gh.core.models import iriReference
89
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult, StudyGroup
910
from ga4gh.vrs import models
1011
from pydantic import BaseModel
1112

13+
from anyvlm.anyvar.python_client import PythonAnyVarClient
1214
from anyvlm.storage import orm
1315
from anyvlm.storage.base_storage import Storage
1416
from anyvlm.storage.postgres import PostgresObjectStore
@@ -68,17 +70,42 @@ def vcr_config():
6870

6971

7072
@pytest.fixture(scope="session")
def anyvlm_anyvar_postgres_uri():
    """Provide the storage URI for the AnyVar test database.

    Reads ``ANYVLM_ANYVAR_TEST_STORAGE_URI`` from the environment and falls back
    to a local default when it is unset.
    """
    return environ.get(
        "ANYVLM_ANYVAR_TEST_STORAGE_URI",
        "postgresql://postgres:postgres@localhost:5432/anyvlm_anyvar_test",
    )
78+
79+
80+
@pytest.fixture
def anyvar_python_client(anyvlm_anyvar_postgres_uri: str) -> PythonAnyVarClient:
    """Provide a PythonAnyVarClient whose storage has just been wiped.

    :param anyvlm_anyvar_postgres_uri: storage URI for the AnyVar test database
    :return: client built from a new translator and the emptied storage
    """
    storage = create_storage(anyvlm_anyvar_postgres_uri)
    # Start every test from an empty AnyVar store.
    storage.wipe_db()
    translator = create_translator()
    return PythonAnyVarClient(translator, storage)
86+
87+
88+
@pytest.fixture
def anyvar_populated_python_client(
    anyvar_python_client: PythonAnyVarClient, alleles: dict
):
    """Provide an AnyVar client pre-loaded with every allele in ``alleles``.

    Each fixture entry's ``"variation"`` dict is turned into a VRS ``Allele`` and
    registered through ``put_objects``, one call per allele.
    """
    for allele_fixture in alleles.values():
        anyvar_python_client.put_objects([models.Allele(**allele_fixture["variation"])])
    return anyvar_python_client
95+
96+
97+
@pytest.fixture(scope="session")
def anyvlm_postgres_uri():
    """Provide the storage URI for the AnyVLM test database.

    Reads ``ANYVLM_TEST_STORAGE_URI`` from the environment and falls back to a
    local default when it is unset.
    """
    return environ.get(
        "ANYVLM_TEST_STORAGE_URI",
        "postgresql://postgres:postgres@localhost:5432/anyvlm_test",
    )
76103

77104

78105
@pytest.fixture
def postgres_storage(anyvlm_postgres_uri: str):
    """Reset storage state after each test case"""
    storage = PostgresObjectStore(anyvlm_postgres_uri)
    yield storage
    # Teardown: runs after the test finishes and empties the database.
    storage.wipe_db()
84111

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""Test that get_caf function works correctly"""
2+
3+
import pytest
4+
from deepdiff import DeepDiff
5+
from ga4gh.core.models import iriReference
6+
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult
7+
8+
from anyvlm.anyvar.python_client import PythonAnyVarClient
9+
from anyvlm.anyvlm import AnyVLM
10+
from anyvlm.functions.get_caf import get_caf
11+
from anyvlm.storage.postgres import PostgresObjectStore
12+
13+
# Position passed as both the start and the end of the searched interval.
POS = 2781760
# Refget accession of the sequence the test alleles are filtered on.
REFGET_AC = "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5"
# Same accession with the "ga4gh:" prefix, as passed to get_caf.
GA4GH_SEQ_ID = f"ga4gh:{REFGET_AC}"
16+
17+
18+
@pytest.fixture
def alleles_to_add(alleles: dict):
    """Create test fixture for alleles whose sequence reference matches REFGET_AC"""
    # NOTE(review): the filter reads ``location`` from the fixture entry itself,
    # while consumers of this fixture read ``location`` from the returned
    # ``variation`` dict -- confirm the ``alleles`` fixture carries both.
    return [
        value["variation"]
        for value in alleles.values()
        if value["location"]["sequenceReference"]["refgetAccession"] == REFGET_AC
    ]
26+
27+
28+
@pytest.fixture
def alleles_in_range(alleles_to_add):
    """Create test fixture for alleles overlapping POS"""
    # Keep a variation when POS lies within [start, end] of its location.
    return [
        variation
        for variation in alleles_to_add
        if variation["location"]["start"] <= POS <= variation["location"]["end"]
    ]
36+
37+
38+
@pytest.fixture
def populated_postgres_storage(
    postgres_storage: PostgresObjectStore,
    alleles_to_add: list[dict],
    caf_iri: CohortAlleleFrequencyStudyResult,
):
    """Provide AnyVLM storage holding one CAF per allele in ``alleles_to_add``.

    Every stored CAF is a deep copy of ``caf_iri`` whose ``focusAllele`` is an
    ``iriReference`` pointing at the allele's ID.
    """
    for variation in alleles_to_add:
        # Deep copy so the shared ``caf_iri`` fixture is never mutated.
        caf_copy = caf_iri.model_copy(deep=True)
        caf_copy.focusAllele = iriReference(root=variation["id"])
        postgres_storage.add_allele_frequency(caf_copy)
    return postgres_storage
49+
50+
51+
@pytest.fixture
def anyvlm_populated_client(populated_postgres_storage):
    """Define test fixture for anyvlm"""
    # Wrap the pre-populated storage in the client under test.
    return AnyVLM(populated_postgres_storage)
55+
56+
57+
@pytest.fixture
def expected_cafs(caf_iri, alleles_in_range):
    """Build the CAFs expected back for the alleles overlapping POS.

    Each one is a deep copy of ``caf_iri`` with ``focusAllele`` set to the
    variation dict itself rather than an IRI reference.
    """
    cafs = []
    for variation in alleles_in_range:
        new_caf = caf_iri.model_copy(deep=True)
        new_caf.focusAllele = variation
        cafs.append(new_caf)
    return cafs
65+
66+
67+
def test_get_caf_results_returned(
    anyvar_populated_python_client: PythonAnyVarClient,
    anyvlm_populated_client: AnyVLM,
    expected_cafs: list[CohortAlleleFrequencyStudyResult],
):
    """Test get_caf when results are expected"""
    cafs = get_caf(
        anyvar_populated_python_client,
        anyvlm_populated_client,
        GA4GH_SEQ_ID,
        POS,
        POS,
    )
    # Compare dumped models order-insensitively.
    diff = DeepDiff(
        [caf.model_dump(exclude_none=True) for caf in cafs],
        [caf.model_dump(exclude_none=True) for caf in expected_cafs],
        ignore_order=True,
    )
    assert diff == {}
86+
87+
88+
def test_get_caf_no_results(
    anyvar_populated_python_client: PythonAnyVarClient,
    anyvlm_populated_client: AnyVLM,
):
    """Test get_caf when no results are expected"""
    # The accession below is expected to match nothing in the populated stores.
    cafs = get_caf(
        anyvar_populated_python_client,
        anyvlm_populated_client,
        "GRCh45.p1:Y",
        POS,
        POS,
    )
    assert cafs == []

tests/integration/storage/test_postgres_integration.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
from anyvlm.storage.postgres import PostgresObjectStore
77

88

9-
def test_db_lifecycle(postgres_uri: str, caf_iri: CohortAlleleFrequencyStudyResult):
9+
def test_db_lifecycle(
10+
anyvlm_postgres_uri: str, caf_iri: CohortAlleleFrequencyStudyResult
11+
):
1012
"""Test that DB lifecycle works correctly"""
1113
# set up and populate DB
12-
storage = PostgresObjectStore(postgres_uri)
14+
storage = PostgresObjectStore(anyvlm_postgres_uri)
1315
caf_rows = return_cafs(storage)
1416
assert caf_rows == []
1517

0 commit comments

Comments
 (0)