Skip to content

Commit c7c2871

Browse files
committed
wip: add create_anyvlm_storage + sanitized_url
1 parent 1674196 commit c7c2871

File tree

8 files changed

+94
-5
lines changed

8 files changed

+94
-5
lines changed

docs/source/configuration/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ Configuration
33

44
This section details AnyVLM configuration. It is broken down into the following subsections:
55

6-
* :doc:`Object Storage <storage>`: define database connection, alter table names, and set parameters for bulk processing
6+
* :doc:`Object Storage <storage>`: define database connection
77
* :doc:`Example .env file <dotenv_example>`: use a ``.env`` file to declare environment variables when running REST API service
88
* :doc:`Docker Compose <docker_compose>`: edit the provided Docker Compose file to tailor it to your needs
99

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ classifiers = [
1919
dependencies = [
2020
"ga4gh.vrs>=2.2.0,<3.0",
2121
"ga4gh.va_spec~=0.4.2",
22-
"biocommons.anyvar@git+https://github.com/biocommons/anyvar.git@main",
22+
"biocommons.anyvar@git+https://github.com/biocommons/anyvar.git@0d3ab56fe936b27235a1ce136da4641ea81c0bbf",
2323
"fastapi>=0.95.0",
2424
"python-multipart", # required for fastapi file uploads
2525
"uvicorn",

src/anyvlm/main.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import logging
44
from collections.abc import AsyncGenerator
55
from contextlib import asynccontextmanager
6+
from os import environ
7+
from urllib.parse import urlparse
68

79
from anyvar.anyvar import create_storage, create_translator
810
from fastapi import FastAPI
@@ -18,6 +20,8 @@
1820
ServiceOrganization,
1921
ServiceType,
2022
)
23+
from anyvlm.storage import DEFAULT_STORAGE_URI
24+
from anyvlm.storage.base_storage import Storage
2125
from anyvlm.utils.types import (
2226
EndpointTag,
2327
)
@@ -51,6 +55,34 @@ def create_anyvar_client(
5155
return PythonAnyVarClient(translator, storage)
5256

5357

58+
def create_anyvlm_storage(uri: str | None = None) -> Storage:
    """Create an AnyVLM storage instance for a storage URI.

    The URI is resolved in order of precedence: the ``uri`` argument, the
    ``ANYVLM_STORAGE_URI`` environment variable, then ``DEFAULT_STORAGE_URI``.
    An empty value at any step is treated as "not provided".

    The URI format is as follows:

    `postgresql://[username]:[password]@[domain]/[database]`

    :param uri: AnyVLM storage URI
    :raises ValueError: if the URI scheme is not supported
    :return: AnyVLM storage instance
    """
    if not uri:
        # `or` (rather than a `.get` default) so that an environment variable
        # that is set but empty falls back to the default, mirroring how an
        # empty `uri` argument is handled above.
        uri = environ.get("ANYVLM_STORAGE_URI") or DEFAULT_STORAGE_URI

    parsed_uri = urlparse(uri)
    if parsed_uri.scheme == "postgresql":
        # Function-scope import is deliberate (see the PLC0415 suppression);
        # presumably it keeps the PostgreSQL backend from being loaded until
        # it is actually selected -- confirm before moving it to module level.
        from anyvlm.storage.postgres import PostgresObjectStore  # noqa: PLC0415

        storage = PostgresObjectStore(uri)
    else:
        msg = f"URI scheme {parsed_uri.scheme} is not implemented"
        raise ValueError(msg)

    # Log the sanitized URL only, so the password never reaches the logs.
    _logger.debug(
        "create_anyvlm_storage: %s → %s", storage.sanitized_url, storage
    )
    return storage
84+
85+
5486
@asynccontextmanager
5587
async def lifespan(app: FastAPI) -> AsyncGenerator:
5688
"""Configure FastAPI instance lifespan.
@@ -59,8 +91,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
5991
:return: async context handler
6092
"""
6193
app.state.anyvar_client = create_anyvar_client()
94+
app.state.anyvlm_storage = create_anyvlm_storage()
6295
yield
6396
app.state.anyvar_client.close()
97+
app.state.anyvlm_storage.close()
6498

6599

66100
app = FastAPI(

src/anyvlm/storage/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,6 @@
22

33
from .base_storage import Storage
44

5-
__all__ = ["Storage"]
5+
DEFAULT_STORAGE_URI = "postgresql://postgres@localhost:5432/anyvlm"
6+
7+
__all__ = ["DEFAULT_STORAGE_URI", "Storage"]

src/anyvlm/storage/base_storage.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,14 @@ def wipe_db(self) -> None:
2828
def add_allele_frequencies(self, caf: CohortAlleleFrequencyStudyResult) -> None:
2929
"""Add allele frequency data to the database. Will skip conflicts.
3030
31+
NOTE: For now, this will only insert a single caf record into the database.
32+
Single insertion is used to do a simple test of the storage backend.
33+
Issue-34 will support batch insertion of caf records.
34+
3135
:param caf: Cohort allele frequency study result object to insert into the DB
3236
"""
37+
38+
@property
39+
@abstractmethod
40+
def sanitized_url(self) -> str:
41+
"""Return the database connection URL with the password masked.

Abstract property: each concrete storage implementation must define it.

:return: sanitized connection URL, suitable for display (e.g. in log messages)
"""

src/anyvlm/storage/mappers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def from_db_entity(
4141
homozygotes = db_entity.ac_hom
4242
heterozygotes = db_entity.ac_het
4343
hemizygotes = db_entity.ac_hemi
44-
ac = sum((homozygotes, heterozygotes, hemizygotes))
44+
ac = sum((homozygotes or 0, heterozygotes or 0, hemizygotes or 0))
4545
an = db_entity.an
4646

4747
return CohortAlleleFrequencyStudyResult(
@@ -56,7 +56,7 @@ def from_db_entity(
5656
"hemizygotes": hemizygotes,
5757
},
5858
cohort=StudyGroup(name=db_entity.cohort), # type: ignore
59-
) # type: ignore
59+
)
6060

6161
def to_db_entity(
6262
self, va_model: CohortAlleleFrequencyStudyResult

src/anyvlm/storage/postgres.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Provide PostgreSQL-based storage implementation."""
22

3+
from urllib.parse import urlparse
4+
35
from ga4gh.va_spec.base import CohortAlleleFrequencyStudyResult
46
from sqlalchemy import create_engine, delete
57
from sqlalchemy.dialects.postgresql import insert
@@ -36,9 +38,29 @@ def wipe_db(self) -> None:
3638
with self.session_factory() as session, session.begin():
3739
session.execute(delete(orm.AlleleFrequencyData))
3840

41+
@property
def sanitized_url(self) -> str:
    """Return the database connection URL with the password masked.

    Only the scheme, user info, host, port and path are kept; any query
    string or fragment in ``self.db_url`` is dropped. A non-empty password
    is replaced by ``****``.

    :return: sanitized connection URL
    """
    parsed = urlparse(self.db_url)

    netloc = ""
    if parsed.username:
        netloc += parsed.username
        if parsed.password:
            netloc += ":****"
        netloc += "@"

    host = parsed.hostname
    if host:
        # `hostname` strips the brackets from IPv6 literals; restore them so
        # the host stays distinguishable from the port (e.g. `[::1]:5432`).
        netloc += f"[{host}]" if ":" in host else host

    if parsed.port is not None:
        netloc += f":{parsed.port}"

    return f"{parsed.scheme}://{netloc}{parsed.path}"
56+
3957
def add_allele_frequencies(self, caf: CohortAlleleFrequencyStudyResult) -> None:
4058
"""Add allele frequency data to the database. Will skip conflicts.
4159
60+
NOTE: For now, this will only insert a single caf record into the database.
61+
Single insertion is used to do a simple test of the storage backend.
62+
Issue-34 will support batch insertion of caf records.
63+
4264
:param caf: Cohort allele frequency study result object to insert into the DB
4365
"""
4466
db_entity = mapper_registry.to_db_entity(caf)

tests/unit/storage/test_postgres_unit.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,28 @@ def caf_empty_cohort(caf_iri: CohortAlleleFrequencyStudyResult):
3232
return caf
3333

3434

35+
@pytest.mark.parametrize(
    ("db_url", "sanitized_db_url"),
    [
        # password present: masked
        (
            "postgresql://postgres:postgres@localhost:5432/anyvlm_test",
            "postgresql://postgres:****@localhost:5432/anyvlm_test",
        ),
        # username only: unchanged
        (
            "postgresql://postgres@localhost:5432/anyvlm_test",
            "postgresql://postgres@localhost:5432/anyvlm_test",
        ),
        # no credentials at all: unchanged
        (
            "postgresql://localhost:5432/anyvlm_test",
            "postgresql://localhost:5432/anyvlm_test",
        ),
        # no explicit port: password still masked, no port emitted
        (
            "postgresql://postgres:postgres@localhost/anyvlm_test",
            "postgresql://postgres:****@localhost/anyvlm_test",
        ),
    ],
)
def test_sanitized_url(db_url: str, sanitized_db_url: str):
    """Test that the sanitized_url property masks the password and nothing else"""
    # Bypass __init__ so no database connection is attempted; the property
    # under test only reads `db_url`.
    object_store = PostgresObjectStore.__new__(PostgresObjectStore)
    object_store.db_url = db_url
    assert object_store.sanitized_url == sanitized_db_url
55+
56+
3557
@pytest.mark.parametrize("caf_fixture_name", ["caf_iri", "caf_allele"])
3658
def test_add_allele_frequencies(
3759
request, caf_fixture_name: str, postgres_storage: PostgresObjectStore

0 commit comments

Comments
 (0)