Skip to content

Commit 3720107

Browse files
feat: enable config of node-specific fields in vlm response (Issue #27)
The schema classes defined in `src/anyvlm/schemas/vlm.py` have 4 variables that we will be hard-coding to GREGoR-specific values for our MVP release. These can be divided into 2 groups:

1. Metadata about the source the CAF data is coming from:
   - `HandoverType.id`
   - `HandoverType.label`
   - `BeaconHandover.url`
2. Metadata about the AnyVLM node:
   - `Meta.beaconId`

Since our only data source for MVP will be GREGoR, hard-coding values for the items under Point Number 1 works for the time being. However, we'll ultimately need to pull these three values dynamically (since we'll be adding support for nodes to contain data from multiple sources post-MVP - see Issue #37). I've therefore set these via environment variables for now in `build_vlm_response_from_caf_data()` for expediency, but have left #TODOs referencing the multi-cohort epic in the places where I'm using them.

The item under Point Number 2 (i.e. `Meta.beaconId`) should always be set once per node and should not change (even post-MVP). This one makes sense to leave as an environment variable long-term, so I handled this differently by pulling the env var in at the class level and using Pydantic validation to ensure it's set.

---

⚠️ See the `.env.example` file for the new env var names and the GREGoR default values we'll use for our MVP release. You'll need to add these to your own `.env` file in order for tests to pass.

---------

Co-authored-by: James Stevenson <[email protected]>
1 parent 0ae941d commit 3720107

File tree

6 files changed

+168
-65
lines changed

6 files changed

+168
-65
lines changed

.env.example

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,11 @@ ANYVLM_STORAGE_URI=postgresql://anyvlm:anyvlm-pw@localhost:5435/anyvlm
1111

1212
## Testing - see "Contributing" -> "Testing" in the docs
1313
ANYVLM_TEST_STORAGE_URI=postgresql://anyvlm_test:anyvlm-test-pw@localhost:5436/anyvlm_test
14+
15+
###########################
16+
## VLM RESPONSE SETTINGS ##
17+
###########################
18+
HANDOVER_TYPE_ID="GREGoR-NCH"
19+
HANDOVER_TYPE_LABEL="GREGoR AnyVLM Reference"
20+
BEACON_HANDOVER_URL="https://variants.gregorconsortium.org/"
21+
BEACON_NODE_ID="org.anyvlm.gregor"

.github/workflows/python-package.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,12 @@ jobs:
4646
- name: Run tests
4747
run: uv run pytest
4848
env:
49-
ANYVLM_ANYVAR_TEST_STORAGE_URI: postgresql://postgres:postgres@localhost:5432/postgres
5049
ANYVLM_TEST_STORAGE_URI: postgresql://postgres:postgres@localhost:5432/postgres
50+
ANYVLM_ANYVAR_TEST_STORAGE_URI: postgresql://postgres:postgres@localhost:5432/postgres
51+
HANDOVER_TYPE_ID: GREGoR-NCH
52+
HANDOVER_TYPE_LABEL: "GREGoR AnyVLM Reference"
53+
BEACON_HANDOVER_URL: https://variants.gregorconsortium.org/
54+
BEACON_NODE_ID: org.anyvlm.gregor
5155
lint:
5256
name: lint
5357
runs-on: ubuntu-latest
Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,35 @@
11
"""Craft a VlmResponse object from a list of CohortAlleleFrequencyStudyResults"""
22

3+
import os
4+
35
from anyvlm.schemas.vlm import (
6+
BeaconHandover,
7+
HandoverType,
8+
ResponseField,
9+
ResponseSummary,
410
VlmResponse,
511
)
612
from anyvlm.utils.types import AnyVlmCohortAlleleFrequencyResult
713

814

15+
class MissingEnvironmentVariableError(Exception):
16+
"""Raised when a required environment variable is not set."""
17+
18+
19+
def _get_environment_var(key: str) -> str:
20+
"""Retrieves an environment variable, raising an error if it is not set.
21+
22+
:param key: The key for the environment variable
23+
:returns: The value for the environment variable of the provided `key`
24+
:raises: MissingEnvironmentVariableError if environment variable is not found.
25+
"""
26+
value: str | None = os.environ.get(key)
27+
if not value:
28+
message = f"Missing required environment variable: {key}"
29+
raise MissingEnvironmentVariableError(message)
30+
return value
31+
32+
933
def build_vlm_response_from_caf_data(
1034
caf_data: list[AnyVlmCohortAlleleFrequencyResult],
1135
) -> VlmResponse:
@@ -14,4 +38,31 @@ def build_vlm_response_from_caf_data(
1438
:param caf_data: A list of `AnyVlmCohortAlleleFrequencyResult` objects that will be used to build the VlmResponse
1539
:return: A `VlmResponse` object.
1640
"""
17-
raise NotImplementedError # TODO: Implement this during/after Issue #16
41+
raise NotImplementedError # TODO: Remove this and finish implementing this function in Issue #35
42+
43+
# TODO - create `handover_type` and `beacon_handovers` dynamically,
44+
# instead of pulling from environment variables. See Issue #37.
45+
handover_type = HandoverType(
46+
id=_get_environment_var("HANDOVER_TYPE_ID"),
47+
label=_get_environment_var("HANDOVER_TYPE_LABEL"),
48+
)
49+
50+
beacon_handovers: list[BeaconHandover] = [
51+
BeaconHandover(
52+
handoverType=handover_type, url=_get_environment_var("BEACON_HANDOVER_URL")
53+
)
54+
]
55+
56+
num_results = len(caf_data)
57+
response_summary = ResponseSummary(
58+
exists=num_results > 0, numTotalResults=num_results
59+
)
60+
61+
# TODO - create this field in Issue #35
62+
response_field = ResponseField()
63+
64+
return VlmResponse(
65+
beaconHandovers=beacon_handovers,
66+
responseSummary=response_summary,
67+
response=response_field,
68+
)

src/anyvlm/schemas/vlm.py

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,36 @@
33
from typing import ClassVar, Literal, Self
44

55
from pydantic import BaseModel, ConfigDict, Field, model_validator
6+
from pydantic_settings import BaseSettings, SettingsConfigDict
67

78
from anyvlm.utils.types import Zygosity
89

9-
# ruff: noqa: N815 (allows camelCase vars instead of snake_case to align with expected VLM protocol response)
10+
# ruff: noqa: N815, N803, D107 (allow camelCase instead of snake_case to align with expected VLM protocol response + don't require init docstrings)
1011

1112
RESULT_ENTITY_TYPE = "genomicVariant"
1213

1314

1415
class HandoverType(BaseModel):
1516
"""The type of handover the parent `BeaconHandover` represents."""
1617

17-
id: str = Field(
18-
default="gregor", description="Node-specific identifier"
19-
) # TODO: enable configuration of this field. See Issue #27.
18+
id: str = Field(default="gregor", description="Node-specific identifier")
2019
label: str = Field(
21-
default="GREGoR AnVIL browser", description="Node-specific label"
22-
) # TODO: enable configuration of this field. See Issue #27.
20+
description="Node-specific identifier",
21+
)
2322

2423

2524
class BeaconHandover(BaseModel):
2625
"""Describes how users can get more information about the results provided in the parent `VlmResponse`"""
2726

28-
handoverType: HandoverType = HandoverType()
27+
handoverType: HandoverType = Field(
28+
..., description="The type of handover this represents"
29+
)
2930
url: str = Field(
30-
default="https://anvil.terra.bio/#workspaces?filter=GREGoR", # TODO: enable configuration of this field. See Issue #27.
31-
description="A url which directs users to more detailed information about the results tabulated by the API (ideally human-readable)",
31+
"",
32+
description="""
33+
A url which directs users to more detailed information about the results tabulated by the API. Must be human-readable.
34+
Ideally links directly to the variant specified in the query, but can be a generic search page if necessary.
35+
""",
3236
)
3337

3438

@@ -42,13 +46,27 @@ class ReturnedSchema(BaseModel):
4246
schema_: str = Field(
4347
default="ga4gh-beacon-variant-v2.0.0",
4448
# Alias is required because 'schema' is reserved by Pydantic's BaseModel class,
45-
# But VLM expects a field named 'schema'
49+
# But VLM protocol expects a field named 'schema'
4650
alias="schema",
4751
)
4852

4953
model_config = ConfigDict(populate_by_name=True)
5054

5155

56+
class MetaSettings(BaseSettings):
57+
"""Settings for 'Meta' class"""
58+
59+
beaconId: str = Field(..., alias="BEACON_NODE_ID")
60+
61+
model_config = SettingsConfigDict(
62+
env_prefix="",
63+
extra="ignore",
64+
)
65+
66+
67+
meta_settings = MetaSettings() # type: ignore
68+
69+
5270
class Meta(BaseModel):
5371
"""Relevant metadata about the results provided in the parent `VlmResponse`"""
5472

@@ -57,15 +75,20 @@ class Meta(BaseModel):
5775
description="The version of the VLM API that this response conforms to",
5876
)
5977
beaconId: str = Field(
60-
default="org.gregor.beacon", # TODO: enable configuration of this field. See Issue #27.
61-
description="""
62-
The Id of a Beacon. Usually a reversed domain string, but any URI is acceptable. The purpose of this attribute is,
63-
in the context of a Beacon network, to disambiguate responses coming from different Beacons. See the beacon documentation
64-
[here](https://github.com/ga4gh-beacon/beacon-v2/blob/c6558bf2e6494df3905f7b2df66e903dfe509500/framework/src/common/beaconCommonComponents.yaml#L26)
65-
""",
78+
default="",
79+
description=(
80+
"The Id of a Beacon. Usually a reversed domain string, but any URI is acceptable. "
81+
"The purpose of this attribute is,in the context of a Beacon network, to disambiguate "
82+
"responses coming from different Beacons. See the beacon documentation "
83+
"[here](https://github.com/ga4gh-beacon/beacon-v2/blob/c6558bf2e6494df3905f7b2df66e903dfe509500/framework/src/common/beaconCommonComponents.yaml#L26)"
84+
),
6685
)
6786
returnedSchemas: list[ReturnedSchema] = [ReturnedSchema()]
6887

88+
# custom __init__ to prevent overriding attributes that are static or set via environment variables
89+
def __init__(self) -> None:
90+
super().__init__(beaconId=meta_settings.beaconId)
91+
6992

7093
class ResponseSummary(BaseModel):
7194
"""A high-level summary of the results provided in the parent `VlmResponse"""
@@ -104,6 +127,10 @@ class ResultSet(BaseModel):
104127
description=f"The type of entity relevant to these results. Must always be set to '{RESULT_ENTITY_TYPE}'",
105128
)
106129

130+
# custom __init__ to prevent inadvertently overriding static fields
131+
def __init__(self, resultset_id: str, resultsCount: int) -> None:
132+
super().__init__(id=resultset_id, resultsCount=resultsCount)
133+
107134

108135
class ResponseField(BaseModel):
109136
"""A list of ResultSets"""
@@ -116,7 +143,7 @@ class ResponseField(BaseModel):
116143
class VlmResponse(BaseModel):
117144
"""Define response structure for the variant_counts endpoint."""
118145

119-
beaconHandovers: list[BeaconHandover] = [BeaconHandover()]
146+
beaconHandovers: list[BeaconHandover]
120147
meta: Meta = Meta()
121148
responseSummary: ResponseSummary
122149
response: ResponseField

tests/conftest.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,7 @@
1818
QualityMeasures,
1919
)
2020

21-
22-
@pytest.fixture(scope="session", autouse=True)
23-
def load_env():
24-
"""Load `.env` file.
25-
26-
Must set `autouse=True` to run before other fixtures or test cases.
27-
"""
28-
load_dotenv()
21+
load_dotenv()
2922

3023

3124
@pytest.fixture(scope="session")

tests/unit/test_schemas.py

Lines changed: 58 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""Test schema validation functionality"""
22

3+
import os
34
import re
45

56
import pytest
67

78
from anyvlm.schemas.vlm import (
8-
RESULT_ENTITY_TYPE,
9+
BeaconHandover,
910
HandoverType,
1011
ResponseField,
1112
ResponseSummary,
@@ -17,75 +18,94 @@
1718

1819
@pytest.fixture(scope="module")
1920
def valid_handover_id() -> str:
20-
return HandoverType().id
21+
return os.environ.get("HANDOVER_TYPE_ID") # type: ignore
22+
23+
24+
@pytest.fixture(scope="module")
25+
def beacon_handovers(valid_handover_id: str) -> list[BeaconHandover]:
26+
handover_type = HandoverType(
27+
id=valid_handover_id,
28+
label=os.environ.get("HANDOVER_TYPE_LABEL"), # type: ignore
29+
)
30+
31+
return [
32+
BeaconHandover(
33+
handoverType=handover_type,
34+
url=os.environ.get("BEACON_HANDOVER_URL"), # type: ignore
35+
)
36+
]
2137

2238

2339
@pytest.fixture(scope="module")
2440
def response_summary() -> ResponseSummary:
2541
return ResponseSummary(exists=False, numTotalResults=0)
2642

2743

28-
@pytest.fixture(scope="module")
29-
def responses_with_invalid_resultset_ids(valid_handover_id) -> list[ResponseField]:
30-
return [
44+
def test_valid_resultset_id(
45+
valid_handover_id: str,
46+
beacon_handovers: list[BeaconHandover],
47+
response_summary: ResponseSummary,
48+
):
49+
response = ResponseField(
50+
resultSets=[
51+
ResultSet(
52+
resultset_id=f"{valid_handover_id} {Zygosity.HOMOZYGOUS}",
53+
resultsCount=0,
54+
)
55+
]
56+
)
57+
58+
# Should NOT raise an error
59+
vlm_response = VlmResponse(
60+
beaconHandovers=beacon_handovers,
61+
responseSummary=response_summary,
62+
response=response,
63+
)
64+
65+
assert (
66+
vlm_response.response.resultSets[0].id
67+
== f"{valid_handover_id} {Zygosity.HOMOZYGOUS}"
68+
)
69+
70+
71+
def test_invalid_resultset_ids(
72+
response_summary: ResponseSummary,
73+
beacon_handovers: list[BeaconHandover],
74+
):
75+
responses_with_invalid_resultset_ids: list[ResponseField] = [
3176
ResponseField(
3277
resultSets=[
3378
ResultSet(
34-
exists=True,
35-
id=f"invalid_handover_id {Zygosity.HOMOZYGOUS}",
79+
resultset_id=f"invalid_handover_id {Zygosity.HOMOZYGOUS}",
3680
resultsCount=0,
37-
setType=RESULT_ENTITY_TYPE,
3881
)
3982
]
4083
),
4184
ResponseField(
4285
resultSets=[
4386
ResultSet(
44-
exists=True,
45-
id=f"{valid_handover_id} invalid_zygosity",
87+
resultset_id=f"{valid_handover_id} invalid_zygosity",
4688
resultsCount=0,
47-
setType=RESULT_ENTITY_TYPE,
4889
)
4990
]
5091
),
5192
ResponseField(
5293
resultSets=[
5394
ResultSet(
54-
exists=True,
55-
id=f"{Zygosity.HOMOZYGOUS}-{valid_handover_id}", # incorrect order/formatting
95+
resultset_id=f"{Zygosity.HOMOZYGOUS}-{valid_handover_id}", # incorrect order/formatting
5696
resultsCount=0,
57-
setType=RESULT_ENTITY_TYPE,
5897
)
5998
]
6099
),
61100
]
62101

63-
64-
def test_valid_resultset_id(response_summary, valid_handover_id):
65-
response = ResponseField(
66-
resultSets=[
67-
ResultSet(
68-
exists=True,
69-
id=f"{valid_handover_id} {Zygosity.HOMOZYGOUS}",
70-
resultsCount=0,
71-
setType=RESULT_ENTITY_TYPE,
72-
)
73-
]
74-
)
75-
76-
# Should NOT raise an error
77-
vlm_response = VlmResponse(responseSummary=response_summary, response=response)
78-
79-
assert (
80-
vlm_response.response.resultSets[0].id
81-
== f"{valid_handover_id} {Zygosity.HOMOZYGOUS}"
82-
)
83-
84-
85-
def test_invalid_resultset_ids(response_summary, responses_with_invalid_resultset_ids):
86102
for response in responses_with_invalid_resultset_ids:
87103
with pytest.raises(
88104
ValueError,
89105
match=re.escape(VlmResponse.resultset_id_error_message_base),
90106
):
91-
VlmResponse(responseSummary=response_summary, response=response)
107+
VlmResponse(
108+
beaconHandovers=beacon_handovers,
109+
responseSummary=response_summary,
110+
response=response,
111+
)

0 commit comments

Comments
 (0)