Skip to content

Commit f479566

Browse files
add validation for ResultSet ids + add extra info to TODOs
1 parent e2fef0f commit f479566

File tree

3 files changed

+76
-3
lines changed

3 files changed

+76
-3
lines changed

src/anyvlm/functions/build_vlm_response.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Craft a VlmResponse object from a list of CohortAlleleFrequencyStudyResults"""
22

3+
from ga4gh.core.models import MappableConcept
34
from ga4gh.va_spec.base.core import CohortAlleleFrequencyStudyResult
45

56
from anyvlm.schemas.vlm import (
@@ -9,6 +10,41 @@
910
ResultSet,
1011
VlmResponse,
1112
)
13+
from anyvlm.utils.types import Zygosity
14+
15+
16+
def _extract_zygosity(caf_study_result: CohortAlleleFrequencyStudyResult) -> Zygosity:
    """Determine the zygosity recorded on a CohortAlleleFrequencyStudyResult.

    The zygosity is taken from the first entry of ``cohort.characteristics`` whose
    ``conceptType`` is "Zygosity"; that entry's ``name`` must be a valid `Zygosity` value.

    TODO: This is a best guess at how zygosity will be represented in our CAF results.
    This function may need to be updated during or after Issue #16, depending on how
    the CAF data pull ends up being implemented.

    :param caf_study_result: The CohortAlleleFrequencyStudyResult whose zygosity we wish to determine.
    :return: The `Zygosity` of the study result.
    :raises ValueError: If the cohort has no characteristics, if no named "Zygosity"
        characteristic is found, or if its name is not a valid `Zygosity` value.
    """
    characteristics: list[MappableConcept] | None = caf_study_result.cohort.characteristics
    if not characteristics:
        error_message: str = "Each CohortAlleleFrequencyStudyResult's 'cohort' field must have 'characteristics' set"
        raise ValueError(error_message)

    # Only the first characteristic tagged as "Zygosity" is considered.
    zygosity_name: str | None = None
    for characteristic in characteristics:
        if getattr(characteristic, "conceptType", None) == "Zygosity":
            zygosity_name = characteristic.name
            break

    if not zygosity_name:
        error_message: str = "'CohortAlleleFrequencyStudyResult.cohort.characteristics' must contain a 'Zygosity' entry"
        raise ValueError(error_message)

    try:
        return Zygosity(zygosity_name)
    except ValueError as e:
        # Re-raise with a message that names the offending value.
        error_message: str = f"Invalid zygosity provided: {zygosity_name}"
        raise ValueError(error_message) from e
1248

1349

1450
def build_vlm_response(caf_data: list[CohortAlleleFrequencyStudyResult]) -> VlmResponse:
@@ -21,15 +57,17 @@ def build_vlm_response(caf_data: list[CohortAlleleFrequencyStudyResult]) -> VlmR
2157
if caf_data:
2258
total: int = sum(
2359
[caf_study_result.focusAlleleCount for caf_study_result in caf_data]
24-
) # TODO: I'm not sure this is the right field?
60+
) # TODO: I'm not sure this is the right field? Will need to verify during/after Issue #16.
2561
response_summary = ResponseSummary(exists=True, total=total)
2662

2763
for caf_study_result in caf_data:
2864
result_sets.extend(
2965
[
3066
ResultSet(
3167
exists=True,
32-
id=f"{HandoverType.id} {caf_study_result.cohort}", # TODO - not sure that caf_study_result.cohort is the right field
68+
# TODO - HandoverType.id represents the ID of the node from which the dataset was pulled.
69+
# In the future, this ID should be set dynamically.
70+
id=f"{HandoverType.id} {_extract_zygosity(caf_study_result)}",
3371
resultsCount=caf_study_result.focusAlleleCount,
3472
)
3573
]

src/anyvlm/schemas/vlm.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
"""Schemas relating to VLM API."""
22

3-
from pydantic import BaseModel, Field
3+
from typing import Self
4+
5+
from pydantic import BaseModel, Field, model_validator
46

57
from anyvlm import __version__
8+
from anyvlm.utils.types import Zygosity
69

710
# ruff: noqa: N815 (allows camelCase vars instead of snake_case)
811

@@ -88,3 +91,26 @@ class VlmResponse(BaseModel):
8891
meta: Meta = Meta()
8992
responseSummary: ResponseSummary
9093
response: ResponseField
94+
95+
@model_validator(mode="after")
def validate_resultset_ids(self) -> Self:
    """Ensure each ResultSet.id is correctly constructed.

    Every id must have the form '<node_id> <zygosity>', where ``node_id`` matches the
    ``handoverType.id`` of one of ``self.beaconHandovers`` and ``zygosity`` is a valid
    `Zygosity` value. The id is split on its FIRST space only, so ``node_id`` is
    assumed not to contain spaces while ``zygosity`` may (e.g. "Unknown Zygosity").

    :return: The validated model instance, unchanged.
    :raises ValueError: If an id has no space separator, if its node_id does not match
        any handover id, or if its zygosity is not a valid `Zygosity` value.
    """
    handover_ids: set[str] = {
        beaconHandover.handoverType.id for beaconHandover in self.beaconHandovers
    }

    for result_set in self.response.resultSets:
        # Zygosity values may themselves contain spaces ("Unknown Zygosity"), so a
        # plain `split(" ")` would yield three parts and break the two-way unpacking.
        node_id, separator, zygosity = result_set.id.partition(" ")

        if not separator:
            error_message = f"Invalid ResultSet id - ids must be in form '<node_id> <zygosity>', but provided id of {result_set.id} does not contain a space separating the two parts"
            raise ValueError(error_message)

        if node_id not in handover_ids:
            error_message = f"Invalid ResultSet id - ids must be in form '<node_id> <zygosity>', but provided node_id of {node_id} does not match any `handoverType.id` provided in `self.beaconHandovers`"
            raise ValueError(error_message)

        try:
            Zygosity(zygosity)
        except ValueError as e:
            # List the allowed values in enum definition order so the message is deterministic.
            valid_zygosity_values = [member.value for member in Zygosity]
            error_message = f"Invalid ResultSet id - ids must be in form '<node_id> <zygosity>', but provided zygosity of {zygosity} is not found in allowable value set of: {', '.join(valid_zygosity_values)}"
            raise ValueError(error_message) from e

    return self

src/anyvlm/utils/types.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,12 @@ def _normalize_chromosome_name(chromosome_name: str) -> str:
7474

7575

7676
ChromosomeName = Annotated[str, BeforeValidator(_normalize_chromosome_name)]
77+
78+
79+
class Zygosity(StrEnum):
80+
"""Allowable zygosity values as defined by the VLM protocol"""
81+
82+
HOMOZYGOUS = "Homozygous"
83+
HETEROZYGOUS = "Heterozygous"
84+
HEMIZYGOUS = "Hemizygous"
85+
UNKNOWN = "Unknown Zygosity"

0 commit comments

Comments
 (0)