|
1 | 1 | """Schemas relating to VLM API.""" |
| 2 | + |
| 3 | +from typing import ClassVar, Literal, Self |
| 4 | + |
| 5 | +from pydantic import BaseModel, ConfigDict, Field, model_validator |
| 6 | + |
| 7 | +from anyvlm.utils.types import Zygosity |
| 8 | + |
| 9 | +# ruff: noqa: N815 (allows camelCase vars instead of snake_case to align with expected VLM protocol response) |
| 10 | + |
# Entity type name reported in responses. Used in this module as the default
# value (and in the description text) of `ReturnedSchema.entityType` and
# `ResultSet.setType`.
RESULT_ENTITY_TYPE = "genomicVariant"
| 12 | + |
| 13 | + |
class HandoverType(BaseModel):
    """The type of handover the parent `BeaconHandover` represents."""

    # Both defaults are currently hard-coded to a single node's values.
    # TODO: enable configuration of these fields. See Issue #27.
    id: str = Field("gregor", description="Node-specific identifier")
    label: str = Field("GREGoR AnVIL browser", description="Node-specific label")
| 23 | + |
| 24 | + |
class BeaconHandover(BaseModel):
    """Describes how users can get more information about the results provided in the parent `VlmResponse`"""

    # When omitted, the handover type is a `HandoverType` built from its own defaults.
    handoverType: HandoverType = Field(default=HandoverType())
    # TODO: enable configuration of the default url below. See Issue #27.
    url: str = Field(
        "https://anvil.terra.bio/#workspaces?filter=GREGoR",
        description="A url which directs users to more detailed information about the results tabulated by the API (ideally human-readable)",
    )
| 33 | + |
| 34 | + |
class ReturnedSchema(BaseModel):
    """Fixed [Beacon Schema](https://github.com/ga4gh-beacon/beacon-v2/blob/c6558bf2e6494df3905f7b2df66e903dfe509500/framework/json/common/beaconCommonComponents.json#L241)"""

    # Lets callers populate `schema_` by its attribute name as well as by its
    # `schema` alias.
    model_config = ConfigDict(populate_by_name=True)

    entityType: str = Field(
        RESULT_ENTITY_TYPE,
        description=f"The type of entity this response describes. Must always be set to '{RESULT_ENTITY_TYPE}'",
    )
    # The attribute cannot be called `schema` because that name is reserved by
    # Pydantic's BaseModel class, yet VLM expects a field named 'schema' - hence
    # the trailing underscore plus alias.
    schema_: str = Field("ga4gh-beacon-variant-v2.0.0", alias="schema")
| 50 | + |
| 51 | + |
class Meta(BaseModel):
    """Relevant metadata about the results provided in the parent `VlmResponse`"""

    apiVersion: str = Field(
        default="v1.0",
        description="The version of the VLM API that this response conforms to",
    )
    beaconId: str = Field(
        default="org.gregor.beacon",  # TODO: enable configuration of this field. See Issue #27.
        # Assembled from adjacent string literals instead of a triple-quoted
        # block so the emitted description carries no leading/trailing newlines
        # or source-code indentation (indented lines can be rendered as a code
        # block by Markdown-aware schema viewers, breaking the link).
        description=(
            "The Id of a Beacon. Usually a reversed domain string, but any URI is acceptable. "
            "The purpose of this attribute is, in the context of a Beacon network, "
            "to disambiguate responses coming from different Beacons. "
            "See the beacon documentation "
            "[here](https://github.com/ga4gh-beacon/beacon-v2/blob/c6558bf2e6494df3905f7b2df66e903dfe509500/framework/src/common/beaconCommonComponents.yaml#L26)"
        ),
    )
    # Defaults to a single `ReturnedSchema` built from that model's own defaults.
    returnedSchemas: list[ReturnedSchema] = [ReturnedSchema()]
| 68 | + |
| 69 | + |
class ResponseSummary(BaseModel):
    """A high-level summary of the results provided in the parent `VlmResponse`"""

    # Both fields are required (`...`): there is no meaningful default summary.
    exists: bool = Field(
        ..., description="Indicates whether the response contains any results."
    )
    numTotalResults: int = Field(
        ..., description="The total number of results found for the given query"
    )
| 79 | + |
| 80 | + |
class ResultSet(BaseModel):
    """A set of cohort allele frequency results. The zygosity of the ResultSet is identified in the `id` field"""

    # Constrained to the literal `True`; any other value fails validation.
    exists: Literal[True] = Field(
        True,
        description="Indicates whether this ResultSet exists. This must always be `True`, even if `resultsCount` = `0`",
    )
    # Required. The format is checked by the parent model, not here.
    id: str = Field(
        description="id should be constructed of the `HandoverType.id` + the ResultSet's zygosity. See `validate_resultset_ids` validator in `VlmResponse` class.",
        examples=["Geno2MP Homozygous", "MyGene2 Heterozygous"],
    )
    # min_length == max_length == 0 means only an empty list is accepted.
    results: list = Field(
        [],
        min_length=0,
        max_length=0,
        description="This must always be set to an empty array",
    )
    # Required.
    resultsCount: int = Field(
        description="A count for the zygosity indicated by the ResultSet's `id`"
    )
    setType: str = Field(
        RESULT_ENTITY_TYPE,
        description=f"The type of entity relevant to these results. Must always be set to '{RESULT_ENTITY_TYPE}'",
    )
| 106 | + |
| 107 | + |
class ResponseField(BaseModel):
    """A list of ResultSets"""

    # Required: no default is supplied, so callers must always pass the list.
    resultSets: list[ResultSet] = Field(
        description="A list of ResultSets for the given query."
    )
| 114 | + |
| 115 | + |
class VlmResponse(BaseModel):
    """Define response structure for the variant_counts endpoint."""

    beaconHandovers: list[BeaconHandover] = [BeaconHandover()]
    meta: Meta = Meta()
    responseSummary: ResponseSummary
    response: ResponseField

    # Common prefix shared by every error raised from `validate_resultset_ids`.
    resultset_id_error_message_base: ClassVar[str] = (
        "Invalid ResultSet id - ids must be in form '<node_id> <zygosity>'"
    )

    @model_validator(mode="after")
    def validate_resultset_ids(self) -> Self:
        """Ensure each ResultSet.id is correctly constructed.

        Each id must split on a single space into exactly two tokens,
        `<node_id> <zygosity>`, where `node_id` equals the `handoverType.id` of
        one of `self.beaconHandovers` and `zygosity` is a valid `Zygosity` value.

        Returns:
            The validated model instance, unchanged.

        Raises:
            ValueError: if an id does not split into exactly two tokens, if its
                node id matches no handover, or if its zygosity is not a
                `Zygosity` value.
        """
        # A set gives constant-time membership checks inside the loop below.
        handover_ids: set[str] = {
            handover.handoverType.id for handover in self.beaconHandovers
        }

        for result_set in self.response.resultSets:
            try:
                # Unpacking raises ValueError unless there are exactly two tokens.
                node_id, zygosity = result_set.id.split(" ")
            except ValueError as e:
                error_message = f"{self.resultset_id_error_message_base}, but provided id of {result_set.id} contains invalid formatting"
                raise ValueError(error_message) from e

            if node_id not in handover_ids:
                error_message = f"{self.resultset_id_error_message_base}, but provided node_id of {node_id} does not match any `handoverType.id` provided in `self.beaconHandovers`"
                raise ValueError(error_message)

            try:
                Zygosity(zygosity)
            except ValueError as e:
                # A list in enum definition order keeps the message stable
                # between runs; iterating a set of strings is not ordered.
                valid_zygosity_values = [member.value for member in Zygosity]
                error_message = f"{self.resultset_id_error_message_base}, but provided zygosity of {zygosity} is not found in allowable value set of: {', '.join(valid_zygosity_values)}"
                raise ValueError(error_message) from e

        return self
0 commit comments