Skip to content
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
11d1002
first pass at drafting stub api endpoint for vlm caf request
jennifer-bowser Dec 3, 2025
41cf9a1
add enums for genomic reference assembly ids
jennifer-bowser Dec 4, 2025
9df72df
add validation for 'referenceBases' and 'alternateBases'
jennifer-bowser Dec 4, 2025
ca011b1
refactor to use FastAPI's built-in validation
jennifer-bowser Dec 4, 2025
f8b307c
add validation for 'referenceName' param
jennifer-bowser Dec 4, 2025
4d2121d
add TODOs with issue numbers for work to be completed in future tickets
jennifer-bowser Dec 4, 2025
713f702
move endpoint from 'main.py' into 'restapi/vlm.py'
jennifer-bowser Dec 4, 2025
bfcf59a
refactor chromosome name validation to be more streamlined
jennifer-bowser Dec 4, 2025
130c8c6
fix casing in function params
jennifer-bowser Dec 4, 2025
355157f
add newline to end of file
jennifer-bowser Dec 4, 2025
ed637e3
update endpoint description
jennifer-bowser Dec 4, 2025
34956bd
fix a 'vlm' string to make it 'anyvlm' - missed in original PR
jennifer-bowser Dec 4, 2025
6ae34f2
first pass at creating the VlmResponse object and filling in default …
jennifer-bowser Dec 4, 2025
4f51d08
added some descriptions to the vlm response schema objects
jennifer-bowser Dec 4, 2025
db131dd
adds more decriptions and clearer TODO messages
jennifer-bowser Dec 5, 2025
e2fef0f
move logic out of http endpoint handler into a reusable function
jennifer-bowser Dec 5, 2025
f479566
add validation for ResultSet ids + add extra info to TODOs
jennifer-bowser Dec 5, 2025
f354a34
update zygosity getter func to raise a NotImplementedError
jennifer-bowser Dec 5, 2025
be12487
just raise a 'NotImplementedError' for 'build_vlm_response_from_caf_d…
jennifer-bowser Dec 8, 2025
f1b851e
update comment for clairity
jennifer-bowser Dec 8, 2025
56a8078
update 'get_caf' method signature with better typing
jennifer-bowser Dec 8, 2025
1032e78
update a few names, types, and comments for clairity
jennifer-bowser Dec 8, 2025
211a8f8
resolve merge conflict
jennifer-bowser Dec 8, 2025
70cc545
add support for mitochondrial DNA
jennifer-bowser Dec 10, 2025
f54ced2
use a pydantic model for 'ReturnedSchema' in the 'Meta' class
jennifer-bowser Dec 10, 2025
a07517a
update 'Meta.apiVersion' to refer to the _VLM_ API version, not our _…
jennifer-bowser Dec 10, 2025
65ed1d0
use correct casing for GREGoR
jennifer-bowser Dec 10, 2025
1fec677
update TODOs re: configuratbility to reference new issue #27
jennifer-bowser Dec 10, 2025
b55f3e7
add descriptions for all Fields that didn't already have them
jennifer-bowser Dec 10, 2025
8107044
adds tests for validation code in 'VlmResponse'
jennifer-bowser Dec 10, 2025
630a4f5
use variable for error message matching instead of comparing raw strings
jennifer-bowser Dec 12, 2025
ccc70ad
fix typo: 'uscs' > 'ucsc'
jennifer-bowser Dec 12, 2025
c5ed5c4
use python's built-in 'removeprefix' function
jennifer-bowser Dec 12, 2025
4994bc0
Update 'uscs' > 'ucsc' in all imports/usages
jennifer-bowser Dec 12, 2025
ebfffef
remove docstring from FastAPI endpoint since the info is duplicated b…
jennifer-bowser Dec 12, 2025
4f573d1
streamline chromosome name validation
jennifer-bowser Dec 12, 2025
80c1ad3
update one last instance of 'uscs' > 'ucsc'
jennifer-bowser Dec 12, 2025
082a4d6
Expand the allowable values in the 'GenomicSequence' type
jennifer-bowser Dec 12, 2025
051c9a1
rename 'GenomicSequence' to NucleotideSequence' for specificity
jennifer-bowser Dec 12, 2025
c13e2fe
use 'ConfigDict' instead of raw dictionary object
jennifer-bowser Dec 12, 2025
aece41d
Merge branch 'issue-17-stub-vlm-request-endpoint' into issue-13-build…
jennifer-bowser Dec 12, 2025
f3587de
Whoops update import to use 'ConfigDict'
jennifer-bowser Dec 15, 2025
1b520b6
resolve merge conflicts
jennifer-bowser Dec 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/anyvlm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Settings(BaseSettings):
"""

model_config = SettingsConfigDict(
env_prefix="vlm_",
env_prefix="anyvlm_",
Copy link
Contributor Author

@jennifer-bowser jennifer-bowser Dec 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unrelated to this ticket but I found one last instance of "vlm" that needed to be updated to "anyvlm"

env_file=".env",
env_file_encoding="utf-8",
extra="ignore",
Expand Down
18 changes: 18 additions & 0 deletions src/anyvlm/functions/build_vlm_response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Craft a VlmResponse object from a list of CohortAlleleFrequencyStudyResults"""

from ga4gh.va_spec.base.core import CohortAlleleFrequencyStudyResult

from anyvlm.schemas.vlm import (
VlmResponse,
)


def build_vlm_response_from_caf_data(
    caf_data: list[CohortAlleleFrequencyStudyResult],
) -> VlmResponse:
    """Assemble a `VlmResponse` from cohort allele frequency study results.

    This is currently a placeholder: the conversion has not been written yet,
    so every call raises.

    :param caf_data: The `CohortAlleleFrequencyStudyResult` objects the response should be built from
    :return: A `VlmResponse` object.
    :raises NotImplementedError: Always, until the conversion is implemented
    """
    # TODO: Implement this during/after Issue #16
    raise NotImplementedError
23 changes: 18 additions & 5 deletions src/anyvlm/functions/get_caf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,30 @@
from ga4gh.va_spec.base.core import CohortAlleleFrequencyStudyResult

from anyvlm.anyvar.base_client import BaseAnyVarClient
from anyvlm.utils.types import (
ChromosomeName,
GenomicSequence,
GrcAssemblyId,
UscsAssemblyBuild,
)


def get_caf(
av: BaseAnyVarClient, accession_id: str, start: int, end: int
anyvar_client: BaseAnyVarClient,
assembly_id: GrcAssemblyId | UscsAssemblyBuild,
reference_name: ChromosomeName,
start: int,
reference_bases: GenomicSequence,
alternate_bases: GenomicSequence,
) -> list[CohortAlleleFrequencyStudyResult]:
"""Retrieve Cohort Allele Frequency data for all known variants matching provided search params

:param av: AnyVar client
:param accession_id: ID for sequence to search upon
:param anyvar_client: AnyVar client
:param assembly_id: The reference assembly to utilize - must be one of: "GRCh37", "GRCh38", "hg38", "hg19"
:param reference_name: The chromosome to search on, with an optional "chr" prefix - e.g., "1", "chr22", "X", "chrY", etc.
:param start: start of range search
:param end: end of range to search
:param reference_bases: Genomic bases ('T', 'AC', etc.)
:param alternate_bases: Genomic bases ('T', 'AC', etc.)
:return: list of CAFs contained in search interval
"""
raise NotImplementedError
raise NotImplementedError # TODO: Implement this. See Issue #16.
13 changes: 4 additions & 9 deletions src/anyvlm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from enum import Enum

from fastapi import FastAPI

Expand All @@ -15,6 +14,9 @@
ServiceOrganization,
ServiceType,
)
from anyvlm.utils.types import (
EndpointTag,
)


def create_anyvar_client(
Expand Down Expand Up @@ -58,18 +60,11 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
)


class _Tag(str, Enum):
"""Define tag names for endpoints."""

META = "Meta"
SEARCH = "Search"


@app.get(
"/service-info",
summary="Get basic service information",
description="Retrieve service metadata, such as versioning and contact info. Structured in conformance with the [GA4GH service info API specification](https://www.ga4gh.org/product/service-info/)",
tags=[_Tag.META],
tags=[EndpointTag.META],
)
def service_info() -> ServiceInfo:
"""Provide service info per GA4GH Service Info spec"""
Expand Down
59 changes: 59 additions & 0 deletions src/anyvlm/restapi/vlm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,25 @@
"""Define route(s) for the variant-level matching (VLM) protocol"""

from pathlib import Path
from typing import Annotated

from fastapi import Query, Request
from ga4gh.va_spec.base.core import CohortAlleleFrequencyStudyResult

from anyvlm.anyvar.base_client import BaseAnyVarClient
from anyvlm.functions.build_vlm_response import build_vlm_response_from_caf_data
from anyvlm.functions.get_caf import get_caf
from anyvlm.main import app
from anyvlm.schemas.vlm import (
VlmResponse,
)
from anyvlm.utils.types import (
ChromosomeName,
EndpointTag,
GenomicSequence,
GrcAssemblyId,
UscsAssemblyBuild,
)


def ingest_vcf(vcf_path: Path) -> None:
Expand All @@ -9,3 +28,43 @@ def ingest_vcf(vcf_path: Path) -> None:
:param vcf_path: VCF file location
"""
raise NotImplementedError


@app.get(
    "/variant_counts",
    summary="Provides allele counts of a single sequence variant, broken down by zygosity",
    description="Search for a single sequence variant and receive allele counts by zygosity, in accordance with the Variant-Level Matching protocol",
    tags=[EndpointTag.SEARCH],
)
def variant_counts(
    request: Request,
    assemblyId: Annotated[  # noqa: N803
        GrcAssemblyId | UscsAssemblyBuild,
        Query(..., description="Genome reference assembly"),
    ],
    referenceName: Annotated[  # noqa: N803
        ChromosomeName,
        Query(..., description="Chromosome with optional 'chr' prefix"),
    ],
    start: Annotated[int, Query(..., description="Variant position")],
    referenceBases: Annotated[  # noqa: N803
        GenomicSequence,
        Query(..., description="Genomic bases ('T', 'AC', etc.)"),
    ],
    alternateBases: Annotated[  # noqa: N803
        GenomicSequence,
        Query(..., description="Genomic bases ('T', 'AC', etc.)"),
    ],
) -> VlmResponse:
    """Handle a Variant-Level Matching request and return allele counts by zygosity.

    The camelCase parameter names are the query-parameter names exposed by the
    endpoint, which is why the N803 naming lint is silenced on each of them.

    :param request: FastAPI `Request` object; the AnyVar client is read from `request.app.state`
    :param assemblyId: The genome reference assembly. Must be a GRC assembly identifier (e.g., "GRCh38") or a UCSC assembly build (e.g., "hg38")
    :param referenceName: The name of the reference chromosome, with optional 'chr' prefix
    :param start: The start of the variant's position
    :param referenceBases: Genomic bases ('T', 'AC', etc.)
    :param alternateBases: Genomic bases ('T', 'AC', etc.)
    :return: A VlmResponse object containing cohort allele frequency data. If no matches are found, endpoint will return a status code of 200 with an empty set of results.
    """
    client: BaseAnyVarClient = request.app.state.anyvar_client
    matches: list[CohortAlleleFrequencyStudyResult] = get_caf(
        client,
        assemblyId,
        referenceName,
        start,
        referenceBases,
        alternateBases,
    )
    return build_vlm_response_from_caf_data(matches)
153 changes: 153 additions & 0 deletions src/anyvlm/schemas/vlm.py
Original file line number Diff line number Diff line change
@@ -1 +1,154 @@
"""Schemas relating to VLM API."""

from typing import ClassVar, Literal, Self

from pydantic import BaseModel, Field, model_validator

from anyvlm.utils.types import Zygosity

# ruff: noqa: N815 (allows camelCase vars instead of snake_case to align with expected VLM protocol response)

RESULT_ENTITY_TYPE = "genomicVariant"


class HandoverType(BaseModel):
    """Describe which kind of handover the enclosing `BeaconHandover` stands for.

    Both fields currently default to fixed GREGoR values.
    """

    # TODO: enable configuration of this field. See Issue #27.
    id: str = Field("gregor", description="Node-specific identifier")

    # TODO: enable configuration of this field. See Issue #27.
    label: str = Field("GREGoR AnVIL browser", description="Node-specific label")


class BeaconHandover(BaseModel):
    """Tell users where to find more information about the results in the parent `VlmResponse`."""

    # Defaults to a `HandoverType` built from that model's own defaults.
    handoverType: HandoverType = HandoverType()

    # TODO: enable configuration of this field. See Issue #27.
    url: str = Field(
        "https://anvil.terra.bio/#workspaces?filter=GREGoR",
        description="A url which directs users to more detailed information about the results tabulated by the API (ideally human-readable)",
    )


class ReturnedSchema(BaseModel):
    """Fixed [Beacon Schema](https://github.com/ga4gh-beacon/beacon-v2/blob/c6558bf2e6494df3905f7b2df66e903dfe509500/framework/json/common/beaconCommonComponents.json#L241)"""

    # Lets callers populate `schema_` by its Python name as well as by its alias.
    model_config = {"populate_by_name": True}

    entityType: str = Field(
        RESULT_ENTITY_TYPE,
        description=f"The type of entity this response describes. Must always be set to '{RESULT_ENTITY_TYPE}'",
    )

    # 'schema' is reserved by Pydantic's BaseModel class, yet VLM expects a field
    # with exactly that name, so the attribute is `schema_` with alias 'schema'.
    schema_: str = Field(
        "ga4gh-beacon-variant-v2.0.0",
        alias="schema",
    )


class Meta(BaseModel):
    """Relevant metadata about the results provided in the parent `VlmResponse`.

    Every field declares a default, so `Meta()` can be built with no arguments.
    """

    # Version of the VLM API that the response conforms to (see the description).
    apiVersion: str = Field(
        default="v1.0",
        description="The version of the VLM API that this response conforms to",
    )
    # Identifier of the Beacon that produced the response.
    # NOTE(review): the description below is a multi-line literal, so its leading
    # and trailing newlines are part of the runtime string.
    beaconId: str = Field(
        default="org.gregor.beacon",  # TODO: enable configuration of this field. See Issue #27.
        description="""
The Id of a Beacon. Usually a reversed domain string, but any URI is acceptable. The purpose of this attribute is,
in the context of a Beacon network, to disambiguate responses coming from different Beacons. See the beacon documentation
[here](https://github.com/ga4gh-beacon/beacon-v2/blob/c6558bf2e6494df3905f7b2df66e903dfe509500/framework/src/common/beaconCommonComponents.yaml#L26)
""",
    )
    # Defaults to a one-element list holding a default-constructed `ReturnedSchema`.
    returnedSchemas: list[ReturnedSchema] = [ReturnedSchema()]


class ResponseSummary(BaseModel):
    """Give a high-level summary of the results carried by the parent `VlmResponse`.

    Both fields are required; neither declares a default.
    """

    exists: bool = Field(
        description="Indicates whether the response contains any results."
    )
    numTotalResults: int = Field(
        description="The total number of results found for the given query"
    )


class ResultSet(BaseModel):
    """Hold one set of cohort allele frequency results.

    The zygosity a ResultSet refers to is encoded in its `id` field.
    """

    # Typed as Literal[True], so no other value is accepted.
    exists: Literal[True] = Field(
        True,
        description="Indicates whether this ResultSet exists. This must always be `True`, even if `resultsCount` = `0`",
    )

    # Required; the expected format is checked by `VlmResponse.validate_resultset_ids`.
    id: str = Field(
        description="id should be constructed of the `HandoverType.id` + the ResultSet's zygosity. See `validate_resultset_ids` validator in `VlmResponse` class.",
        examples=["Geno2MP Homozygous", "MyGene2 Heterozygous"],
    )

    # min_length and max_length are both 0, which pins this list to empty.
    results: list = Field(
        [],
        min_length=0,
        max_length=0,
        description="This must always be set to an empty array",
    )

    resultsCount: int = Field(
        description="A count for the zygosity indicated by the ResultSet's `id`"
    )

    setType: str = Field(
        RESULT_ENTITY_TYPE,
        description=f"The type of entity relevant to these results. Must always be set to '{RESULT_ENTITY_TYPE}'",
    )


class ResponseField(BaseModel):
    """Wrap the list of `ResultSet` objects returned for a query."""

    # Required; no default is declared.
    resultSets: list[ResultSet] = Field(
        description="A list of ResultSets for the given query."
    )


class VlmResponse(BaseModel):
    """Define response structure for the variant_counts endpoint.

    `beaconHandovers` and `meta` fall back to default-constructed values;
    `responseSummary` and `response` must be supplied. After construction,
    `validate_resultset_ids` checks the `id` of every ResultSet.
    """

    beaconHandovers: list[BeaconHandover] = [BeaconHandover()]
    meta: Meta = Meta()
    responseSummary: ResponseSummary
    response: ResponseField

    # Common prefix of every error raised by `validate_resultset_ids`, exposed
    # as a class attribute so callers can match on it.
    resultset_id_error_message_base: ClassVar[str] = (
        "Invalid ResultSet id - ids must be in form '<node_id> <zygosity>'"
    )

    @model_validator(mode="after")
    def validate_resultset_ids(self) -> Self:
        """Ensure each ResultSet.id is correctly constructed.

        An id is valid when it consists of exactly two space-separated parts:
        a node id equal to one of the `handoverType.id` values in
        `self.beaconHandovers`, followed by a value of the `Zygosity` enum.

        :return: The validated model instance
        :raises ValueError: If an id does not split into exactly two parts, names
            an unknown node id, or names an unknown zygosity
        """
        # A set gives constant-time membership tests inside the loop below.
        handover_ids: set[str] = {
            handover.handoverType.id for handover in self.beaconHandovers
        }

        for result_set in self.response.resultSets:
            try:
                # Unpacking fails unless the id has exactly two parts.
                node_id, zygosity = result_set.id.split(" ")
            except ValueError as e:
                error_message = f"{self.resultset_id_error_message_base}, but provided id of {result_set.id} contains invalid formatting"
                raise ValueError(error_message) from e

            if node_id not in handover_ids:
                error_message = f"{self.resultset_id_error_message_base}, but provided node_id of {node_id} does not match any `handoverType.id` provided in `self.beaconHandovers`"
                raise ValueError(error_message)

            try:
                Zygosity(zygosity)
            except ValueError as e:
                # Enum iteration follows definition order, which keeps the
                # message identical from one interpreter run to the next
                # (a set of strings has no stable order across runs).
                valid_zygosity_values = [member.value for member in Zygosity]
                error_message = f"{self.resultset_id_error_message_base}, but provided zygosity of {zygosity} is not found in allowable value set of: {', '.join(valid_zygosity_values)}"
                raise ValueError(error_message) from e

        return self
1 change: 1 addition & 0 deletions src/anyvlm/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Provide utilities."""
Loading