Skip to content

Commit aece41d

Browse files
Merge branch 'issue-17-stub-vlm-request-endpoint' into issue-13-build-vlm-network-response
2 parents c13e2fe + 051c9a1 commit aece41d

File tree

3 files changed

+33
-57
lines changed

3 files changed

+33
-57
lines changed

src/anyvlm/functions/get_caf.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,19 @@
55
from anyvlm.anyvar.base_client import BaseAnyVarClient
66
from anyvlm.utils.types import (
77
ChromosomeName,
8-
GenomicSequence,
98
GrcAssemblyId,
10-
UscsAssemblyBuild,
9+
NucleotideSequence,
10+
UcscAssemblyBuild,
1111
)
1212

1313

1414
def get_caf(
1515
anyvar_client: BaseAnyVarClient,
16-
assembly_id: GrcAssemblyId | UscsAssemblyBuild,
16+
assembly_id: GrcAssemblyId | UcscAssemblyBuild,
1717
reference_name: ChromosomeName,
1818
start: int,
19-
reference_bases: GenomicSequence,
20-
alternate_bases: GenomicSequence,
19+
reference_bases: NucleotideSequence,
20+
alternate_bases: NucleotideSequence,
2121
) -> list[CohortAlleleFrequencyStudyResult]:
2222
"""Retrieve Cohort Allele Frequency data for all known variants matching provided search params
2323

src/anyvlm/restapi/vlm.py

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
from anyvlm.utils.types import (
1717
ChromosomeName,
1818
EndpointTag,
19-
GenomicSequence,
2019
GrcAssemblyId,
21-
UscsAssemblyBuild,
20+
NucleotideSequence,
21+
UcscAssemblyBuild,
2222
)
2323

2424

@@ -36,33 +36,24 @@ def ingest_vcf(vcf_path: Path) -> None:
3636
description="Search for a single sequence variant and receive allele counts by zygosity, in accordance with the Variant-Level Matching protocol",
3737
tags=[EndpointTag.SEARCH],
3838
)
39+
# ruff: noqa: D103, N803 (allow camelCase args and don't require docstrings)
3940
def variant_counts(
4041
request: Request,
41-
assemblyId: Annotated[ # noqa: N803
42-
GrcAssemblyId | UscsAssemblyBuild,
42+
assemblyId: Annotated[
43+
GrcAssemblyId | UcscAssemblyBuild,
4344
Query(..., description="Genome reference assembly"),
4445
],
45-
referenceName: Annotated[ # noqa: N803
46+
referenceName: Annotated[
4647
ChromosomeName, Query(..., description="Chromosome with optional 'chr' prefix")
4748
],
4849
start: Annotated[int, Query(..., description="Variant position")],
49-
referenceBases: Annotated[ # noqa: N803
50-
GenomicSequence, Query(..., description="Genomic bases ('T', 'AC', etc.)")
50+
referenceBases: Annotated[
51+
NucleotideSequence, Query(..., description="Genomic bases ('T', 'AC', etc.)")
5152
],
52-
alternateBases: Annotated[ # noqa: N803
53-
GenomicSequence, Query(..., description="Genomic bases ('T', 'AC', etc.)")
53+
alternateBases: Annotated[
54+
NucleotideSequence, Query(..., description="Genomic bases ('T', 'AC', etc.)")
5455
],
5556
) -> VlmResponse:
56-
"""Accept a Variant-Level Matching network request and return allele counts by zygosity.
57-
58-
:param request: FastAPI `Request` object
59-
:param assemblyId: The genome reference assembly. Must be a GRC assembly identifier (e.g., "GRCh38) or a USCS assembly build (e.g., "hg38")
60-
:param referenceName: The name of the reference chromosome, with optional 'chr' prefix
61-
:param start: The start of the variant's position
62-
:param referenceBases: Genomic bases ('T', 'AC', etc.)
63-
:param alternateBases: Genomic bases ('T', 'AC', etc.)
64-
:return: A VlmResponse object containing cohort allele frequency data. If no matches are found, endpoint will return a status code of 200 with an empty set of results.
65-
"""
6657
anyvar_client: BaseAnyVarClient = request.app.state.anyvar_client
6758
caf_data: list[CohortAlleleFrequencyStudyResult] = get_caf(
6859
anyvar_client, assemblyId, referenceName, start, referenceBases, alternateBases

src/anyvlm/utils/types.py

Lines changed: 18 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -20,57 +20,42 @@ class GrcAssemblyId(StrEnum):
2020
GRCH38 = "GRCh38"
2121

2222

23-
class UscsAssemblyBuild(StrEnum):
24-
"""Supported USCS assembly builds"""
23+
class UcscAssemblyBuild(StrEnum):
24+
"""Supported UCSC assembly builds"""
2525

2626
HG38 = "hg38"
2727
HG19 = "hg19"
2828

2929

30-
GenomicSequence = Annotated[
30+
NucleotideSequence = Annotated[
3131
str,
3232
BeforeValidator(str.upper),
33-
StringConstraints(pattern=r"^[AGCT]*$"),
33+
StringConstraints(pattern=r"^[ACGTURYKMSWBDHVN]*$"),
3434
]
3535

3636

37-
def is_valid_chromosome_name(chromosome_name: str) -> bool:
38-
"""Checks whether or not a provided chromosome name is valid.
39-
40-
:param chromosome_name: The chromosome name to validate.
41-
:return: `True` if the chromosome name is a number between 1-22, or the values "X" or "Y"; else `False`.
42-
"""
43-
min_chromosome_number = 1
44-
max_chromosome_number = 22
45-
try:
46-
return (
47-
chromosome_name in {"X", "Y"}
48-
or min_chromosome_number <= int(chromosome_name) <= max_chromosome_number
49-
)
50-
except ValueError:
51-
return False
52-
53-
5437
def _normalize_chromosome_name(chromosome_name: str) -> str:
5538
"""Normalize a chromosome name. Input must be a string consisting of either a number between 1-22,
56-
or 'X' or 'Y'; optionally prefixed with 'chr'.
39+
or one of the values 'X', 'Y', or 'MT'; optionally prefixed with 'chr'.
5740
5841
:param chromosome_name: The name of the chromosome to normalize, following the rules stated above.
5942
:return: The chromosome name, stripped of its 'chr' prefix if one was present
6043
"""
61-
# strip the 'chr' prefix if it was included
62-
chromosome_name = (
63-
chromosome_name[3:]
64-
if chromosome_name.lower().startswith("chr")
65-
else chromosome_name
66-
).upper()
67-
68-
if is_valid_chromosome_name(chromosome_name):
44+
chromosome_name = chromosome_name.upper().removeprefix("CHR")
45+
46+
min_chromosome_number = 1
47+
max_chromosome_number = 22
48+
49+
if chromosome_name in {"X", "Y", "MT"} or (
50+
chromosome_name.isdigit()
51+
and min_chromosome_number <= int(chromosome_name) <= max_chromosome_number
52+
):
6953
return chromosome_name
70-
error_message = (
71-
"Invalid chromosome. Must be 1-22, 'X', or 'Y'; with optional 'chr' prefix."
54+
55+
raise ValueError(
56+
"Invalid chromosome. Must be either a number between 1-22, or "
57+
"'one of the values 'X', 'Y', or 'MT'; optionally prefixed with 'chr'."
7258
)
73-
raise ValueError(error_message)
7459

7560

7661
ChromosomeName = Annotated[str, BeforeValidator(_normalize_chromosome_name)]

0 commit comments

Comments
 (0)