VariantEffect
diff --git a/‎alembic/versions/2b7a977e7e98_make_faf95_max_and_ancestry_possibly_.py‎
Lines changed: 31 additions & 0 deletions b/‎alembic/versions/2b7a977e7e98_make_faf95_max_and_ancestry_possibly_.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎src/mavedb/lib/annotation/annotate.py‎
Lines changed: 6 additions & 17 deletions b/‎src/mavedb/lib/annotation/annotate.py‎
Lines changed: 6 additions & 17 deletions
diff --git a/‎src/mavedb/lib/annotation/classification.py‎
Lines changed: 13 additions & 2 deletions b/‎src/mavedb/lib/annotation/classification.py‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎src/mavedb/lib/annotation/util.py‎
Lines changed: 112 additions & 0 deletions b/‎src/mavedb/lib/annotation/util.py‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎src/mavedb/lib/gnomad.py‎
Lines changed: 43 additions & 28 deletions b/‎src/mavedb/lib/gnomad.py‎
Lines changed: 43 additions & 28 deletions
diff --git a/‎src/mavedb/models/gnomad_variant.py‎
Lines changed: 2 additions & 2 deletions b/‎src/mavedb/models/gnomad_variant.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/mavedb/routers/mapped_variant.py‎
Lines changed: 4 additions & 4 deletions b/‎src/mavedb/routers/mapped_variant.py‎
Lines changed: 4 additions & 4 deletions
@@ -0,0 +1,31 @@
+"""Make faf95_max and ancestry possibly nullable
+
+Revision ID: 2b7a977e7e98
+Revises: a8e345cca190
+Create Date: 2025-08-21 10:08:58.565416
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "2b7a977e7e98"
+down_revision = "a8e345cca190"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic: make both faf95 columns of gnomad_variants nullable ###
+    op.alter_column("gnomad_variants", "faf95_max", existing_type=sa.DOUBLE_PRECISION(precision=53), nullable=True)
+    op.alter_column("gnomad_variants", "faf95_max_ancestry", existing_type=sa.VARCHAR(), nullable=True)
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic: set both faf95 columns back to NOT NULL. NOTE(review): presumably fails if NULLs were stored - confirm ###
+    op.alter_column("gnomad_variants", "faf95_max_ancestry", existing_type=sa.VARCHAR(), nullable=False)
+    op.alter_column("gnomad_variants", "faf95_max", existing_type=sa.DOUBLE_PRECISION(precision=53), nullable=False)
+    # ### end Alembic commands ###
@@ -13,14 +13,17 @@
 from ga4gh.va_spec.acmg_2015 import VariantPathogenicityEvidenceLine
 from ga4gh.va_spec.base.core import ExperimentalVariantFunctionalImpactStudyResult, Statement
 
-from mavedb.lib.annotation.constants import FUNCTIONAL_RANGES, CLINICAL_RANGES
 from mavedb.lib.annotation.evidence_line import acmg_evidence_line, functional_evidence_line
 from mavedb.lib.annotation.proposition import (
     mapped_variant_to_experimental_variant_clinical_impact_proposition,
     mapped_variant_to_experimental_variant_functional_impact_proposition,
 )
 from mavedb.lib.annotation.statement import mapped_variant_to_functional_statement
 from mavedb.lib.annotation.study_result import mapped_variant_to_experimental_variant_impact_study_result
+from mavedb.lib.annotation.util import (
+    can_annotate_variant_for_pathogenicity_evidence,
+    can_annotate_variant_for_functional_statement,
+)
 from mavedb.models.mapped_variant import MappedVariant
 
 
@@ -29,14 +32,7 @@ def variant_study_result(mapped_variant: MappedVariant) -> ExperimentalVariantFu
 
 
 def variant_functional_impact_statement(mapped_variant: MappedVariant) -> Optional[Statement]:
-    if mapped_variant.variant.score_set.score_ranges is None:
-        return None
-
-    if not any(
-        range_key in mapped_variant.variant.score_set.score_ranges
-        and mapped_variant.variant.score_set.score_ranges[range_key] is not None
-        for range_key in FUNCTIONAL_RANGES
-    ):
+    if not can_annotate_variant_for_functional_statement(mapped_variant):
         return None
 
     # TODO#494: Add support for multiple functional evidence lines. If a score set has multiple ranges
@@ -51,14 +47,7 @@ def variant_functional_impact_statement(mapped_variant: MappedVariant) -> Option
 def variant_pathogenicity_evidence(
     mapped_variant: MappedVariant,
 ) -> Optional[VariantPathogenicityEvidenceLine]:
-    if mapped_variant.variant.score_set.score_ranges is None:
-        return None
-
-    if not any(
-        range_key in mapped_variant.variant.score_set.score_ranges
-        and mapped_variant.variant.score_set.score_ranges[range_key] is not None
-        for range_key in CLINICAL_RANGES
-    ):
+    if not can_annotate_variant_for_pathogenicity_evidence(mapped_variant):
         return None
 
     study_result = mapped_variant_to_experimental_variant_impact_study_result(mapped_variant)
 
@@ -40,7 +40,13 @@ def functional_classification_of_variant(
         )
 
     # This property of this column is guaranteed to be defined.
-    functional_score: float = mapped_variant.variant.data["score_data"]["score"]  # type: ignore
+    functional_score: Optional[float] = mapped_variant.variant.data["score_data"]["score"]  # type: ignore
+    if functional_score is None:
+        raise ValueError(
+            f"Variant {mapped_variant.variant.urn} does not have a functional score."
+            " Unable to classify functional impact."
+        )
+
     for range in score_ranges.ranges:
         lower_bound, upper_bound = inf_or_float(range.range[0], lower=True), inf_or_float(range.range[1], lower=False)
         if functional_score > lower_bound and functional_score <= upper_bound:
@@ -72,7 +78,12 @@ def pillar_project_clinical_classification_of_variant(
         )
 
     # This property of this column is guaranteed to be defined.
-    functional_score: float = mapped_variant.variant.data["score_data"]["score"]  # type: ignore
+    functional_score: Optional[float] = mapped_variant.variant.data["score_data"]["score"]  # type: ignore
+    if functional_score is None:
+        raise ValueError(
+            f"Variant {mapped_variant.variant.urn} does not have a functional score."
+            " Unable to classify clinical impact."
+        )
 
     for range in score_ranges.ranges:
         lower_bound, upper_bound = inf_or_float(range.range[0], lower=True), inf_or_float(range.range[1], lower=False)
 
@@ -8,6 +8,7 @@
     Expression,
     LiteralSequenceExpression,
 )
+from mavedb.lib.annotation.constants import CLINICAL_RANGES, FUNCTIONAL_RANGES
 from mavedb.models.mapped_variant import MappedVariant
 from mavedb.lib.annotation.exceptions import MappingDataDoesntExistException
 
@@ -137,3 +138,114 @@ def variation_from_mapped_variant(mapped_variant: MappedVariant) -> MolecularVar
         )
 
     return vrs_object_from_mapped_variant(mapped_variant.post_mapped)
+
+
+def _can_annotate_variant_base_assumptions(mapped_variant: MappedVariant) -> bool:
+    """
+    Report whether a mapped variant satisfies the prerequisite shared by all annotations.
+
+    The single prerequisite checked here is that the variant carries a score value.
+    Score ranges are not inspected by this function.
+
+    Args:
+        mapped_variant (MappedVariant): The mapped variant whose underlying
+            variant score is inspected.
+
+    Returns:
+        bool: True when the variant's score is not None, False when the
+            score is None.
+    """
+    # The "score_data" -> "score" entry is guaranteed to exist for all variants,
+    # so only its value needs checking.
+    score = mapped_variant.variant.data["score_data"]["score"]  # type: ignore
+
+    return score is not None
+
+
+def _variant_score_ranges_have_required_keys_for_annotation(
+    mapped_variant: MappedVariant, key_options: list[str]
+) -> bool:
+    """
+    Report whether the variant's score set defines at least one of the given score range keys.
+
+    A key counts only when it is present in the score set's score ranges and its value is not None.
+
+    Args:
+        mapped_variant (MappedVariant): The mapped variant whose variant's score set is inspected.
+        key_options (list[str]): Candidate score range keys; one populated key is sufficient.
+
+    Returns:
+        bool: True if at least one key in key_options is present with a non-None value. False if the
+              score set has no score ranges, or if every candidate key is absent or None.
+    """
+    score_ranges = mapped_variant.variant.score_set.score_ranges
+    if score_ranges is None:
+        return False
+
+    for range_key in key_options:
+        if range_key in score_ranges and score_ranges[range_key] is not None:
+            return True
+
+    return False
+
+
+def can_annotate_variant_for_pathogenicity_evidence(mapped_variant: MappedVariant) -> bool:
+    """
+    Report whether pathogenicity evidence can be generated for a mapped variant.
+
+    Eligibility requires two things: the prerequisites shared by every
+    annotation type must hold, and the variant's score set must provide at
+    least one populated clinical score range. Both conditions are delegated
+    to private helpers in this module; this function only combines their
+    results.
+
+    Args:
+        mapped_variant (MappedVariant): The mapped variant being considered
+            for a pathogenicity evidence annotation.
+
+    Returns:
+        bool: True when both eligibility checks succeed, False when either
+            one fails.
+
+    Notes:
+        Checks are evaluated in this order, and evaluation stops at the
+        first one that fails:
+        1. _can_annotate_variant_base_assumptions
+        2. _variant_score_ranges_have_required_keys_for_annotation, given
+           CLINICAL_RANGES as the candidate keys
+
+        A False result does not say which of the two checks failed.
+    """
+    base_assumptions_hold = _can_annotate_variant_base_assumptions(mapped_variant)
+    return base_assumptions_hold and _variant_score_ranges_have_required_keys_for_annotation(
+        mapped_variant, CLINICAL_RANGES
+    )
+
+
+def can_annotate_variant_for_functional_statement(mapped_variant: MappedVariant) -> bool:
+    """
+    Report whether a functional statement can be generated for a mapped variant.
+
+    Eligibility requires the prerequisites shared by every annotation type to
+    hold and the variant's score set to provide at least one populated
+    functional score range.
+
+    Args:
+        mapped_variant (MappedVariant): The mapped variant being considered
+            for a functional statement annotation.
+
+    Returns:
+        bool: True when both eligibility checks succeed, False when either
+            one fails.
+
+    Notes:
+        Checks are evaluated in this order, and evaluation stops at the
+        first one that fails:
+        1. _can_annotate_variant_base_assumptions
+        2. _variant_score_ranges_have_required_keys_for_annotation, given
+           FUNCTIONAL_RANGES as the candidate keys
+    """
+    base_assumptions_hold = _can_annotate_variant_base_assumptions(mapped_variant)
+    return base_assumptions_hold and _variant_score_ranges_have_required_keys_for_annotation(
+        mapped_variant, FUNCTIONAL_RANGES
+    )
@@ -7,6 +7,7 @@
 from sqlalchemy.orm import Session
 
 from mavedb.lib.logging.context import logging_context, save_to_logging_context
+from mavedb.lib.utils import batched
 from mavedb.db.athena import engine as athena_engine
 from mavedb.models.gnomad_variant import GnomADVariant
 from mavedb.models.mapped_variant import MappedVariant
@@ -56,7 +57,7 @@ def allele_list_from_list_like_string(alleles_string: str) -> list[str]:
     if not alleles_string:
         return []
 
-    if not re.match(r"^\"\[\s*[AGTC]+(?:\s*,\s*[AGTC]+)\s*\]\"$", alleles_string):
+    if not re.match(r"^\[\s*[AGTC]+(?:\s*,\s*[AGTC]+)\s*\]$", alleles_string):
         raise ValueError("Invalid format for alleles string.")
 
     alleles_string = alleles_string.strip().strip('"[]')
@@ -67,7 +68,9 @@ def allele_list_from_list_like_string(alleles_string: str) -> list[str]:
 
 def gnomad_variant_data_for_caids(caids: Sequence[str]) -> Sequence[Row[Any]]:  # pragma: no cover
     """
-    Fetches variant rows from the gnomAD table for a list of CAIDs.
+    Fetches variant rows from the gnomAD table for a list of CAIDs. Athena has a maximum character limit of 262144
+    in queries. CAIDs are about 12 characters long on average + 4 for two quotes, a comma and a space. Chunk our list
+    into chunks of 260000/16=16250 so we are guaranteed to remain under the character limit.
 
     Args:
         caids (list[str]): A list of CAIDs (Canonical Allele Identifiers) to query.
@@ -87,36 +90,45 @@ def gnomad_variant_data_for_caids(caids: Sequence[str]) -> Sequence[Row[Any]]:
     Raises:
         sqlalchemy.exc.SQLAlchemyError: If there is an error executing the query.
     """
-
-    caid_str = ",".join(f"'{caid}'" for caid in caids)
-    athena_query = f"""
-        SELECT
-            "locus.contig",
-            "locus.position",
-            "alleles",
-            "caid",
-            "joint.freq.all.ac",
-            "joint.freq.all.an",
-            "joint.fafmax.faf95_max_gen_anc",
-            "joint.fafmax.faf95_max"
-        FROM
-            {gnomad_table_name()}
-        WHERE
-            caid IN ({caid_str})
-    """
-
-    save_to_logging_context({"num_caids": len(caids)})
-    logger.debug(msg=f"Fetching gnomAD variants from Athena with query:\n{athena_query}", extra=logging_context())
+    chunked_caids = batched(caids, 16250)
+    caid_strs = [",".join(f"'{caid}'" for caid in chunk) for chunk in chunked_caids]
+    save_to_logging_context({"num_caids": len(caids), "num_chunks": len(caid_strs)})
 
     with athena_engine.connect() as athena_connection:
         logger.debug(msg="Connected to Athena", extra=logging_context())
-        result = athena_connection.execute(text(athena_query))
-        rows = result.fetchall()
 
-    save_to_logging_context({"num_gnomad_variant_rows_fetched": len(rows)})
-    logger.debug(msg="Done fetching gnomAD variants from Athena", extra=logging_context())
+        result_rows: list[Row[Any]] = []
+        for chunk_index, caid_str in enumerate(caid_strs):
+            athena_query = f"""
+                SELECT
+                    "locus.contig",
+                    "locus.position",
+                    "alleles",
+                    "caid",
+                    "joint.freq.all.ac",
+                    "joint.freq.all.an",
+                    "joint.fafmax.faf95_max_gen_anc",
+                    "joint.fafmax.faf95_max"
+                FROM
+                    {gnomad_table_name()}
+                WHERE
+                    caid IN ({caid_str})
+            """
+            logger.debug(
+                msg=f"Fetching gnomAD variants from Athena (batch {chunk_index}) with query:\n{athena_query}",
+                extra=logging_context(),
+            )
+
+            result = athena_connection.execute(text(athena_query))
+            rows = result.fetchall()
+            result_rows.extend(rows)
+
+            logger.debug(f"Fetched {len(rows)} gnomAD variants from Athena (batch {chunk_index}).")
 
-    return rows
+        save_to_logging_context({"num_gnomad_variant_rows_fetched": len(result_rows)})
+        logger.debug(msg="Done fetching gnomAD variants from Athena", extra=logging_context())
+
+    return result_rows
 
 
 def link_gnomad_variants_to_mapped_variants(
@@ -153,7 +165,10 @@ def link_gnomad_variants_to_mapped_variants(
         allele_number = int(row.__getattribute__("joint.freq.all.an"))
         allele_frequency = float(allele_count) / float(allele_number)
         faf95_max_ancestry = row.__getattribute__("joint.fafmax.faf95_max_gen_anc")
-        faf95_max = float(row.__getattribute__("joint.fafmax.faf95_max"))
+        faf95_max = row.__getattribute__("joint.fafmax.faf95_max")
+
+        if faf95_max is not None:
+            faf95_max = float(faf95_max)
 
         for mapped_variant in mapped_variants_with_caids:
             # Remove any existing gnomAD variants for this mapped variant that match the current gnomAD data version to avoid data duplication.
 
@@ -24,8 +24,8 @@ class GnomADVariant(Base):
     allele_number = Column(Integer, nullable=False)
     allele_frequency = Column(Float, nullable=False)
 
-    faf95_max = Column(Float, nullable=False)
-    faf95_max_ancestry = Column(String, nullable=False)
+    faf95_max = Column(Float, nullable=True)
+    faf95_max_ancestry = Column(String, nullable=True)
 
     creation_date = Column(Date, nullable=False, default=date.today)
     modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today)
 
@@ -139,12 +139,12 @@ async def show_mapped_variant_functional_impact_statement(
 
     if not functional_impact:
         logger.info(
-            msg="Could not construct a functional impact statement for this mapped variant; No score range evidence exists for this score set.",
+            msg="Could not construct a functional impact statement for this mapped variant. Variant does not have sufficient evidence to evaluate its functional impact.",
             extra=logging_context(),
         )
         raise HTTPException(
             status_code=404,
-            detail=f"Could not construct a functional impact statement for mapped variant {urn}: No score range evidence found",
+            detail=f"Could not construct a functional impact statement for mapped variant {urn}. Variant does not have sufficient evidence to evaluate its functional impact.",
         )
 
     return functional_impact
@@ -180,12 +180,12 @@ async def show_mapped_variant_acmg_evidence_line(
 
     if not pathogenicity_evidence:
         logger.info(
-            msg="Could not construct a pathogenicity evidence line for this mapped variant; No calibrations exist for this score set.",
+            msg="Could not construct a pathogenicity evidence line for this mapped variant; Variant does not have sufficient evidence to evaluate its pathogenicity.",
             extra=logging_context(),
         )
         raise HTTPException(
             status_code=404,
-            detail=f"Could not construct a pathogenicity evidence line for mapped variant {urn}; No calibrations exist for this score set",
+            detail=f"Could not construct a pathogenicity evidence line for mapped variant {urn}; Variant does not have sufficient evidence to evaluate its pathogenicity.",
         )
 
     return pathogenicity_evidence