
Commit 2f37d6b

Merge branch 'release-2025.5.0' into davereinhart/scoreset-column-metadata
2 parents b3a5a88 + a0cec30

File tree: 9 files changed (+511, −67 lines)

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 # python-base
 # Set up shared environment variables
 ################################
-FROM --platform=amd64 python:3.11 AS python-base
+FROM python:3.11 AS python-base
 
 # Poetry
 # https://python-poetry.org/docs/configuration/#using-environment-variables

src/mavedb/db/view.py

Lines changed: 123 additions & 5 deletions
@@ -6,8 +6,8 @@
 
 import sqlalchemy as sa
 from sqlalchemy.ext import compiler
-from sqlalchemy.schema import DDLElement, MetaData
 from sqlalchemy.orm import Session
+from sqlalchemy.schema import DDLElement, MetaData
 
 from mavedb.db.base import Base
 
@@ -32,7 +32,53 @@ class MaterializedView(Base):
 
     @classmethod
     def refresh(cls, connection, concurrently=True):
-        """Refresh this materialized view."""
+        """
+        Refresh the underlying materialized view for this ORM-mapped class.
+
+        This class method delegates to `refresh_mat_view` to issue a database
+        REFRESH MATERIALIZED VIEW (optionally CONCURRENTLY) statement for the
+        materialized view backing the current model (`cls.__table__.fullname`).
+
+        Parameters
+        ----------
+        connection : sqlalchemy.engine.Connection | sqlalchemy.orm.Session
+            An active SQLAlchemy connection or session bound to the target database.
+        concurrently : bool, default True
+            If True, performs a concurrent refresh (REFRESH MATERIALIZED VIEW
+            CONCURRENTLY), allowing reads during the refresh when the database
+            backend supports it. If False, performs a blocking refresh.
+
+        Returns
+        -------
+        None
+
+        Raises
+        ------
+        sqlalchemy.exc.DBAPIError
+            If the database reports an error while refreshing the materialized view.
+        sqlalchemy.exc.OperationalError
+            For operational issues such as locks or insufficient privileges.
+        ValueError
+            If the connection provided is not a valid SQLAlchemy connection/session.
+
+        Notes
+        -----
+        - A concurrent refresh typically requires the materialized view to have a
+          unique index covering all rows; otherwise the database may reject the
+          operation.
+        - This operation does not return a value; it is executed for its side effect.
+        - Ensure the connection/session is in a clean transactional state if you
+          rely on consistent snapshot semantics.
+        - This function commits no changes; it is the caller's responsibility to
+          commit the session if needed.
+
+        Examples
+        --------
+        # Refresh with concurrent mode (default)
+        MyMaterializedView.refresh(connection)
+
+        # Perform a blocking refresh
+        MyMaterializedView.refresh(connection, concurrently=False)
+        """
         refresh_mat_view(connection, cls.__table__.fullname, concurrently)
 
 
@@ -123,19 +169,91 @@ class MyView(Base):
 
 def refresh_mat_view(session: Session, name: str, concurrently=True):
     """
-    Refreshes a single materialized view, given by `name`.
+    Refresh a PostgreSQL materialized view within the current SQLAlchemy session.
+
+    This helper issues a REFRESH MATERIALIZED VIEW statement for the specified
+    materialized view. It first explicitly flushes the session because
+    session.execute() bypasses SQLAlchemy's autoflush mechanism; without the flush,
+    pending changes (e.g., newly inserted/updated rows that the view depends on)
+    might not be reflected in the refreshed view.
+
+    Parameters
+    ----------
+    session : sqlalchemy.orm.Session
+        An active SQLAlchemy session bound to a PostgreSQL database.
+    name : str
+        The exact name (optionally schema-qualified) of the materialized view to
+        refresh.
+    concurrently : bool, default True
+        If True, uses REFRESH MATERIALIZED VIEW CONCURRENTLY, allowing reads during
+        the refresh and requiring a unique index on the materialized view. If False,
+        performs a blocking refresh.
+
+    Raises
+    ------
+    sqlalchemy.exc.SQLAlchemyError
+        Propagates any database errors encountered during execution (e.g.,
+        insufficient privileges, a missing view, or lack of the unique index
+        required for CONCURRENTLY).
+
+    Notes
+    -----
+    - Using CONCURRENTLY requires the materialized view to have at least one
+      unique index; otherwise PostgreSQL will raise an error.
+    - The operation does not return a value; it is executed for its side effect.
+    - Ensure the session is in a clean transactional state if you rely on
+      consistent snapshot semantics.
+    - This function commits no changes; it is the caller's responsibility to
+      commit the session if needed.
+
+    Examples
+    --------
+    refresh_mat_view(session, "public.my_materialized_view")
+    refresh_mat_view(session, "reports.daily_stats", concurrently=False)
     """
     # since session.execute() bypasses autoflush, must manually flush in order
     # to include newly-created/modified objects in the refresh
     session.flush()
+
     _con = "CONCURRENTLY " if concurrently else ""
     session.execute(sa.text("REFRESH MATERIALIZED VIEW " + _con + name))
 
 
 def refresh_all_mat_views(session: Session, concurrently=True):
     """
-    Refreshes all materialized views. Views are refreshed in non-deterministic order,
-    so view definitions can't depend on each other.
+    Refresh all PostgreSQL materialized views visible to the given SQLAlchemy session.
+
+    The function inspects the current database connection for registered
+    materialized views and issues a REFRESH MATERIALIZED VIEW statement for each
+    one using the helper function `refresh_mat_view`.
+
+    Parameters
+    ----------
+    session : sqlalchemy.orm.Session
+        An active SQLAlchemy session bound to a PostgreSQL connection.
+    concurrently : bool, default True
+        If True, each materialized view is refreshed using the CONCURRENTLY option
+        (only supported when the view has a unique index that satisfies PostgreSQL's
+        requirements). If False, a standard blocking refresh is performed.
+
+    Behavior
+    --------
+    - If inspection of the connection fails or returns no inspector, the function
+      exits without performing any work.
+    - Each materialized view name returned by the inspector is passed to
+      `refresh_mat_view(session, name, concurrently)`.
+    - Views are refreshed in non-deterministic order, so view definitions must not
+      depend on one another.
+
+    Notes
+    -----
+    - Using CONCURRENTLY allows reads during refresh at the cost of requiring an
+      appropriate unique index and potentially being slower.
+    - Exceptions raised during an individual refresh propagate to the caller;
+      views not yet refreshed at that point are left stale.
+    - Ensure the session is in a clean transactional state if you rely on
+      consistent snapshot semantics.
+    - This function commits no changes; it is the caller's responsibility to
+      commit the session if needed.
     """
     inspector = sa.inspect(session.connection())
 
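For orientation, here is a minimal usage sketch of the refresh helpers above. The engine URL, the sessionmaker, and the view name "mapped_variants_mv" are illustrative assumptions, not part of this commit:

    # Hypothetical driver for the helpers in src/mavedb/db/view.py; the DSN and
    # the view name below are placeholders, not values from MaveDB.
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    from mavedb.db.view import refresh_all_mat_views, refresh_mat_view

    engine = create_engine("postgresql+psycopg2://user:pass@localhost/mavedb")
    Session = sessionmaker(bind=engine)

    with Session() as session:
        # Blocking refresh of a single view (no unique index required).
        refresh_mat_view(session, "mapped_variants_mv", concurrently=False)

        # Concurrent refresh of every materialized view the inspector reports.
        refresh_all_mat_views(session)

        # The helpers do not commit; the caller owns the transaction.
        session.commit()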

src/mavedb/lib/score_sets.py

Lines changed: 108 additions & 8 deletions
@@ -1,16 +1,17 @@
+from collections import Counter
 import csv
 import io
 import logging
-import re
 from operator import attrgetter
+import re
 from typing import Any, BinaryIO, Iterable, Optional, TYPE_CHECKING, Sequence, Literal
 
 from mavedb.models.mapped_variant import MappedVariant
 import numpy as np
 import pandas as pd
 from pandas.testing import assert_index_equal
 from sqlalchemy import Integer, and_, cast, func, or_, select
-from sqlalchemy.orm import Session, aliased, contains_eager, joinedload, selectinload
+from sqlalchemy.orm import Session, aliased, contains_eager, joinedload, Query, selectinload
 
 from mavedb.lib.exceptions import ValidationError
 from mavedb.lib.logging.context import logging_context, save_to_logging_context
@@ -71,11 +72,15 @@ def options(cls) -> list[str]:
         return [cls.NUCLEOTIDE, cls.TRANSCRIPT, cls.PROTEIN]
 
 
-def search_score_sets(db: Session, owner_or_contributor: Optional[User], search: ScoreSetsSearch) -> list[ScoreSet]:
-    save_to_logging_context({"score_set_search_criteria": search.model_dump()})
+def build_search_score_sets_query_filter(
+    db: Session, query: Query[ScoreSet], owner_or_contributor: Optional[User], search: ScoreSetsSearch
+):
+    superseding_score_set = aliased(ScoreSet)
 
-    query = db.query(ScoreSet)  # \
-    # .filter(ScoreSet.private.is_(False))
+    # Limit to unsuperseded score sets.
+    # TODO#??? Prevent unpublished superseding score sets from hiding their published precursors in search results.
+    query = query.join(superseding_score_set, ScoreSet.superseding_score_set, isouter=True)
+    query = query.filter(superseding_score_set.id.is_(None))
 
     if owner_or_contributor is not None:
         query = query.filter(
@@ -213,6 +218,14 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
             )
         )
     )
+    return query
+
+
+def search_score_sets(db: Session, owner_or_contributor: Optional[User], search: ScoreSetsSearch):
+    save_to_logging_context({"score_set_search_criteria": search.model_dump()})
+
+    query = db.query(ScoreSet)
+    query = build_search_score_sets_query_filter(db, query, owner_or_contributor, search)
 
     score_sets: list[ScoreSet] = (
         query.join(ScoreSet.experiment)
@@ -257,15 +270,102 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
            ),
        )
        .order_by(Experiment.title)
+        .offset(search.offset if search.offset is not None else None)
+        .limit(search.limit + 1 if search.limit is not None else None)
        .all()
    )
    if not score_sets:
        score_sets = []
 
-    save_to_logging_context({"matching_resources": len(score_sets)})
+    offset = search.offset if search.offset is not None else 0
+    num_score_sets = offset + len(score_sets)
+    if search.limit is not None and num_score_sets > offset + search.limit:
+        # In the main query, we allowed limit + 1 results. The extra record tells us
+        # that a count query is needed to determine the true total.
+        score_sets = score_sets[: search.limit]
+        count_query = db.query(ScoreSet)
+        count_query = build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
+        num_score_sets = count_query.order_by(None).limit(None).count()
+
+    save_to_logging_context({"matching_resources": num_score_sets})
     logger.debug(msg=f"Score set search yielded {len(score_sets)} matching resources.", extra=logging_context())
 
-    return score_sets  # filter_visible_score_sets(score_sets)
+    return {"score_sets": score_sets, "num_score_sets": num_score_sets}
+
+
+def score_set_search_filter_options_from_counter(counter: Counter):
+    return [{"value": value, "count": count} for value, count in counter.items()]
+
+
+def fetch_score_set_search_filter_options(db: Session, owner_or_contributor: Optional[User], search: ScoreSetsSearch):
+    save_to_logging_context({"score_set_search_criteria": search.model_dump()})
+
+    query = db.query(ScoreSet)
+    query = build_search_score_sets_query_filter(db, query, owner_or_contributor, search)
+
+    score_sets: list[ScoreSet] = query.all()
+    if not score_sets:
+        score_sets = []
+
+    target_category_counter: Counter[str] = Counter()
+    target_name_counter: Counter[str] = Counter()
+    target_organism_name_counter: Counter[str] = Counter()
+    target_accession_counter: Counter[str] = Counter()
+    for score_set in score_sets:
+        for target in getattr(score_set, "target_genes", []):
+            category = getattr(target, "category", None)
+            if category:
+                target_category_counter[category] += 1
+
+            name = getattr(target, "name", None)
+            if name:
+                target_name_counter[name] += 1
+
+            target_sequence = getattr(target, "target_sequence", None)
+            taxonomy = getattr(target_sequence, "taxonomy", None)
+            organism_name = getattr(taxonomy, "organism_name", None)
+            if organism_name:
+                target_organism_name_counter[organism_name] += 1
+
+            target_accession = getattr(target, "target_accession", None)
+            accession = getattr(target_accession, "accession", None)
+            if accession:
+                target_accession_counter[accession] += 1
+
+    publication_author_name_counter: Counter[str] = Counter()
+    publication_db_name_counter: Counter[str] = Counter()
+    publication_journal_counter: Counter[str] = Counter()
+    for score_set in score_sets:
+        for publication_association in getattr(score_set, "publication_identifier_associations", []):
+            publication = getattr(publication_association, "publication", None)
+
+            authors = getattr(publication, "authors", [])
+            for author in authors:
+                name = author.get("name")
+                if name:
+                    publication_author_name_counter[name] += 1
+
+            db_name = getattr(publication, "db_name", None)
+            if db_name:
+                publication_db_name_counter[db_name] += 1
+
+            journal = getattr(publication, "publication_journal", None)
+            if journal:
+                publication_journal_counter[journal] += 1
+
+    logger.debug(msg="Score set search filter options were fetched.", extra=logging_context())
+
+    return {
+        "target_gene_categories": score_set_search_filter_options_from_counter(target_category_counter),
+        "target_gene_names": score_set_search_filter_options_from_counter(target_name_counter),
+        "target_organism_names": score_set_search_filter_options_from_counter(target_organism_name_counter),
+        "target_accessions": score_set_search_filter_options_from_counter(target_accession_counter),
+        "publication_author_names": score_set_search_filter_options_from_counter(publication_author_name_counter),
+        "publication_db_names": score_set_search_filter_options_from_counter(publication_db_name_counter),
+        "publication_journals": score_set_search_filter_options_from_counter(publication_journal_counter),
+    }
 
 
 def fetch_superseding_score_set_in_search_result(
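Because search_score_sets now returns a dict instead of a bare list, callers page through results roughly as below. This is a sketch only: the ScoreSetsSearch import path and its text/offset/limit field names are inferred from context, not confirmed by this diff:

    # Hypothetical caller; `db` is an open SQLAlchemy Session.
    from mavedb.lib.score_sets import search_score_sets
    from mavedb.view_models.search import ScoreSetsSearch  # import path assumed

    search = ScoreSetsSearch(text="BRCA1", offset=0, limit=50)  # field names assumed
    result = search_score_sets(db, owner_or_contributor=None, search=search)

    page = result["score_sets"]        # at most `limit` records
    total = result["num_score_sets"]   # exact total, thanks to the limit + 1 probe
    has_more = (search.offset or 0) + len(page) < total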

src/mavedb/routers/alphafold.py

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
+from fastapi import APIRouter, HTTPException
+import httpx
+import xml.etree.ElementTree as ET
+import re
+
+from mavedb.lib.logging.logged_route import LoggedRoute
+
+ALPHAFOLD_BASE = "https://alphafold.ebi.ac.uk/files/"
+
+router = APIRouter(
+    prefix="/api/v1",
+    tags=["alphafold files"],
+    responses={404: {"description": "Not found"}},
+    route_class=LoggedRoute,
+)
+
+@router.get("/alphafold-files/version")
+async def proxy_alphafold_index():
+    """
+    Proxy the AlphaFold files index (an XML document) and return the model
+    version parsed from its NextMarker entry.
+    """
+    async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
+        resp = await client.get(ALPHAFOLD_BASE, headers={"Accept": "application/xml"})
+        if resp.status_code != 200:
+            raise HTTPException(status_code=resp.status_code, detail="Upstream error fetching AlphaFold files index")
+
+        # Parse the XML response.
+        try:
+            root = ET.fromstring(resp.content)
+
+            # Detect the default namespace, if any.
+            if root.tag.startswith("{"):
+                ns_uri = root.tag.split("}", 1)[0][1:]
+                ns = {"x": ns_uri}
+                next_marker_tag = "x:NextMarker"
+            else:
+                ns = {}
+                next_marker_tag = "NextMarker"
+
+            next_marker_el = root.find(next_marker_tag, ns)
+            next_marker = next_marker_el.text if next_marker_el is not None else None
+            if next_marker is None:
+                raise HTTPException(status_code=502, detail="No NextMarker element in AlphaFold files index")
+
+            match = re.search(r"model_(v\d+)\.pdb$", next_marker, re.IGNORECASE)
+            if not match:
+                raise HTTPException(status_code=500, detail="Malformed AlphaFold PDB ID in XML")
+            version = match.group(1)
+            return {"version": version.lower()}
+
+        except ET.ParseError as e:
+            raise HTTPException(status_code=502, detail=f"Failed to parse upstream XML: {e}")
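A quick smoke test for the new endpoint, sketched with FastAPI's TestClient. Mounting the router on a bare app is an assumption about the wider application, and the request goes to the live AlphaFold host:

    # Illustrative smoke test; performs a real request to alphafold.ebi.ac.uk.
    from fastapi import FastAPI
    from fastapi.testclient import TestClient

    from mavedb.routers.alphafold import router

    app = FastAPI()
    app.include_router(router)

    client = TestClient(app)
    response = client.get("/api/v1/alphafold-files/version")
    assert response.status_code == 200
    print(response.json())  # e.g. {"version": "v4"} when the index's NextMarker ends in model_v4.pdb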
