1414from ga4gh .vrs import models as vrs_models
1515from snowflake .sqlalchemy import MergeInto
1616from snowflake .sqlalchemy .snowdialect import SnowflakeDialect
17- from sqlalchemy import String , column , create_engine , delete , insert , select , text
17+ from sqlalchemy import (
18+ String ,
19+ and_ ,
20+ column ,
21+ create_engine ,
22+ delete ,
23+ insert ,
24+ or_ ,
25+ select ,
26+ text ,
27+ )
1828from sqlalchemy .engine .url import URL
1929from sqlalchemy .exc import IntegrityError
2030from sqlalchemy .ext .compiler import compiles
2737from anyvar .core import objects as anyvar_objects
2838from anyvar .storage import orm
2939from anyvar .storage .base import (
40+ AlleleSearchPage ,
3041 DataIntegrityError ,
3142 IncompleteVrsObjectError ,
3243 InvalidSearchParamsError ,
@@ -616,13 +627,18 @@ def search_alleles(
616627 refget_accession : str ,
617628 start : int ,
618629 stop : int ,
619- ) -> Iterable [vrs_models .Allele ]:
630+ page_size : int = 1000 ,
631+ cursor : str | None = None ,
632+ ) -> AlleleSearchPage :
620633 """Find all Alleles that are located within the specified interval.
621634
622635 The interval is the closed range [start, stop] on the sequence identified by
623636 the RefGet SequenceReference accession (`SQ.*`). Both `start` and `stop` are
624637 inclusive and represent inter-residue positions.
625638
639+ Uses keyset pagination, meaning that altering the page size while looping through
640+ successive cursors will effectively nullify the search loop.
641+
626642 Currently, any variation which overlaps the queried region is returned.
627643
628644 Todo (see Issue #338):
@@ -636,12 +652,19 @@ def search_alleles(
636652 :param refget_accession: refget accession (e.g. `"SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86"`)
637653 :param start: Inclusive, inter-residue start position of the interval
638654 :param stop: Inclusive, inter-residue end position of the interval
639- :return: an iterable of matching VRS alleles
655+ :param page_size: Max # of results to return
656+ :param cursor: Opaque key indicating start location for query in pagination
657+ :return: Results page including variants and a cursor for next result page, if available
640658 :raise InvalidSearchParamsError: if above search param requirements are violated
641659 """
642660 if start < 0 or stop < 0 or start > stop :
643661 raise InvalidSearchParamsError
644662
663+ seek_start : int | None = None
664+ seek_id : str | None = None
665+ if cursor :
666+ seek_start , seek_id = self ._decode_search_cursor (cursor )
667+
645668 with self .session_factory () as session :
646669 # Query alleles with overlapping locations
647670 # NOTE: this is any overlap, not containment.
@@ -659,13 +682,26 @@ def search_alleles(
659682 orm .Location .start <= stop ,
660683 orm .Location .end >= start ,
661684 )
662- .limit (self .MAX_ROWS )
685+ .order_by (orm .Location .start , orm .Allele .id )
686+ .limit (page_size )
663687 )
664- db_alleles = session .scalars (stmt ).all ()
665688
666- return [
667- mapper_registry .from_db_entity (db_allele ) for db_allele in db_alleles
668- ]
689+ # seek predicate -- assumes ORDER BY location.start ASC, allele.id ASC
690+ if seek_start is not None and seek_id is not None :
691+ stmt = stmt .where (
692+ or_ (
693+ orm .Location .start > seek_start ,
694+ and_ (orm .Location .start == seek_start , orm .Allele .id > seek_id ),
695+ )
696+ )
697+
698+ page_db = session .scalars (stmt ).all ()
699+ items = [mapper_registry .from_db_entity (a ) for a in page_db ]
700+ if not page_db :
701+ return AlleleSearchPage (items = [], next_cursor = None )
702+ last = page_db [- 1 ]
703+ next_cursor = self ._encode_search_cursor (last .location .start , last .id )
704+ return AlleleSearchPage (items = items , next_cursor = next_cursor )
669705
670706
671707@compiles (Insert , "snowflake" )
0 commit comments