diff --git a/llama-index-core/llama_index/core/vector_stores/types.py b/llama-index-core/llama_index/core/vector_stores/types.py
index 12177296b8..29df41ad8b 100644
--- a/llama-index-core/llama_index/core/vector_stores/types.py
+++ b/llama-index-core/llama_index/core/vector_stores/types.py
@@ -51,6 +51,9 @@ class VectorStoreQueryMode(str, Enum):
     TEXT_SEARCH = "text_search"
     SEMANTIC_HYBRID = "semantic_hybrid"
 
+    # NOTE: currently only used by postgres filters search
+    FILTERS = "filters"
+
     # fit learners
     SVM = "svm"
     LOGISTIC_REGRESSION = "logistic_regression"
@@ -257,6 +260,8 @@ class VectorStoreQuery:
     sparse_top_k: Optional[int] = None
     # NOTE: return top k results from hybrid search. similarity_top_k is used for dense search top k
     hybrid_top_k: Optional[int] = None
+    # NOTE: currently only used by postgres filters search
+    filters_top_k: Optional[int] = None
 
 
 @runtime_checkable
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-postgres/llama_index/vector_stores/postgres/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-postgres/llama_index/vector_stores/postgres/base.py
index 59e66de2c9..7a7d72ff23 100644
--- a/llama-index-integrations/vector_stores/llama-index-vector-stores-postgres/llama_index/vector_stores/postgres/base.py
+++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-postgres/llama_index/vector_stores/postgres/base.py
@@ -712,6 +712,41 @@ async def _aquery_with_score(
                 for item in res.all()
             ]
 
+    def _build_filters_query(
+        self,
+        limit: int,
+        metadata_filters: Optional[MetadataFilters] = None,
+    ) -> Any:
+        """Build a SELECT that returns rows matching only the metadata filters.
+
+        Args:
+            limit: Maximum number of rows to return.
+            metadata_filters: Optional filters forwarded to
+                ``_apply_filters_and_limit``.
+
+        Returns:
+            The statement produced by ``_apply_filters_and_limit``.
+        """
+        from sqlalchemy import select
+        from sqlalchemy.sql import text
+
+        # NOTE(review): ``text_search_tsv`` looks like a tsvector column rather
+        # than a numeric score, so ordering by it (and later exposing it as
+        # ``similarity``) is probably not a meaningful ranking -- confirm.
+        stmt = (
+            select(  # type: ignore
+                self._table_class.id,
+                self._table_class.node_id,
+                self._table_class.text,
+                self._table_class.metadata_,
+                self._table_class.text_search_tsv.label("rank"),
+            )
+            .order_by(text("rank desc"))
+        )
+
+        # type: ignore
+        return self._apply_filters_and_limit(stmt, limit, metadata_filters)
+
     def _build_sparse_query(
         self,
         query_str: Optional[str],
@@ -799,6 +834,59 @@ def _sparse_query_with_rank(
                 for item in res.all()
             ]
 
+    def _query_filters_only(
+        self,
+        limit: int = 10,
+        metadata_filters: Optional[MetadataFilters] = None,
+    ) -> List[DBEmbeddingRow]:
+        """Run the filters-only query on the sync session.
+
+        Args:
+            limit: Maximum number of rows to return.
+            metadata_filters: Optional metadata filters to apply.
+
+        Returns:
+            One ``DBEmbeddingRow`` per matching row; ``similarity`` carries the
+            ``rank`` column selected by ``_build_filters_query``.
+        """
+        stmt = self._build_filters_query(limit, metadata_filters)
+        with self._session() as session, session.begin():
+            res = session.execute(stmt)
+            return [
+                DBEmbeddingRow(
+                    node_id=item.node_id,
+                    text=item.text,
+                    metadata=item.metadata_,
+                    similarity=item.rank,
+                )
+                for item in res.all()
+            ]
+
+    async def _aquery_filters_only(
+        self,
+        limit: int = 10,
+        metadata_filters: Optional[MetadataFilters] = None,
+    ) -> List[DBEmbeddingRow]:
+        """Async counterpart of ``_query_filters_only``.
+
+        Awaits the statement on the async session so that ``aquery`` does not
+        block the event loop with a synchronous database round trip.
+        """
+        # NOTE(review): assumes ``self._async_session`` is the async session
+        # factory used by the other async query helpers -- confirm.
+        stmt = self._build_filters_query(limit, metadata_filters)
+        async with self._async_session() as async_session, async_session.begin():
+            res = await async_session.execute(stmt)
+            return [
+                DBEmbeddingRow(
+                    node_id=item.node_id,
+                    text=item.text,
+                    metadata=item.metadata_,
+                    similarity=item.rank,
+                )
+                for item in res.all()
+            ]
+
     async def _async_hybrid_query(
         self, query: VectorStoreQuery, **kwargs: Any
     ) -> List[DBEmbeddingRow]:
@@ -880,6 +968,9 @@ async def aquery(
         self._initialize()
         if query.mode == VectorStoreQueryMode.HYBRID:
             results = await self._async_hybrid_query(query, **kwargs)
+        elif query.mode == VectorStoreQueryMode.FILTERS:
+            filters_top_k = query.filters_top_k or query.similarity_top_k
+            results = await self._aquery_filters_only(filters_top_k, query.filters)
         elif query.mode in [
             VectorStoreQueryMode.SPARSE,
             VectorStoreQueryMode.TEXT_SEARCH,
@@ -904,6 +995,9 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul
         self._initialize()
         if query.mode == VectorStoreQueryMode.HYBRID:
             results = self._hybrid_query(query, **kwargs)
+        elif query.mode == VectorStoreQueryMode.FILTERS:
+            filters_top_k = query.filters_top_k or query.similarity_top_k
+            results = self._query_filters_only(filters_top_k, query.filters)
         elif query.mode in [
             VectorStoreQueryMode.SPARSE,
             VectorStoreQueryMode.TEXT_SEARCH,