diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/llama_index/vector_stores/azure_postgres/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/llama_index/vector_stores/azure_postgres/base.py index f7e1979fd2..e4d9974b77 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/llama_index/vector_stores/azure_postgres/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/llama_index/vector_stores/azure_postgres/base.py @@ -16,6 +16,7 @@ MetadataFilter, MetadataFilters, VectorStoreQuery, + VectorStoreQueryMode, VectorStoreQueryResult, ) from llama_index.core.vector_stores.utils import ( @@ -284,6 +285,13 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul filter_expression=metadata_filters_to_sql(query.filters), **kwargs, ) + if query.mode == VectorStoreQueryMode.HYBRID: + text_results = self._full_text_search( + query_str=query.query_str, + **kwargs, + ) + results = self._dedup_results(results + text_results) + nodes = [] similarities = [] ids = [] diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/llama_index/vector_stores/azure_postgres/common/_base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/llama_index/vector_stores/azure_postgres/common/_base.py index 9a30a9929a..feb47f8eda 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/llama_index/vector_stores/azure_postgres/common/_base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/llama_index/vector_stores/azure_postgres/common/_base.py @@ -714,3 +714,105 @@ def _get_by_ids(self, ids: Sequence[str], /) -> list[dict[str, Any]]: for result in resultset ] return documents + + def _full_text_search( + self, + query_str: str, + k: int = 4, + language: str = "english", + **kwargs: Any, + ) -> list[tuple[dict, float, None]]: + """Run a Postgres full-text search using plainto_tsquery and return ranked results. + + Args: + query_str: The free-text query string to search for. + k: Maximum number of results to return. + language: The text search configuration/language to use (e.g. 'english'). + **kwargs: Reserved for future options; currently ignored. + + Returns: + List of tuples (document_dict, rank, None). Document dict contains id, content, and metadata. + """ + with ( + self.connection_pool.connection() as conn, + conn.cursor(row_factory=dict_row) as cursor, + ): + # normalize metadata column(s) + metadata_columns: list[str] + if isinstance(self.metadata_column, list): + metadata_columns = [ + col if isinstance(col, str) else col[0] + for col in self.metadata_column + ] + elif isinstance(self.metadata_column, str): + metadata_columns = [self.metadata_column] + else: + metadata_columns = [] + + sql_query = sql.SQL( + """ + SELECT {id_col}, {content_col}, + rank() OVER ( + ORDER BY ts_rank_cd( + to_tsvector({lang}, {content_col}), + plainto_tsquery({lang}, %(q)s) + ) DESC + ) AS rank + FROM {table} + WHERE plainto_tsquery({lang}, %(q)s) @@ to_tsvector({lang}, {content_col}) + ORDER BY rank + LIMIT %(top_k)s + """ + ).format( + id_col=sql.Identifier(self.id_column), + content_col=sql.Identifier(self.content_column), + lang=sql.Literal(language), + table=sql.Identifier(self.schema_name, self.table_name), + ) + + cursor.execute(sql_query, {"q": query_str, "top_k": k}) + rows = cursor.fetchall() + + results: list[tuple[dict, float, None]] = [] + for row in rows: + doc = { + "id": row[self.id_column], + "content": row[self.content_column], + "metadata": ( + row[metadata_columns[0]] + if isinstance(self.metadata_column, str) + else {col: row[col] for col in metadata_columns} + ), + } + rank_val = float(row["rank"]) if row.get("rank") is not None else 0.0 + results.append((doc, rank_val, None)) + + return results + + def _dedup_results( + self, results: list[tuple[dict, float, Any]] + ) -> list[tuple[dict, float, Any]]: + """Deduplicate search results by document id, preserving order. + + Accepts a list of tuples (document_dict, score, optional_embedding) where + document_dict contains at least the id column (self.id_column) or 'id'. + Returns a filtered list keeping the first occurrence of each id. + """ + seen_ids: set = set() + deduped: list[tuple[dict, float, Any]] = [] + for doc, score, emb in results: + # robustly get id value using configured id_column or fallback to 'id' + doc_id = doc.get(self.id_column) if isinstance(doc, dict) else None + if doc_id is None: + doc_id = doc.get("id") if isinstance(doc, dict) else None + + # If there's no id, treat the row as unique and keep it + if doc_id is None: + deduped.append((doc, score, emb)) + continue + + if doc_id not in seen_ids: + deduped.append((doc, score, emb)) + seen_ids.add(doc_id) + + return deduped diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/tests/llama_index/test_vectorstore.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/tests/llama_index/test_vectorstore.py index 5c22b77fef..c28e3b3a5a 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/tests/llama_index/test_vectorstore.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/tests/llama_index/test_vectorstore.py @@ -16,6 +16,7 @@ from llama_index.core.vector_stores.types import ( MetadataFilters, VectorStoreQuery, + VectorStoreQueryMode, ) from llama_index.vector_stores.azure_postgres import AzurePGVectorStore from llama_index.vector_stores.azure_postgres.common import DiskANN @@ -316,16 +317,18 @@ def test_clear( assert not remaining_set, "All document IDs should have been deleted" @pytest.mark.parametrize( - ["query", "embedding", "k", "filters"], + ["query", "embedding", "k", "filters", "mode"], [ - ("query about cats", [0.99] * 1536, 2, None), - ("query about animals", [0.5] * 1536, 3, None), - ("query about cats", [0.99] * 1536, 2, "filter1"), - ("query about cats", [0.99] * 1536, 2, "filter2"), + ("query about cats", [0.99] * 1536, 2, None, None), + ("query about cats", [0.99] * 1536, 2, None, "hybrid"), + ("query about animals", [0.5] * 1536, 3, None, None), + ("query about cats", [0.99] * 1536, 2, "filter1", None), + ("query about cats", [0.99] * 1536, 2, "filter2", None), ], indirect=["filters"], ids=[ "search-cats", + "search-cats-hybrid", "search-animals", "search-cats-filtered", "search-cats-multifiltered", @@ -338,6 +341,7 @@ def test_query( embedding: list[float], k: int, filters: MetadataFilters | None, + mode: str | None, ): """Run a similarity query and assert returned documents match expectations. @@ -350,6 +354,11 @@ def test_query( query_embedding=embedding, similarity_top_k=k, filters=filters, + mode=( + VectorStoreQueryMode.HYBRID + if mode == "hybrid" + else VectorStoreQueryMode.DEFAULT + ), ) results = vectorstore.query(query=vsquery) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/uv.lock b/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/uv.lock index abafd4ae21..02cf9583ed 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/uv.lock +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azurepostgresql/uv.lock @@ -1889,7 +1889,7 @@ type = [ requires-dist = [ { name = "aiohttp", specifier = "~=3.0" }, { name = "azure-identity", specifier = "~=1.0" }, - { name = "llama-index-core", specifier = "~=0.13.0" }, + { name = "llama-index-core", specifier = ">=0.13,<0.15" }, { name = "numpy", specifier = "~=2.0" }, { name = "pgvector", specifier = "~=0.4.0" }, { name = "psycopg", extras = ["binary", "pool"], specifier = "~=3.0" },