Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
MetadataFilter,
MetadataFilters,
VectorStoreQuery,
VectorStoreQueryMode,
VectorStoreQueryResult,
)
from llama_index.core.vector_stores.utils import (
Expand Down Expand Up @@ -284,6 +285,13 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul
filter_expression=metadata_filters_to_sql(query.filters),
**kwargs,
)
if query.mode == VectorStoreQueryMode.HYBRID:
text_results = self._full_text_search(
query_str=query.query_str,
**kwargs,
)
results = self._dedup_results(results + text_results)

nodes = []
similarities = []
ids = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -714,3 +714,105 @@ def _get_by_ids(self, ids: Sequence[str], /) -> list[dict[str, Any]]:
for result in resultset
]
return documents

def _full_text_search(
    self,
    query_str: str,
    k: int = 4,
    language: str = "english",
    **kwargs: Any,
) -> list[tuple[dict, float, None]]:
    """Run a Postgres full-text search using plainto_tsquery and return ranked results.

    Args:
        query_str: The free-text query string to search for.
        k: Maximum number of results to return.
        language: The text search configuration/language to use (e.g. 'english').
        **kwargs: Reserved for future options; currently ignored.

    Returns:
        List of tuples (document_dict, rank, None). Document dict contains id, content, and metadata.
    """
    with (
        self.connection_pool.connection() as conn,
        conn.cursor(row_factory=dict_row) as cursor,
    ):
        # Normalize metadata column(s): self.metadata_column may be a str,
        # a list of str, or a list whose entries are (name, ...) tuples.
        metadata_columns: list[str]
        if isinstance(self.metadata_column, list):
            metadata_columns = [
                col if isinstance(col, str) else col[0]
                for col in self.metadata_column
            ]
        elif isinstance(self.metadata_column, str):
            metadata_columns = [self.metadata_column]
        else:
            metadata_columns = []

        # BUG FIX: the metadata column(s) must be part of the SELECT list.
        # Previously only the id and content columns were selected, so the
        # row-dict lookups below (row[metadata_col]) raised KeyError whenever
        # a metadata column was configured.
        select_cols = sql.SQL(", ").join(
            sql.Identifier(col)
            for col in [self.id_column, self.content_column, *metadata_columns]
        )

        sql_query = sql.SQL(
            """
            SELECT {select_cols},
                   rank() OVER (
                       ORDER BY ts_rank_cd(
                           to_tsvector({lang}, {content_col}),
                           plainto_tsquery({lang}, %(q)s)
                       ) DESC
                   ) AS rank
            FROM {table}
            WHERE plainto_tsquery({lang}, %(q)s) @@ to_tsvector({lang}, {content_col})
            ORDER BY rank
            LIMIT %(top_k)s
            """
        ).format(
            select_cols=select_cols,
            content_col=sql.Identifier(self.content_column),
            lang=sql.Literal(language),
            table=sql.Identifier(self.schema_name, self.table_name),
        )

        cursor.execute(sql_query, {"q": query_str, "top_k": k})
        rows = cursor.fetchall()

    results: list[tuple[dict, float, None]] = []
    for row in rows:
        doc = {
            "id": row[self.id_column],
            "content": row[self.content_column],
            "metadata": (
                row[metadata_columns[0]]
                if isinstance(self.metadata_column, str)
                else {col: row[col] for col in metadata_columns}
            ),
        }
        # `rank` is the 1-based position in the ts_rank_cd ordering; guard
        # against a missing/NULL value just in case.
        rank_val = float(row["rank"]) if row.get("rank") is not None else 0.0
        results.append((doc, rank_val, None))

    return results

def _dedup_results(
self, results: list[tuple[dict, float, Any]]
) -> list[tuple[dict, float, Any]]:
"""Deduplicate search results by document id, preserving order.

Accepts a list of tuples (document_dict, score, optional_embedding) where
document_dict contains at least the id column (self.id_column) or 'id'.
Returns a filtered list keeping the first occurrence of each id.
"""
seen_ids: set = set()
deduped: list[tuple[dict, float, Any]] = []
for doc, score, emb in results:
# robustly get id value using configured id_column or fallback to 'id'
doc_id = doc.get(self.id_column) if isinstance(doc, dict) else None
if doc_id is None:
doc_id = doc.get("id") if isinstance(doc, dict) else None

# If there's no id, treat the row as unique and keep it
if doc_id is None:
deduped.append((doc, score, emb))
continue

if doc_id not in seen_ids:
deduped.append((doc, score, emb))
seen_ids.add(doc_id)

return deduped
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from llama_index.core.vector_stores.types import (
MetadataFilters,
VectorStoreQuery,
VectorStoreQueryMode,
)
from llama_index.vector_stores.azure_postgres import AzurePGVectorStore
from llama_index.vector_stores.azure_postgres.common import DiskANN
Expand Down Expand Up @@ -316,16 +317,18 @@ def test_clear(
assert not remaining_set, "All document IDs should have been deleted"

@pytest.mark.parametrize(
["query", "embedding", "k", "filters"],
["query", "embedding", "k", "filters", "mode"],
[
("query about cats", [0.99] * 1536, 2, None),
("query about animals", [0.5] * 1536, 3, None),
("query about cats", [0.99] * 1536, 2, "filter1"),
("query about cats", [0.99] * 1536, 2, "filter2"),
("query about cats", [0.99] * 1536, 2, None, None),
("query about cats", [0.99] * 1536, 2, None, "hybrid"),
("query about animals", [0.5] * 1536, 3, None, None),
("query about cats", [0.99] * 1536, 2, "filter1", None),
("query about cats", [0.99] * 1536, 2, "filter2", None),
],
indirect=["filters"],
ids=[
"search-cats",
"search-cats-hybrid",
"search-animals",
"search-cats-filtered",
"search-cats-multifiltered",
Expand All @@ -338,6 +341,7 @@ def test_query(
embedding: list[float],
k: int,
filters: MetadataFilters | None,
mode: str | None,
):
"""Run a similarity query and assert returned documents match expectations.

Expand All @@ -350,6 +354,11 @@ def test_query(
query_embedding=embedding,
similarity_top_k=k,
filters=filters,
mode=(
VectorStoreQueryMode.HYBRID
if mode == "hybrid"
else VectorStoreQueryMode.DEFAULT
),
)
results = vectorstore.query(query=vsquery)

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading