centre-for-humanities-computing · Zafarhussain87 · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025
diff --git a/openapi/lex-db.yaml b/openapi/lex-db.yaml
@@ -73,6 +73,39 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/HTTPValidationError'
+  /api/hybrid-search/indexes/{index_name}/query:  # {index_name} is bound by the path parameter declared below
+    post:
+      tags:
+      - lex-db
+      summary: Perform hybrid search combining semantic and full-text search
+      description: Perform hybrid search using RRF fusion.
+      operationId: hybrid_search  # matches the client call lexdb_api.hybrid_search(...) in lex_db_connector.py
+      parameters:
+      - name: index_name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Index Name
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/HybridSearchRequest'
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HybridSearchResults'
+        '422':  # reuses the HTTPValidationError schema, like the preceding operation
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
   /api/articles:
     get:
       tags:
@@ -385,3 +418,84 @@ components:
       - results
       title: VectorSearchResults
       description: Result of a vector search.
+    HybridSearchRequest:  # NOTE(review): the integer fields declare no `minimum`, so 0 or negative values validate
+      properties:
+        query_text:  # the only required field (see `required` below)
+          type: string
+          title: Query Text
+        top_k:  # presumably the number of fused results returned - confirm against lex-db
+          type: integer
+          title: Top K
+          default: 10
+        top_k_semantic:  # presumably the candidate count taken from the semantic search - confirm
+          type: integer
+          title: Top K Semantic
+          default: 50
+        top_k_fts:  # presumably the candidate count taken from the full-text search (fts) - confirm
+          type: integer
+          title: Top K Fts
+          default: 50
+        rrf_k:  # presumably the rank constant k of reciprocal rank fusion - confirm
+          type: integer
+          title: Rrf K
+          default: 60
+      type: object
+      required:
+      - query_text
+      title: HybridSearchRequest
+      description: Hybrid search request model.
+    HybridSearchResults:  # 200 response body of the hybrid_search operation
+      properties:
+        results:  # required; each item is a LexDbHybridSearchSearchResult
+          items:
+            $ref: '#/components/schemas/LexDbHybridSearchSearchResult'
+          type: array
+          title: Results
+      type: object
+      required:
+      - results
+      title: HybridSearchResults
+      description: Results of a hybrid search.
+    LexDbHybridSearchSearchResult:  # NOTE(review): unlike the sibling schemas, this one has no `description`
+      properties:
+        rank:  # presumably the position in the fused result list - confirm
+          type: integer
+          title: Rank
+        article_id:
+          type: integer
+          title: Article Id
+        article_headword:
+          type: string
+          title: Article Headword
+        chunk_sequence:
+          type: integer
+          title: Chunk Sequence
+        chunk_text:
+          type: string
+          title: Chunk Text
+        rrf_score:  # presumably the fused RRF score - confirm
+          type: number
+          title: Rrf Score
+        semantic_rank:  # nullable and not listed in `required`
+          anyOf:
+          - type: integer
+          - type: 'null'
+          title: Semantic Rank
+        fts_rank:  # nullable and not listed in `required`
+          anyOf:
+          - type: integer
+          - type: 'null'
+          title: Fts Rank
+        source:  # NOTE(review): the meaning of this string is not described here
+          type: string
+          title: Source
+      type: object
+      required:
+      - rank
+      - article_id
+      - article_headword
+      - chunk_sequence
+      - chunk_text
+      - rrf_score
+      - source
+      title: LexDbHybridSearchSearchResult
diff --git a/src/lex_llm/api/connectors/lex_db_connector.py b/src/lex_llm/api/connectors/lex_db_connector.py
@@ -7,6 +7,7 @@
 from lex_db_api.api_client import ApiClient
 from lex_db_api.configuration import Configuration
 from lex_db_api.models.vector_search_request import VectorSearchRequest
+from lex_db_api.models.hybrid_search_request import HybridSearchRequest
 
 lexdb_client = ApiClient(
     configuration=Configuration(host=os.getenv("DB_HOST", "http://localhost:8000"))
@@ -59,3 +60,41 @@ async def vector_search(
             print(f"Error connecting to LexDB: {e}")
             # TODO: more robust error handling/logging
             return []
+
+    async def hybrid_search(
+        self,
+        query: str,
+        top_k: int = 10,
+        top_k_semantic: int = 50,
+        top_k_fts: int = 50,
+        rrf_k: int = 60,
+        index_name: str = "article_embeddings_e5",
+    ) -> List[LexArticle]:
+        """Performs hybrid search using RRF fusion via the lex-db API."""
+
+        try:
+            hybrid_req = HybridSearchRequest(
+                query_text=query,
+                top_k=top_k,
+                top_k_semantic=top_k_semantic,
+                top_k_fts=top_k_fts,
+                rrf_k=rrf_k,
+            )
+
+            hybrid_search_result = lexdb_api.hybrid_search(index_name, hybrid_req)
+
+            if hybrid_search_result.results:
+                return [
+                    LexArticle(
+                        id=result.article_id,
+                        title=result.article_headword,
+                        text=result.chunk_text,
+                        url=f"https://lex.dk/{result.article_headword}",
+                    )
+                    for result in hybrid_search_result.results
+                ]
+
+            return []
+        except httpx.RequestError as e:
+            print(f"Error connecting to LexDB: {e}")
+            return []
diff --git a/src/lex_llm/tools/search_knowledge_base.py b/src/lex_llm/tools/search_knowledge_base.py
@@ -8,14 +8,15 @@
 def search_knowledge_base(
     index_name: str = "openai_large_3_sections",
     top_k: int = 10,
+    search_method: str = "vector_search",
 ) -> Callable[[Dict[str, Any], EventEmitter], AsyncGenerator[None, None]]:
     """
     Creates a knowledge base search step with the specified parameters.
 
     Args:
         index_name: The name of the vector index to search
         top_k: Number of top results to retrieve
-
+        search_method: Search method to use - one of: "vector_search", "hybrid_search"
     Returns:
         An async generator function compatible with the Orchestrator
     """
@@ -27,9 +28,14 @@ async def search_knowledge_base(
         lex_db_connector = LexDBConnector()
         user_input = context.get("user_input", "")
 
-        documents = await lex_db_connector.vector_search(
-            query=user_input, top_k=top_k, index_name=index_name
-        )
+        if search_method == "hybrid_search":
+            documents = await lex_db_connector.hybrid_search(
+                query=user_input, top_k=top_k, index_name=index_name
+            )
+        else:  # Any value other than "hybrid_search" falls back to vector_search
+            documents = await lex_db_connector.vector_search(
+                query=user_input, top_k=top_k, index_name=index_name
+            )
         context["retrieved_docs"] = documents
         yield
 

diff --git a/src/lex_llm/workflows/beta_workflow_v1_large.py b/src/lex_llm/workflows/beta_workflow_v1_large.py
@@ -14,6 +14,7 @@ def get_workflow(request: WorkflowRunRequest) -> Orchestrator:
             search_knowledge_base(
                 index_name="article_embeddings_e5",
                 top_k=10,
+                search_method="hybrid_search",
             ),
             generate_response_with_sources(
                 llm_provider=OpenRouterProvider(