Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions openapi/lex-db.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,39 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/HTTPValidationError'
# Hybrid search endpoint (operationId: hybrid_search).
# POST with the target index given as a required string path parameter and a
# HybridSearchRequest JSON body; answers 200 with HybridSearchResults or 422
# with HTTPValidationError.
/api/hybrid-search/indexes/{index_name}/query:
post:
tags:
- lex-db
summary: Perform hybrid search combining semantic and full-text search
description: Perform hybrid search using RRF fusion.
operationId: hybrid_search
parameters:
# The index to query is part of the URL path, not the request body.
- name: index_name
in: path
required: true
schema:
type: string
title: Index Name
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/HybridSearchRequest'
responses:
'200':
description: Successful Response
content:
application/json:
schema:
$ref: '#/components/schemas/HybridSearchResults'
# Returned when the request fails validation.
'422':
description: Validation Error
content:
application/json:
schema:
$ref: '#/components/schemas/HTTPValidationError'
/api/articles:
get:
tags:
Expand Down Expand Up @@ -385,3 +418,84 @@ components:
- results
title: VectorSearchResults
description: Result of a vector search.
# Request body for the hybrid search endpoint.
# Only query_text is required; every other field is an integer with a default
# (top_k: 10, top_k_semantic: 50, top_k_fts: 50, rrf_k: 60).
# NOTE(review): top_k_semantic / top_k_fts look like per-retriever candidate
# counts and rrf_k like the RRF smoothing constant - confirm against the
# lex-db implementation, this schema does not say.
HybridSearchRequest:
properties:
query_text:
type: string
title: Query Text
top_k:
type: integer
title: Top K
default: 10
top_k_semantic:
type: integer
title: Top K Semantic
default: 50
top_k_fts:
type: integer
title: Top K Fts
default: 50
rrf_k:
type: integer
title: Rrf K
default: 60
type: object
required:
- query_text
title: HybridSearchRequest
description: Hybrid search request model.
# Response envelope for the hybrid search endpoint: an object whose single,
# required `results` property is an array of LexDbHybridSearchSearchResult.
HybridSearchResults:
properties:
results:
items:
$ref: '#/components/schemas/LexDbHybridSearchSearchResult'
type: array
title: Results
type: object
required:
- results
title: HybridSearchResults
description: Results of a hybrid search.
# One entry of HybridSearchResults.results.
# semantic_rank and fts_rank are the only optional properties and may be
# either an integer or null; all other properties are required.
# NOTE(review): unlike the neighbouring schemas this one has no `description`
# field - presumably because the spec is generated; confirm before adding one
# by hand.
LexDbHybridSearchSearchResult:
properties:
rank:
type: integer
title: Rank
article_id:
type: integer
title: Article Id
article_headword:
type: string
title: Article Headword
chunk_sequence:
type: integer
title: Chunk Sequence
chunk_text:
type: string
title: Chunk Text
rrf_score:
type: number
title: Rrf Score
# Nullable: integer or null.
semantic_rank:
anyOf:
- type: integer
- type: 'null'
title: Semantic Rank
# Nullable: integer or null.
fts_rank:
anyOf:
- type: integer
- type: 'null'
title: Fts Rank
source:
type: string
title: Source
type: object
required:
- rank
- article_id
- article_headword
- chunk_sequence
- chunk_text
- rrf_score
- source
title: LexDbHybridSearchSearchResult
39 changes: 39 additions & 0 deletions src/lex_llm/api/connectors/lex_db_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from lex_db_api.api_client import ApiClient
from lex_db_api.configuration import Configuration
from lex_db_api.models.vector_search_request import VectorSearchRequest
from lex_db_api.models.hybrid_search_request import HybridSearchRequest

lexdb_client = ApiClient(
configuration=Configuration(host=os.getenv("DB_HOST", "http://localhost:8000"))
Expand Down Expand Up @@ -59,3 +60,41 @@ async def vector_search(
print(f"Error connecting to LexDB: {e}")
# TODO: more robust error handling/logging
return []

async def hybrid_search(
    self,
    query: str,
    top_k: int = 10,
    top_k_semantic: int = 50,
    top_k_fts: int = 50,
    rrf_k: int = 60,
    index_name: str = "article_embeddings_e5",
) -> List[LexArticle]:
    """Run a hybrid (RRF-fused) search through the lex-db API.

    Args:
        query: Text sent as ``query_text`` in the request.
        top_k: Forwarded unchanged as ``top_k``.
        top_k_semantic: Forwarded unchanged as ``top_k_semantic``.
        top_k_fts: Forwarded unchanged as ``top_k_fts``.
        rrf_k: Forwarded unchanged as ``rrf_k``.
        index_name: Index handed to the lex-db ``hybrid_search`` call.

    Returns:
        One ``LexArticle`` per result in the response, in response order.
        An empty list is returned when the response carries no results or
        when an ``httpx.RequestError`` is caught.
    """
    try:
        request = HybridSearchRequest(
            query_text=query,
            top_k=top_k,
            top_k_semantic=top_k_semantic,
            top_k_fts=top_k_fts,
            rrf_k=rrf_k,
        )
        response = lexdb_api.hybrid_search(index_name, request)

        # Guard clause: nothing came back, so there is nothing to convert.
        if not response.results:
            return []

        articles = []
        for hit in response.results:
            articles.append(
                LexArticle(
                    id=hit.article_id,
                    title=hit.article_headword,
                    text=hit.chunk_text,
                    # The headword is used verbatim as the URL path segment.
                    url=f"https://lex.dk/{hit.article_headword}",
                )
            )
        return articles
    except httpx.RequestError as e:
        # Best-effort: report the connection problem and yield no documents.
        print(f"Error connecting to LexDB: {e}")
        return []
14 changes: 10 additions & 4 deletions src/lex_llm/tools/search_knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
def search_knowledge_base(
index_name: str = "openai_large_3_sections",
top_k: int = 10,
search_method: str = "vector_search",
) -> Callable[[Dict[str, Any], EventEmitter], AsyncGenerator[None, None]]:
"""
Creates a knowledge base search step with the specified parameters.

Args:
index_name: The name of the vector index to search
top_k: Number of top results to retrieve

search_method: Search method to use - one of: "vector_search", "hybrid_search"
Returns:
An async generator function compatible with the Orchestrator
"""
Expand All @@ -27,9 +28,14 @@ async def search_knowledge_base(
lex_db_connector = LexDBConnector()
user_input = context.get("user_input", "")

documents = await lex_db_connector.vector_search(
query=user_input, top_k=top_k, index_name=index_name
)
if search_method == "hybrid_search":
documents = await lex_db_connector.hybrid_search(
query=user_input, top_k=top_k, index_name=index_name
)
else: # Default to vector_search
documents = await lex_db_connector.vector_search(
query=user_input, top_k=top_k, index_name=index_name
)
context["retrieved_docs"] = documents
yield

Expand Down
1 change: 1 addition & 0 deletions src/lex_llm/workflows/beta_workflow_v1_large.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should avoid changing existing workflows and instead create new ones, even if that means more duplicated code.

Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def get_workflow(request: WorkflowRunRequest) -> Orchestrator:
search_knowledge_base(
index_name="article_embeddings_e5",
top_k=10,
search_method="hybrid_search",
),
generate_response_with_sources(
llm_provider=OpenRouterProvider(
Expand Down
Loading