Skip to content

Commit 88ced66

Browse files
authored
✨ Now documents and chunks support semantic search
2 parents 54cf613 + 94d893b commit 88ced66

File tree

13 files changed

+908
-100
lines changed

13 files changed

+908
-100
lines changed

backend/apps/vectordatabase_app.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
66
from fastapi.responses import JSONResponse
77

8-
from consts.model import IndexingResponse
8+
from consts.model import HybridSearchRequest, IndexingResponse
99
from nexent.vector_database.base import VectorDatabaseCore
1010
from services.vectordatabase_service import (
1111
ElasticSearchService,
@@ -226,3 +226,32 @@ def get_index_chunks(
226226
f"Error getting chunks for index '{index_name}': {error_msg}")
227227
raise HTTPException(
228228
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error getting chunks: {error_msg}")
229+
230+
231+
@router.post("/search/hybrid")
async def hybrid_search(
    payload: HybridSearchRequest,
    vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
    authorization: Optional[str] = Header(None),
):
    """Run a hybrid (accurate + semantic) search across indices.

    Args:
        payload: Validated request body carrying the query text, the index
            names to search, the result limit and the accurate-score weight.
        vdb_core: Vector database core resolved through ``get_vector_db_core``.
        authorization: Raw ``Authorization`` header value, passed to
            ``get_current_user_id`` to resolve the tenant.

    Returns:
        A ``JSONResponse`` with status 200 whose content is the dictionary
        returned by ``ElasticSearchService.search_hybrid``.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised while handling the
            request; 500 for any other exception.
    """
    try:
        # The first element of the returned pair is not needed here.
        _, tenant_id = get_current_user_id(authorization)
        # NOTE(review): this is a synchronous call inside an async handler;
        # confirm whether it should be off-loaded to a worker thread.
        result = ElasticSearchService.search_hybrid(
            index_names=payload.index_names,
            query=payload.query,
            tenant_id=tenant_id,
            top_k=payload.top_k,
            weight_accurate=payload.weight_accurate,
            vdb_core=vdb_core,
        )
        return JSONResponse(status_code=HTTPStatus.OK, content=result)
    except ValueError as exc:
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(
            status_code=HTTPStatus.BAD_REQUEST,
            detail=str(exc),
        ) from exc
    except Exception as exc:
        logger.error(f"Hybrid search failed: {exc}", exc_info=True)
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail=f"Error executing hybrid search: {str(exc)}",
        ) from exc

backend/consts/model.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,18 @@ class IndexingResponse(BaseModel):
175175
total_submitted: int
176176

177177

178+
class HybridSearchRequest(BaseModel):
    """Body schema for a hybrid knowledge-base search request."""

    # Search text; must contain at least one character.
    query: str = Field(
        ...,
        min_length=1,
        description="Search query text",
    )
    # Indices to search; at least one entry is required.
    index_names: List[str] = Field(
        ...,
        min_items=1,
        description="List of index names to search",
    )
    # Result limit, constrained to 1..100 and defaulting to 10.
    top_k: int = Field(
        10,
        ge=1,
        le=100,
        description="Number of results to return",
    )
    # Weight in [0.0, 1.0] for the accurate score; defaults to 0.5.
    weight_accurate: float = Field(
        0.5,
        ge=0.0,
        le=1.0,
        description="Weight applied to accurate search scores",
    )
188+
189+
178190
# Request models
179191
class ProcessParams(BaseModel):
180192
chunking_strategy: Optional[str] = "basic"

backend/services/vectordatabase_service.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -996,3 +996,66 @@ def get_index_chunks(
996996
error_msg = f"Error retrieving chunks from index {index_name}: {str(e)}"
997997
logger.error(error_msg)
998998
raise Exception(error_msg)
999+
1000+
@staticmethod
def search_hybrid(
    *,
    index_names: List[str],
    query: str,
    tenant_id: str,
    top_k: int = 10,
    weight_accurate: float = 0.5,
    vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
):
    """Execute a hybrid search that blends accurate and semantic scoring.

    All arguments are keyword-only.

    Args:
        index_names: Names of the indices to search; must not be empty.
        query: Search text; must contain non-whitespace characters.
        tenant_id: Tenant used to look up the embedding model.
        top_k: Maximum number of results requested; must be positive.
        weight_accurate: Weight in ``[0, 1]`` forwarded to the vector
            database core for the accurate score.
        vdb_core: Vector database core whose ``hybrid_search`` is invoked.
            NOTE(review): the ``Depends(...)`` default is presumably only
            resolved when FastAPI injects it into a route signature, so
            direct callers should pass ``vdb_core`` explicitly; confirm.

    Returns:
        A dict with ``results`` (one flattened document per hit, carrying
        ``score``, ``index`` and, when available, ``score_details``),
        ``total`` (number of results) and ``query_time_ms`` (elapsed time of
        the ``hybrid_search`` call in whole milliseconds).

    Raises:
        ValueError: If an argument fails validation or no embedding model is
            configured for the tenant.
        Exception: For any other failure, chained to the original error.
    """
    try:
        if not tenant_id:
            raise ValueError("Tenant ID is required for hybrid search")
        if not query or not query.strip():
            raise ValueError("Query text is required for hybrid search")
        if not index_names:
            raise ValueError("At least one index name is required")
        if top_k <= 0:
            raise ValueError("top_k must be greater than 0")
        if weight_accurate < 0 or weight_accurate > 1:
            raise ValueError("weight_accurate must be between 0 and 1")

        embedding_model = get_embedding_model(tenant_id)
        if not embedding_model:
            raise ValueError(
                "No embedding model configured for the current tenant")

        # Time only the database call, not validation or formatting.
        start_time = time.perf_counter()
        raw_results = vdb_core.hybrid_search(
            index_names=index_names,
            query_text=query,
            embedding_model=embedding_model,
            top_k=top_k,
            weight_accurate=weight_accurate,
        )
        elapsed_ms = int((time.perf_counter() - start_time) * 1000)

        formatted_results = []
        # Treat a missing result set as "no hits" rather than failing.
        for item in raw_results or []:
            # Copy so the raw hit is not mutated; tolerate a null document.
            document = dict(item.get("document") or {})
            document["score"] = item.get("score")
            document["index"] = item.get("index")
            if "scores" in item:
                document["score_details"] = item["scores"]
            formatted_results.append(document)

        return {
            "results": formatted_results,
            "total": len(formatted_results),
            "query_time_ms": elapsed_ms,
        }
    except ValueError:
        # Validation errors propagate unchanged to the caller.
        raise
    except Exception as exc:
        logger.error(
            f"Hybrid search failed for indices {index_names}: {exc}",
            exc_info=True,
        )
        # Chain the cause so the original traceback is preserved.
        raise Exception(f"Error executing hybrid search: {str(exc)}") from exc

0 commit comments

Comments
 (0)