66import aiofiles .os
77from shutil import copyfileobj
88from typing import List , Iterable
9- from fastapi import APIRouter , Request , UploadFile , HTTPException , File , Form , Body , Query , status
9+ from fastapi import (
10+ APIRouter ,
11+ Request ,
12+ UploadFile ,
13+ HTTPException ,
14+ File ,
15+ Form ,
16+ Body ,
17+ Query ,
18+ status ,
19+ )
1020from langchain_core .documents import Document
1121from langchain_core .runnables import run_in_executor
1222from langchain_text_splitters import RecursiveCharacterTextSplitter
23+ from functools import lru_cache
1324
1425from app .config import logger , vector_store , RAG_UPLOAD_DIR , CHUNK_SIZE , CHUNK_OVERLAP
1526from app .constants import ERROR_MESSAGES
16- from app .models import StoreDocument , QueryRequestBody , DocumentResponse , QueryMultipleBody
27+ from app .models import (
28+ StoreDocument ,
29+ QueryRequestBody ,
30+ DocumentResponse ,
31+ QueryMultipleBody ,
32+ )
1733from app .services .vector_store .async_pg_vector import AsyncPgVector
1834from app .utils .document_loader import get_loader , clean_text , process_documents
1935from app .utils .health import is_health_ok
@@ -135,6 +151,12 @@ async def delete_documents(document_ids: List[str] = Body(...)):
135151 raise HTTPException (status_code = 500 , detail = str (e ))
136152
137153
@lru_cache(maxsize=128)
def _cached_query_embedding(query: str) -> tuple:
    """Compute the embedding for ``query`` and memoize it as a tuple.

    The result is stored as a tuple so the cached value itself cannot be
    mutated in place by any consumer. At most 128 distinct query strings
    are retained; the least recently used entry is evicted first.
    """
    # NOTE(review): assumes embed_query returns a finite iterable of numbers
    # (e.g. a list of floats) -- confirm against the configured embedding
    # function.
    return tuple(vector_store.embedding_function.embed_query(query))


def get_cached_query_embedding(query: str) -> list:
    """Return the embedding vector for ``query``, reusing cached results.

    Repeated calls with an equal query string skip the call to
    ``vector_store.embedding_function.embed_query``. Every call returns a
    new list, so a caller that modifies the returned vector cannot corrupt
    the entry served to later callers.

    Args:
        query: The query text to embed. Used as the cache key.

    Returns:
        A freshly allocated list holding the embedding values.
    """
    return list(_cached_query_embedding(query))


# Keep the lru_cache management helpers reachable through the public name,
# as they were when the decorator was applied to it directly.
get_cached_query_embedding.cache_info = _cached_query_embedding.cache_info
get_cached_query_embedding.cache_clear = _cached_query_embedding.cache_clear
158+
159+
138160@router .post ("/query" )
139161async def query_embeddings_by_file_id (
140162 body : QueryRequestBody ,
@@ -150,7 +172,7 @@ async def query_embeddings_by_file_id(
150172 authorized_documents = []
151173
152174 try :
153- embedding = vector_store . embedding_function . embed_query (body .query )
175+ embedding = get_cached_query_embedding (body .query )
154176
155177 if isinstance (vector_store , AsyncPgVector ):
156178 documents = await run_in_executor (
@@ -543,7 +565,7 @@ async def embed_file_upload(
543565async def query_embeddings_by_file_ids (body : QueryMultipleBody ):
544566 try :
545567 # Get the embedding of the query text
546- embedding = vector_store . embedding_function . embed_query (body .query )
568+ embedding = get_cached_query_embedding (body .query )
547569
548570 # Perform similarity search with the query embedding and filter by the file_ids in metadata
549571 if isinstance (vector_store , AsyncPgVector ):
@@ -582,4 +604,3 @@ async def query_embeddings_by_file_ids(body: QueryMultipleBody):
582604 traceback .format_exc (),
583605 )
584606 raise HTTPException (status_code = 500 , detail = str (e ))
585-
0 commit comments