Skip to content

Commit f7a3254

Browse files
authored
💾 feat: Implement LRU caching for query embedding & bump deps. (#132)
* 💾 feat: Implement LRU caching for query embedding
* 🔧 fix: Update cryptography package version to 44.0.1 in requirements files, remove starlette
* 🔧 fix: Update FastAPI version to 0.115.12 in requirements files
1 parent 1ebf334 commit f7a3254

File tree

3 files changed

+31
-11
lines changed

3 files changed

+31
-11
lines changed

app/routes/document_routes.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,30 @@
66
import aiofiles.os
77
from shutil import copyfileobj
88
from typing import List, Iterable
9-
from fastapi import APIRouter, Request, UploadFile, HTTPException, File, Form, Body, Query, status
9+
from fastapi import (
10+
APIRouter,
11+
Request,
12+
UploadFile,
13+
HTTPException,
14+
File,
15+
Form,
16+
Body,
17+
Query,
18+
status,
19+
)
1020
from langchain_core.documents import Document
1121
from langchain_core.runnables import run_in_executor
1222
from langchain_text_splitters import RecursiveCharacterTextSplitter
23+
from functools import lru_cache
1324

1425
from app.config import logger, vector_store, RAG_UPLOAD_DIR, CHUNK_SIZE, CHUNK_OVERLAP
1526
from app.constants import ERROR_MESSAGES
16-
from app.models import StoreDocument, QueryRequestBody, DocumentResponse, QueryMultipleBody
27+
from app.models import (
28+
StoreDocument,
29+
QueryRequestBody,
30+
DocumentResponse,
31+
QueryMultipleBody,
32+
)
1733
from app.services.vector_store.async_pg_vector import AsyncPgVector
1834
from app.utils.document_loader import get_loader, clean_text, process_documents
1935
from app.utils.health import is_health_ok
@@ -135,6 +151,12 @@ async def delete_documents(document_ids: List[str] = Body(...)):
135151
raise HTTPException(status_code=500, detail=str(e))
136152

137153

154+
# Memoize query embeddings (not the embedding function itself) so repeated
# identical queries skip the embedding call.
@lru_cache(maxsize=128)
def _embed_query_cached(query: str) -> tuple:
    """Embed *query* and return the vector as an immutable tuple.

    The tuple is what lives in the LRU cache; being immutable, it cannot be
    corrupted by a caller that modifies the vector it was handed.
    """
    return tuple(vector_store.embedding_function.embed_query(query))


def get_cached_query_embedding(query: str) -> List[float]:
    """Return the embedding for *query*, reusing a cached result when possible.

    Up to 128 distinct query strings are kept (least recently used entries
    are evicted first). Each call returns a fresh list, so mutating the
    result in place never affects what later callers receive for the same
    query.

    NOTE(review): the underlying ``embed_query`` call is synchronous; on a
    cache miss it runs inline in whichever coroutine calls this function.
    NOTE(review): assumes ``embed_query`` returns a flat sequence of floats,
    as LangChain's ``Embeddings`` interface documents — confirm for custom
    embedding backends.
    """
    return list(_embed_query_cached(query))


# Keep the lru_cache introspection helpers reachable through the public name,
# as they were when the decorator sat directly on this function.
get_cached_query_embedding.cache_info = _embed_query_cached.cache_info
get_cached_query_embedding.cache_clear = _embed_query_cached.cache_clear
158+
159+
138160
@router.post("/query")
139161
async def query_embeddings_by_file_id(
140162
body: QueryRequestBody,
@@ -150,7 +172,7 @@ async def query_embeddings_by_file_id(
150172
authorized_documents = []
151173

152174
try:
153-
embedding = vector_store.embedding_function.embed_query(body.query)
175+
embedding = get_cached_query_embedding(body.query)
154176

155177
if isinstance(vector_store, AsyncPgVector):
156178
documents = await run_in_executor(
@@ -543,7 +565,7 @@ async def embed_file_upload(
543565
async def query_embeddings_by_file_ids(body: QueryMultipleBody):
544566
try:
545567
# Get the embedding of the query text
546-
embedding = vector_store.embedding_function.embed_query(body.query)
568+
embedding = get_cached_query_embedding(body.query)
547569

548570
# Perform similarity search with the query embedding and filter by the file_ids in metadata
549571
if isinstance(vector_store, AsyncPgVector):
@@ -582,4 +604,3 @@ async def query_embeddings_by_file_ids(body: QueryMultipleBody):
582604
traceback.format_exc(),
583605
)
584606
raise HTTPException(status_code=500, detail=str(e))
585-

requirements.lite.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ langchain-openai==0.2.11
44
langchain-core==0.3.25
55
sqlalchemy==2.0.28
66
python-dotenv==1.0.1
7-
fastapi==0.110.0
7+
fastapi==0.115.12
88
psycopg2-binary==2.9.9
99
pgvector==0.2.5
1010
uvicorn==0.28.0
@@ -24,7 +24,7 @@ rapidocr-onnxruntime==1.3.24
2424
opencv-python-headless==4.9.0.80
2525
pymongo==4.6.3
2626
langchain-mongodb==0.2.0
27-
cryptography==43.0.1
27+
cryptography==44.0.1
2828
python-magic==0.4.27
2929
python-pptx==0.6.23
3030
xlrd==2.0.1

requirements.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ langchain_text_splitters==0.3.3
77
boto3==1.34.144
88
sqlalchemy==2.0.28
99
python-dotenv==1.0.1
10-
fastapi==0.110.0
10+
fastapi==0.115.12
1111
psycopg2-binary==2.9.9
1212
pgvector==0.2.5
1313
uvicorn==0.28.0
@@ -30,9 +30,8 @@ pymongo==4.6.3
3030
langchain-mongodb==0.2.0
3131
langchain-ollama==0.2.0
3232
langchain-huggingface==0.1.0
33-
cryptography==43.0.1
33+
cryptography==44.0.1
3434
python-magic==0.4.27
3535
python-pptx==0.6.23
3636
xlrd==2.0.1
37-
pydantic==2.9.2
38-
starlette==0.36.3
37+
pydantic==2.9.2

0 commit comments

Comments
 (0)