Skip to content

Commit 3a91c39

Browse files
committed
backend
0 parents  commit 3a91c39

File tree

16 files changed

+480
-0
lines changed

16 files changed

+480
-0
lines changed

backend/.gitignore

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
env/
8+
venv/
9+
ENV/
10+
build/
11+
develop-eggs/
12+
dist/
13+
downloads/
14+
eggs/
15+
.eggs/
16+
lib/
17+
lib64/
18+
parts/
19+
sdist/
20+
var/
21+
wheels/
22+
*.egg-info/
23+
.installed.cfg
24+
*.egg
25+
26+
# Virtual environments
27+
.venv
28+
pip-log.txt
29+
pip-delete-this-directory.txt
30+
31+
# IDE
32+
.vscode/
33+
.idea/
34+
*.swp
35+
*.swo
36+
*~
37+
.DS_Store
38+
39+
# Environment variables
40+
.env
41+
.env.local
42+
.env.*.local
43+
44+
# Database
45+
*.db
46+
*.sqlite
47+
*.sqlite3
48+
49+
# Logs
50+
*.log
51+
logs/
52+
53+
# Testing
54+
.pytest_cache/
55+
.coverage
56+
htmlcov/
57+
58+
# OS
59+
.DS_Store
60+
Thumbs.db
61+
62+
63+
venv

backend/app/api/ask.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from fastapi import APIRouter, Depends, HTTPException
2+
from pydantic import BaseModel
3+
import faiss
4+
import numpy as np
5+
import json
6+
7+
from app.config import VECTOR_DIR
8+
from app.deps import get_session_id, get_gemini_key
9+
from app.utils.gemini import get_client, embed_texts, generate_answer
10+
11+
# Router that the question-answering endpoint in this module is registered on.
router = APIRouter()
12+
13+
14+
class Question(BaseModel):
    """Request body accepted by the ``/ask`` endpoint."""

    # Question text; it is embedded for retrieval and inserted into the prompt.
    question: str
16+
17+
18+
def _build_prompt(context: str, question: str) -> str:
    """Return the strict-RAG prompt containing *context* and *question*."""
    return f"""
You are answering strictly from the document context below.

If the answer is not present in the context, respond with:
"Not found in the document"

Context:
{context}

Question:
{question}
"""


@router.post("/ask")
def ask(
    payload: Question,
    session_id: str = Depends(get_session_id),
    api_key: str = Depends(get_gemini_key),
):
    """Answer a question from the chunks stored for the caller's session.

    The question is embedded, the nearest stored vectors are looked up in the
    session's FAISS index, the matching chunk texts are joined into a context
    and the model is asked to answer strictly from that context.

    Args:
        payload: Request body carrying the question text.
        session_id: Session identifier resolved by ``get_session_id``.
        api_key: Gemini API key resolved by ``get_gemini_key``.

    Returns:
        A dict with the single key ``"answer"`` holding the generated text.

    Raises:
        HTTPException: 400 when the session has no ``index.faiss`` or
            ``chunks.json`` on disk.
    """
    # 1. Check vector store
    session_vector = VECTOR_DIR / session_id
    index_path = session_vector / "index.faiss"
    chunks_path = session_vector / "chunks.json"

    if not (index_path.exists() and chunks_path.exists()):
        raise HTTPException(
            status_code=400,
            detail="No document uploaded for this session",
        )

    # 2. Gemini client (per request)
    client = get_client(api_key)

    # 3. Load FAISS index
    index = faiss.read_index(str(index_path))

    # 4. Embed question
    q_embedding = embed_texts([payload.question], client)[0]
    query = np.asarray([q_embedding], dtype="float32")

    # 5. Similarity search. Never ask for more neighbours than the index
    #    holds, and skip the search entirely when the index is empty.
    top_k = min(5, int(index.ntotal))
    if top_k > 0:
        _, indices = index.search(query, k=top_k)
        hit_ids = [int(idx) for idx in indices[0]]
    else:
        hit_ids = []

    # 6. Load chunk metadata
    with open(chunks_path, "r", encoding="utf-8") as f:
        chunk_map = json.load(f)

    # 7. Build real context from retrieved chunks. FAISS reports a missing
    #    neighbour as label -1, so negative ids are dropped explicitly rather
    #    than relying on them being absent from the chunk map.
    retrieved_chunks = []
    for idx in hit_ids:
        if idx < 0:
            continue
        text = chunk_map.get(str(idx))
        if text:
            retrieved_chunks.append(text)

    context = "\n\n".join(retrieved_chunks)

    # 8. Prompt (strict RAG) and 9. generate answer
    answer = generate_answer(_build_prompt(context, payload.question), client)

    return {"answer": answer}

backend/app/api/clear.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from fastapi import APIRouter, Depends
2+
import shutil
3+
from app.config import UPLOAD_DIR, VECTOR_DIR
4+
from app.deps import get_session_id
5+
6+
# Router that the session-clearing endpoint in this module is registered on.
router = APIRouter()
7+
8+
@router.delete("/clear")
def clear_session(session_id: str = Depends(get_session_id)):
    """Delete the uploaded files and vector store kept for a session.

    Args:
        session_id: Session identifier resolved by ``get_session_id``.

    Returns:
        ``{"cleared": True}`` once both directories have been removed (a
        directory that does not exist is ignored).

    Raises:
        HTTPException: 400 when ``session_id`` does not resolve to a location
            strictly inside both storage roots.
    """
    # Local import: this module's top-level fastapi import only brings in
    # APIRouter and Depends.
    from fastapi import HTTPException

    storage_roots = (UPLOAD_DIR, VECTOR_DIR)

    # The session id is client-controlled. An empty value, "..", or an
    # absolute path would otherwise make rmtree delete a storage root itself
    # or a directory outside it, so validate both targets before deleting.
    for root in storage_roots:
        target = (root / session_id).resolve()
        if root.resolve() not in target.parents:
            raise HTTPException(status_code=400, detail="Invalid session id")

    for root in storage_roots:
        shutil.rmtree(root / session_id, ignore_errors=True)

    return {"cleared": True}

backend/app/api/upload.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from fastapi import APIRouter, UploadFile, Depends, HTTPException
2+
from pathlib import Path
3+
4+
from app.config import UPLOAD_DIR, VECTOR_DIR
5+
from app.deps import get_session_id, get_gemini_key
6+
from app.utils.pdf_loader import extract_text_from_pdf
7+
from app.utils.text_splitter import split_text
8+
from app.utils.gemini import get_client, embed_texts
9+
from app.utils.vector_store import create_or_load_index, add_vectors, save_index
10+
import json
11+
12+
13+
# Router that the upload endpoint in this module is registered on.
router = APIRouter()
14+
15+
16+
@router.post("/upload")
async def upload_file(
    file: UploadFile,
    session_id: str = Depends(get_session_id),
    api_key: str = Depends(get_gemini_key),
):
    """Store an uploaded PDF and index its text for the caller's session.

    The file is saved under the session's upload directory, its text is
    extracted and split into chunks, each chunk is embedded, the vectors are
    added to the session's index, and the chunk texts are written to
    ``chunks.json`` keyed by vector id.

    Args:
        file: The uploaded file; only names ending in ``.pdf`` are accepted.
        session_id: Session identifier resolved by ``get_session_id``.
        api_key: Gemini API key resolved by ``get_gemini_key``.

    Returns:
        A dict with ``status``, the stored ``filename`` and the number of
        ``chunks`` indexed by this upload.

    Raises:
        HTTPException: 415 when the file is missing a name or is not a PDF;
            400 when no text chunks could be produced from it.
    """
    # 1. Validate file. The name is client-supplied: it may be missing, and it
    #    may contain directory parts, so keep only the final path component
    #    before using it to build a path on disk.
    filename = Path(file.filename or "").name
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=415, detail="Only PDF supported")

    # 2. Create Gemini client (per request)
    client = get_client(api_key)

    # 3. Save file
    session_upload = UPLOAD_DIR / session_id
    session_upload.mkdir(parents=True, exist_ok=True)

    file_path = session_upload / filename
    with open(file_path, "wb") as f:
        f.write(await file.read())

    # 4. Extract + split text
    text = extract_text_from_pdf(str(file_path))
    chunks = split_text(text)

    if not chunks:
        raise HTTPException(status_code=400, detail="No text found in PDF")

    # 5. Generate embeddings
    embeddings = embed_texts(chunks, client)

    # 6. Create / append vector store
    session_vector = VECTOR_DIR / session_id
    session_vector.mkdir(parents=True, exist_ok=True)

    index_path = session_vector / "index.faiss"
    chunks_path = session_vector / "chunks.json"
    index = create_or_load_index(index_path, len(embeddings[0]))

    # Vector ids of this upload start after whatever the index already holds.
    # NOTE(review): assumes `index` is a FAISS index exposing `ntotal` (ask.py
    # reads the saved file with faiss.read_index) and that add_vectors appends
    # with sequential ids -- confirm against app.utils.vector_store.
    first_id = int(index.ntotal)

    add_vectors(index, embeddings)
    save_index(index, index_path)

    # 7. Save chunk text (required for RAG). When the index already had
    #    vectors, merge into the existing map instead of overwriting it, so
    #    ids returned by a search keep pointing at the right chunk text.
    chunk_map = {}
    if first_id and chunks_path.exists():
        with open(chunks_path, "r", encoding="utf-8") as f:
            chunk_map = json.load(f)

    chunk_map.update(
        {str(first_id + i): chunk for i, chunk in enumerate(chunks)}
    )

    with open(chunks_path, "w", encoding="utf-8") as f:
        json.dump(chunk_map, f, ensure_ascii=False, indent=2)

    return {
        "status": "READY",
        "filename": filename,
        "chunks": len(chunks),
    }
69+
70+

backend/app/api/validate.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from fastapi import APIRouter, Depends
2+
from app.deps import get_gemini_key
3+
from app.utils.gemini import get_client, embed_texts
4+
5+
# Router that the key-validation endpoint in this module is registered on.
router = APIRouter()
6+
7+
@router.post("/validate-key")
def validate_key(api_key: str = Depends(get_gemini_key)):
    """Check a Gemini API key by issuing one small embedding request.

    Args:
        api_key: Gemini API key resolved by ``get_gemini_key``.

    Returns:
        ``{"valid": True}`` when the embedding request succeeds.

    Raises:
        HTTPException: 401 when the API rejects the request with status 400,
            401 or 403. Any other API error is re-raised unchanged.
    """
    # Local imports: this module's top-level imports do not include them.
    from fastapi import HTTPException
    from google.genai import errors as genai_errors

    client = get_client(api_key)
    try:
        embed_texts(["ping"], client)
    except genai_errors.APIError as err:
        # The payload is a fixed "ping", so a client-side rejection is
        # attributed to the key. Other statuses (rate limits, server errors)
        # say nothing about the key and are left to propagate.
        if err.code in (400, 401, 403):
            raise HTTPException(
                status_code=401,
                detail="Invalid Gemini API key",
            ) from err
        raise
    return {"valid": True}

backend/app/config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from pathlib import Path
2+
3+
# Directory two levels above this file; all storage paths hang off it.
BASE_DIR = Path(__file__).resolve().parents[1]

# Where uploaded files and per-session vector data are kept.
UPLOAD_DIR = BASE_DIR.joinpath("tmp", "uploads")
VECTOR_DIR = BASE_DIR.joinpath("vectorstore")

# Create both storage directories at import time if they are missing.
for _storage_dir in (UPLOAD_DIR, VECTOR_DIR):
    _storage_dir.mkdir(parents=True, exist_ok=True)

backend/app/deps.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from fastapi import Header, HTTPException
2+
3+
def get_session_id(x_session_id: str = Header(...)):
    """Return the caller's session id taken from the ``X-Session-Id`` header.

    The value is used by the endpoints as a directory name, so it is limited
    to 1-128 ASCII letters, digits, ``_`` and ``-``. That rules out empty
    values, path separators, ``..`` and absolute paths.

    Raises:
        HTTPException: 400 when the header value contains anything else.
    """
    import re  # local import: keeps this fix self-contained within the block

    if not re.fullmatch(r"[A-Za-z0-9_-]{1,128}", x_session_id):
        raise HTTPException(status_code=400, detail="Invalid session id")
    return x_session_id
5+
6+
def get_gemini_key(x_gemini_key: str = Header(None)):
    """Return the Gemini API key taken from the ``X-Gemini-Key`` header.

    Raises:
        HTTPException: 401 when the header is absent or empty.
    """
    if x_gemini_key:
        return x_gemini_key
    raise HTTPException(status_code=401, detail="Gemini API key required")

backend/app/main.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from fastapi import FastAPI
2+
from app.api import validate, upload, ask, clear
3+
4+
# Application object; every endpoint module's router is mounted under /api.
app = FastAPI(title="RAG Backend")

# Registration order matches the original: validate, upload, ask, clear.
for _endpoint_module in (validate, upload, ask, clear):
    app.include_router(_endpoint_module.router, prefix="/api")

backend/app/utils/gemini.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from google import genai
2+
3+
4+
def get_client(api_key: str):
    """Build and return a ``genai.Client`` configured with *api_key*."""
    client = genai.Client(api_key=api_key)
    return client
6+
7+
8+
def embed_texts(texts: list[str], client):
    """Embed each text with one ``embed_content`` request per text.

    Args:
        texts: Strings to embed, processed in order.
        client: Object exposing ``models.embed_content``.

    Returns:
        A list with one entry per input text: the ``values`` of the first
        embedding in that text's response.
    """

    def _embed_one(text):
        response = client.models.embed_content(
            model="models/text-embedding-004",
            contents=text,
        )
        return response.embeddings[0].values

    return [_embed_one(text) for text in texts]
17+
18+
19+
def generate_answer(prompt: str, client):
    """Send *prompt* to the ``gemini-2.5-flash`` model and return its text.

    Args:
        prompt: Full prompt passed as the request contents.
        client: Object exposing ``models.generate_content``.

    Returns:
        The ``text`` attribute of the response.
    """
    return client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
    ).text

backend/app/utils/pdf_loader.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import fitz
2+
3+
def extract_text_from_pdf(path: str) -> str:
    """Return the text of every page of the PDF at *path*, concatenated.

    Args:
        path: Filesystem path of the document to open.

    Returns:
        The pages' ``get_text()`` output joined in page order with no
        separator.
    """
    # Open the document as a context manager so it is closed even when text
    # extraction fails, and join once instead of growing a string per page.
    with fitz.open(path) as doc:
        return "".join(page.get_text() for page in doc)

0 commit comments

Comments
 (0)