Skip to content

Commit 03538d0

Browse files
committed
Retrieval Evaluation
1 parent 5bbdf67 commit 03538d0

File tree

3 files changed

+53
-2
lines changed

3 files changed

+53
-2
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import numpy as np
2+
from niw_np_rag.app.rag import RAGPipeline
3+
import json
4+
5+
# Load the question/answer/context evaluation set. Forward slashes are used
# (matching the other paths below) so the path resolves on POSIX as well as
# on Windows; a backslash literal only works on Windows.
with open("./evaluation/datasets/niw_qna.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

# Initialize your RAG pipeline
rag = RAGPipeline(
    pdfs_path="./data/uscis_aao_pdfs",
    vector_store_path="./data/chunks_vector_store_faiss",
    semantic_chunking=True,
)

# NOTE(review): presumably k is the number of documents returned per query --
# confirm against RAGPipeline.get_retriever. Recall@k cannot be measured for
# any k larger than this value.
retriever = rag.get_retriever(k=5)
16+
17+
18+
def evaluate_recall_at_k(dataset, retriever, k=5):
    """Compute Recall@k of a retriever over a question/context dataset.

    A question counts as a hit when its ground-truth context occurs verbatim
    (exact substring match) in the text of at least one of the first ``k``
    retrieved documents.

    Args:
        dataset: iterable of dicts with fields:
            - question
            - answer (not required for recall)
            - context (ground-truth context from source docs)
        retriever: object exposing ``invoke(question)`` that returns a
            sliceable sequence of documents, each with a ``page_content``
            attribute.
        k: number of top-ranked documents inspected per question. The
            retriever must return at least ``k`` documents for the reported
            figure to be a true Recall@k.

    Returns:
        The fraction of questions with a hit, as a float in [0, 1].
        Returns 0.0 for an empty dataset.
    """
    hits = []

    for item in dataset:
        question = item["question"]
        ground_truth_context = item["context"]

        # Retrieve documents and keep only the top-k texts for scoring.
        retrieved_docs = retriever.invoke(question)
        retrieved_texts = [doc.page_content for doc in retrieved_docs[:k]]

        # Hit if the ground-truth context appears in any retrieved text.
        hits.append(
            any(ground_truth_context in text for text in retrieved_texts)
        )

    # np.mean([]) yields nan and emits a RuntimeWarning; report 0.0 instead
    # so an empty dataset produces a well-defined score.
    recall_k = float(np.mean(hits)) if hits else 0.0

    print(f"Recall@{k}: {recall_k:.4f}")
    return recall_k
51+
52+
# Score at the depth the retriever was built with (k=5 above). The function
# only slices the retriever's results, so asking for a larger k would print a
# "Recall@15" label for what is really Recall@5 -- assuming get_retriever(k=5)
# caps results at five documents (confirm against RAGPipeline).
evaluate_recall_at_k(dataset, retriever, k=5)
0 Bytes
Binary file not shown.

niw_np_rag/app/rag.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ def build_vector_store_Qdrant(self):
125125
)
126126
)
127127

128-
# ✅ Initialize QdrantVectorStore once
129128
vector_store = QdrantVectorStore(
130129
client=client,
131130
collection_name=collection_name,
@@ -137,7 +136,7 @@ def build_vector_store_Qdrant(self):
137136
for pdf_file in tqdm(filtered_pdf_files, desc="Processing PDFs for Qdrant"):
138137
try:
139138
texts = self.chunk_documents(pdf_file)
140-
vector_store.add_documents(texts) # ✅ add to existing collection
139+
vector_store.add_documents(texts)
141140
tqdm.write(f"[ADD] Added chunks from: {pdf_file}")
142141
count += 1
143142
except Exception as e:

0 commit comments

Comments
 (0)