1+ import numpy as np
2+ from niw_np_rag .app .rag import RAGPipeline
3+ import json
4+
# Load the evaluation Q&A dataset: a list of dicts with "question",
# "answer", and "context" (ground-truth source passage) keys.
# Forward-slash relative path works on Windows, macOS, and Linux
# (the original backslash path was Windows-only).
with open("./evaluation/datasets/niw_qna.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

# Initialize the RAG pipeline over the USCIS AAO decision PDFs.
rag = RAGPipeline(
    pdfs_path="./data/uscis_aao_pdfs",
    vector_store_path="./data/chunks_vector_store_faiss",
    semantic_chunking=True,
)

# The retriever's k caps how many documents any downstream evaluation can
# see. It must be >= the largest k passed to evaluate_recall_at_k (15 at
# the bottom of this script); with the previous k=5, "Recall@15" was
# silently measuring recall over only 5 retrieved documents.
retriever = rag.get_retriever(k=15)
16+
17+
def evaluate_recall_at_k(dataset, retriever, k=5):
    """Compute Recall@k of a retriever against ground-truth contexts.

    For each dataset item, the question is sent to the retriever and the
    item counts as a "hit" when its ground-truth context string appears
    verbatim inside any of the top-k retrieved documents' page_content.

    NOTE: this is an exact substring match — any whitespace or chunking
    difference between the stored context and the indexed chunk text will
    count as a miss.

    Parameters
    ----------
    dataset : list of dict
        Each dict needs "question" and "context" keys ("answer" is not
        used for recall).
    retriever : object
        Must expose invoke(question) -> list of docs with a
        .page_content attribute (e.g. a LangChain retriever). The
        retriever itself must be configured to return at least k docs,
        otherwise the [:k] slice cannot see more than it supplies.
    k : int, default 5
        Number of top retrieved documents to consider.

    Returns
    -------
    float
        Fraction of items whose context was found in the top-k results;
        0.0 for an empty dataset (previously np.mean([]) produced nan
        with a RuntimeWarning).
    """
    hits = []
    for item in dataset:
        question = item["question"]
        ground_truth_context = item["context"]

        # Retrieve and keep only the top-k documents' raw text.
        retrieved_docs = retriever.invoke(question)
        retrieved_texts = [doc.page_content for doc in retrieved_docs[:k]]

        # Exact-substring containment check against each retrieved chunk.
        hit = any(ground_truth_context in text for text in retrieved_texts)
        hits.append(1 if hit else 0)

    # Guard the empty-dataset edge case before averaging.
    recall_k = float(np.mean(hits)) if hits else 0.0

    print(f"Recall@{k}: {recall_k:.4f}")
    return recall_k
51+
# Report retrieval recall over the full dataset, considering the top 15 hits.
evaluate_recall_at_k(dataset, retriever, k=15)