
Commit a839b33

Improvements to RAGAS code
1 parent 6471747 commit a839b33

File tree

9 files changed

+6823 -196 lines changed

docs/evaluation.md

Lines changed: 12 additions & 3 deletions
@@ -11,14 +11,13 @@ Follow these steps to evaluate the quality of the answers generated by the RAG f

## Deploy a GPT-4 model

-
1. Run this command to tell `azd` to deploy a GPT-4 level model for evaluation:

```shell
azd env set USE_EVAL true
```

-2. Set the capacity to the highest possible value to ensure that the evaluation runs quickly.
+2. Set the capacity to the highest possible value to ensure that the evaluation runs relatively quickly. Even with a high capacity, it can take a long time to generate ground truth data and run bulk evaluations.

```shell
azd env set AZURE_OPENAI_EVAL_DEPLOYMENT_CAPACITY 100

@@ -47,9 +46,17 @@ Modify the search terms and tasks in `evals/generate_config.json` to match your
Generate ground truth data by running the following command:

```bash
-python evals/generate_ground_truth_data.py
+python evals/generate_ground_truth.py --numquestions=200 --numsearchdocs=1000
```

+The options are:
+
+* `numquestions`: The number of questions to generate. We suggest at least 200.
+* `numsearchdocs`: The number of documents (chunks) to retrieve from your search index. You can leave off the option to fetch all documents, but that will significantly increase the time it takes to generate ground truth data; you may want to start with a subset.
+* `kgfile`: An existing RAGAS knowledge graph JSON file, usually `ground_truth_kg.json`. You may want to specify this if you already created a knowledge graph and just want to tweak the question generation steps.
+
+🕰️ This may take a long time, possibly several hours, depending on the size of the search index.
+
Review the generated data in `evals/ground_truth.jsonl` after running that script, removing any question/answer pairs that don't seem like realistic user input.

## Run bulk evaluation

@@ -63,6 +70,8 @@ Run the evaluation script by running the following command:
python evals/evaluate.py
```

+🕰️ This may take a long time, possibly several hours, depending on the number of ground truth questions.
+
## Review the evaluation results

The evaluation script will output a summary of the evaluation results, inside the `evals/results` directory.
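To make the review step above concrete, here is a minimal sketch (not part of this commit) of trimming `evals/ground_truth.jsonl`. The `question`/`truth` keys match what the new `generate_ground_truth.py` script writes; the filtering condition is only an illustrative placeholder, since in practice each pair should be reviewed by hand.

```python
# Sketch only, not part of this commit: skim evals/ground_truth.jsonl and drop
# question/answer pairs that don't look like realistic user input.
import json
import pathlib

path = pathlib.Path("evals/ground_truth.jsonl")
pairs = [json.loads(line) for line in path.read_text().splitlines() if line.strip()]

# Illustrative filter only: drop very short questions. In practice, read each
# pair and decide whether the question resembles something a real user would ask.
kept = [pair for pair in pairs if len(pair["question"].split()) >= 4]

path.write_text("".join(json.dumps(pair) + "\n" for pair in kept))
print(f"Kept {len(kept)} of {len(pairs)} question/answer pairs")
```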

evals/generate_config.json

Lines changed: 0 additions & 37 deletions
This file was deleted.

evals/generate_ground_truth.py

Lines changed: 151 additions & 0 deletions
This is a new file.

@@ -0,0 +1,151 @@

```python
import argparse
import json
import logging
import os
import pathlib
import re

from azure.identity import AzureDeveloperCliCredential, get_bearer_token_provider
from azure.search.documents import SearchClient
from dotenv_azd import load_azd_env
from langchain_core.documents import Document as LCDocument
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.testset import TestsetGenerator
from ragas.testset.graph import KnowledgeGraph, Node, NodeType
from ragas.testset.transforms import apply_transforms, default_transforms
from rich.logging import RichHandler

logger = logging.getLogger("ragapp")

root_dir = pathlib.Path(__file__).parent


def get_azure_credential():
    AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID")
    if AZURE_TENANT_ID:
        logger.info("Setting up Azure credential using AzureDeveloperCliCredential with tenant_id %s", AZURE_TENANT_ID)
        azure_credential = AzureDeveloperCliCredential(tenant_id=AZURE_TENANT_ID, process_timeout=60)
    else:
        logger.info("Setting up Azure credential using AzureDeveloperCliCredential for home tenant")
        azure_credential = AzureDeveloperCliCredential(process_timeout=60)
    return azure_credential


def get_search_documents(azure_credential, num_search_documents=None) -> list:
    """Fetch documents (chunks) from the Azure AI Search index."""
    search_client = SearchClient(
        endpoint=f"https://{os.getenv('AZURE_SEARCH_SERVICE')}.search.windows.net",
        index_name=os.getenv("AZURE_SEARCH_INDEX"),
        credential=azure_credential,
    )
    all_documents = []
    if num_search_documents is None:
        num_search_documents = 100000
    response = search_client.search(search_text="*", top=num_search_documents).by_page()
    for page in response:
        page = list(page)
        all_documents.extend(page)
    return all_documents


def generate_ground_truth_ragas(num_questions=200, num_search_documents=None, kg_file=None):
    azure_credential = get_azure_credential()
    azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-06-01"
    azure_endpoint = f"https://{os.getenv('AZURE_OPENAI_SERVICE')}.openai.azure.com"
    azure_ad_token_provider = get_bearer_token_provider(
        azure_credential, "https://cognitiveservices.azure.com/.default"
    )
    generator_llm = LangchainLLMWrapper(
        AzureChatOpenAI(
            openai_api_version=azure_openai_api_version,
            azure_endpoint=azure_endpoint,
            azure_ad_token_provider=azure_ad_token_provider,
            azure_deployment=os.getenv("AZURE_OPENAI_EVAL_DEPLOYMENT"),
            model=os.environ["AZURE_OPENAI_EVAL_MODEL"],
            validate_base_url=False,
        )
    )

    # Initialize the embeddings for answer_relevancy, answer_correctness and answer_similarity
    generator_embeddings = LangchainEmbeddingsWrapper(
        AzureOpenAIEmbeddings(
            openai_api_version=azure_openai_api_version,
            azure_endpoint=azure_endpoint,
            azure_ad_token_provider=azure_ad_token_provider,
            azure_deployment=os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT"),
            model=os.environ["AZURE_OPENAI_EMB_MODEL_NAME"],
        )
    )

    # Load or create the knowledge graph
    if kg_file and os.path.exists(kg_file):
        logger.info("Loading existing knowledge graph from %s", kg_file)
        kg = KnowledgeGraph.load(kg_file)
    else:
        # Make a knowledge graph from Azure AI Search documents
        search_docs = get_search_documents(azure_credential, num_search_documents)

        # Create the transforms
        transforms = default_transforms(
            documents=[LCDocument(page_content=doc["content"]) for doc in search_docs],
            llm=generator_llm,
            embedding_model=generator_embeddings,
        )

        # Convert the documents to RAGAS nodes, embedding the citation in the page content
        nodes = []
        for doc in search_docs:
            content = doc["content"]
            citation = doc["sourcepage"]
            node = Node(
                type=NodeType.DOCUMENT,
                properties={
                    "page_content": f"[[{citation}]]: {content}",
                    "document_metadata": {"citation": citation},
                },
            )
            nodes.append(node)

        kg = KnowledgeGraph(nodes=nodes)
        kg.save(root_dir / "ground_truth_kg.json")

        apply_transforms(kg, transforms)

    generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings, knowledge_graph=kg)
    dataset = generator.generate(testset_size=num_questions, with_debugging_logs=True)

    qa_pairs = []
    for sample in dataset.samples:
        question = sample.eval_sample.user_input
        truth = sample.eval_sample.reference
        # Grab the citation in square brackets from the reference_contexts and add it to the truth
        citations = []
        for context in sample.eval_sample.reference_contexts:
            match = re.search(r"\[\[(.*?)\]\]", context)
            if match:
                citation = match.group(1)
                citations.append(f"[{citation}]")
        truth += " " + " ".join(citations)
        qa_pairs.append({"question": question, "truth": truth})

    # Append mode, so repeated runs add to the existing ground truth file
    with open(root_dir / "ground_truth.jsonl", "a") as f:
        for qa_pair in qa_pairs:
            f.write(json.dumps(qa_pair) + "\n")


if __name__ == "__main__":
    logging.basicConfig(
        level=logging.WARNING, format="%(message)s", datefmt="[%X]", handlers=[RichHandler(rich_tracebacks=True)]
    )
    logger.setLevel(logging.INFO)
    load_azd_env()

    parser = argparse.ArgumentParser(description="Generate ground truth data using AI Search index and RAGAS.")
    parser.add_argument("--numsearchdocs", type=int, help="Specify the number of search results to fetch")
    parser.add_argument("--numquestions", type=int, help="Specify the number of questions to generate.", default=200)
    parser.add_argument("--kgfile", type=str, help="Specify the path to an existing knowledge graph file")

    args = parser.parse_args()

    generate_ground_truth_ragas(
        num_search_documents=args.numsearchdocs, num_questions=args.numquestions, kg_file=args.kgfile
    )
```
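As a usage sketch (not part of this commit), the generator can also be driven from Python rather than the CLI, mirroring the `--kgfile` option documented above. This assumes it is run from the `evals/` directory, with the azd environment configured and a knowledge graph already saved by a previous run; the question count and path are illustrative.

```python
# Sketch only, not part of this commit: reuse a knowledge graph saved by a
# previous run so that only the question-generation step is repeated.
# Assumes the current directory is evals/ and the azd environment is set up.
from dotenv_azd import load_azd_env

from generate_ground_truth import generate_ground_truth_ragas

load_azd_env()  # load AZURE_* variables from the current azd environment
generate_ground_truth_ragas(
    num_questions=50,                  # smaller batch while iterating
    kg_file="ground_truth_kg.json",    # skip fetching documents and re-running transforms
)
```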

evals/generate_ragas.py

Lines changed: 0 additions & 119 deletions
This file was deleted.
