|
| 1 | +import json |
| 2 | +import logging |
| 3 | +import os |
| 4 | +import pathlib |
| 5 | +import re |
| 6 | + |
| 7 | +import rich |
| 8 | +from azure.identity import AzureDeveloperCliCredential, get_bearer_token_provider |
| 9 | +from azure.search.documents import SearchClient |
| 10 | +from dotenv_azd import load_azd_env |
| 11 | +from langchain_community.document_loaders import JSONLoader |
| 12 | +from langchain_core.documents import Document as LCDocument |
| 13 | +from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings |
| 14 | +from ragas.embeddings import LangchainEmbeddingsWrapper |
| 15 | +from ragas.llms import LangchainLLMWrapper |
| 16 | +from ragas.testset import TestsetGenerator |
| 17 | +from ragas.testset.graph import KnowledgeGraph, Node, NodeType |
| 18 | +from ragas.testset.transforms import apply_transforms, default_transforms |
| 19 | + |
# Module-level logger for the evaluation tooling.
logger = logging.getLogger("evals")

# Pull environment variables from the active azd (Azure Developer CLI) environment.
load_azd_env()
# Repository root: two directories up from this file.
root_dir = pathlib.Path(__file__).parent.parent
| 24 | + |
| 25 | + |
def get_azure_credential():
    """Return an AzureDeveloperCliCredential for authenticating to Azure.

    When AZURE_TENANT_ID is set, the credential is scoped to that tenant;
    otherwise the azd CLI's home tenant is used.
    """
    tenant_id = os.getenv("AZURE_TENANT_ID")
    if tenant_id:
        logger.info("Setting up Azure credential using AzureDeveloperCliCredential with tenant_id %s", tenant_id)
        return AzureDeveloperCliCredential(tenant_id=tenant_id, process_timeout=60)
    logger.info("Setting up Azure credential using AzureDeveloperCliCredential for home tenant")
    return AzureDeveloperCliCredential(process_timeout=60)
| 35 | + |
| 36 | + |
def get_search_documents(azure_credential) -> list[dict]:
    """Fetch up to 10 documents from the configured Azure AI Search index.

    :param azure_credential: Credential used to authenticate against the
        search service (e.g. from get_azure_credential()).
    :return: The raw result documents (dict-like) returned by the index.
    """
    search_client = SearchClient(
        endpoint=f"https://{os.getenv('AZURE_SEARCH_SERVICE')}.search.windows.net",
        index_name=os.getenv("AZURE_SEARCH_INDEX"),
        credential=azure_credential,
    )
    # search_text="*" matches every document; top=10 caps how many we pull.
    search_results = search_client.search(search_text="*", top=10)
    # BUG FIX: the annotation previously claimed `-> str` although a list of
    # results is returned; materialize the paged iterator with list().
    return list(search_results)
| 45 | + |
| 46 | + |
# Load the Description field of an example JSON document via a jq expression.
# NOTE(review): `docs` does not appear to be referenced anywhere later in this
# file — possibly leftover from an earlier approach; confirm before removing.
path = root_dir / "data/Json_Examples/2189.json"
loader = JSONLoader(path, jq_schema=".Description")
docs = loader.load()
| 50 | + |
| 51 | + |
# Wire up the generator LLM and embeddings against Azure OpenAI, authenticating
# with an Entra ID bearer token instead of an API key.
azure_credential = get_azure_credential()
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-06-01"
azure_endpoint = f"https://{os.getenv('AZURE_OPENAI_SERVICE')}.openai.azure.com"
azure_ad_token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")

chat_model = AzureChatOpenAI(
    openai_api_version=azure_openai_api_version,
    azure_endpoint=azure_endpoint,
    azure_ad_token_provider=azure_ad_token_provider,
    azure_deployment=os.getenv("AZURE_OPENAI_EVAL_DEPLOYMENT"),
    model=os.environ["AZURE_OPENAI_EVAL_MODEL"],
    validate_base_url=False,
)
generator_llm = LangchainLLMWrapper(chat_model)

# Embeddings back answer_relevancy, answer_correctness and answer_similarity.
embedding_client = AzureOpenAIEmbeddings(
    openai_api_version=azure_openai_api_version,
    azure_endpoint=azure_endpoint,
    azure_ad_token_provider=azure_ad_token_provider,
    azure_deployment=os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT"),
    model=os.environ["AZURE_OPENAI_EMB_MODEL_NAME"],
)
generator_embeddings = LangchainEmbeddingsWrapper(embedding_client)
| 77 | + |
# Build a knowledge graph from documents pulled out of Azure AI Search.
search_docs = get_search_documents(azure_credential)

# Default Ragas transforms, driven by the same generator LLM and embeddings.
corpus = [LCDocument(page_content=doc["content"]) for doc in search_docs]
transforms = default_transforms(
    documents=corpus,
    llm=generator_llm,
    embedding_model=generator_embeddings,
)
| 87 | + |
# Turn each search document into a Ragas DOCUMENT node, prefixing the content
# with its citation as [[sourcepage]] so the citation survives into the
# generated reference contexts.
nodes = [
    Node(
        type=NodeType.DOCUMENT,
        properties={
            "page_content": f"[[{doc['sourcepage']}]]: {doc['content']}",
            "document_metadata": {"citation": doc["sourcepage"]},
        },
    )
    for doc in search_docs
]

kg = KnowledgeGraph(nodes=nodes)
apply_transforms(kg, transforms)

generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings, knowledge_graph=kg)
dataset = generator.generate(testset_size=10, with_debugging_logs=True)
| 107 | + |
# Convert generated samples into question/truth pairs, re-attaching the
# [[citation]] markers embedded in each reference context as [citation] tags.
citation_pattern = re.compile(r"\[\[(.*?)\]\]")  # hoisted out of the loop
qa_pairs = []
for sample in dataset.samples:
    rich.print(sample)
    question = sample.eval_sample.user_input
    truth = sample.eval_sample.reference
    # Grab the citation in square brackets from the reference_contexts and add
    # it to the truth. Only the first [[...]] marker per context is used.
    citations = []
    for context in sample.eval_sample.reference_contexts:
        match = citation_pattern.search(context)
        if match:
            citations.append(f"[{match.group(1)}]")
    # BUG FIX: previously a lone trailing space was appended to `truth` when no
    # citation was found in any context.
    if citations:
        truth += " " + " ".join(citations)
    qa_pairs.append({"question": question, "truth": truth})

# Append (not overwrite) so repeated runs accumulate ground-truth examples.
with open(root_dir / "ground_truth_ragas.jsonl", "a", encoding="utf-8") as f:
    for qa_pair in qa_pairs:
        f.write(json.dumps(qa_pair) + "\n")