From 5d703484dfc1570cfc8f40ab563ea56457eb16e7 Mon Sep 17 00:00:00 2001 From: Rahul Bhatnagar Date: Tue, 18 Nov 2025 02:05:22 +0530 Subject: [PATCH] Prompt class + implementation in Context Precision --- .../metrics/collections/_context_precision.py | 31 ++-- src/ragas/prompt/metrics/__init__.py | 12 +- src/ragas/prompt/metrics/base_prompt.py | 152 ++++++++++++++++ src/ragas/prompt/metrics/context_precision.py | 171 +++++++----------- 4 files changed, 240 insertions(+), 126 deletions(-) create mode 100644 src/ragas/prompt/metrics/base_prompt.py diff --git a/src/ragas/metrics/collections/_context_precision.py b/src/ragas/metrics/collections/_context_precision.py index 91f7a1486..bb2af380b 100644 --- a/src/ragas/metrics/collections/_context_precision.py +++ b/src/ragas/metrics/collections/_context_precision.py @@ -4,26 +4,19 @@ from typing import List import numpy as np -from pydantic import BaseModel from ragas.metrics.collections.base import BaseMetric from ragas.metrics.result import MetricResult from ragas.prompt.metrics.context_precision import ( - context_precision_with_reference_prompt, - context_precision_without_reference_prompt, + ContextPrecisionInput, + ContextPrecisionOutput, + ContextPrecisionPrompt, ) if t.TYPE_CHECKING: from ragas.llms.base import InstructorBaseRagasLLM -class ContextPrecisionOutput(BaseModel): - """Structured output for context precision evaluation.""" - - reason: str - verdict: int - - class ContextPrecisionWithReference(BaseMetric): """ Modern v2 implementation of context precision with reference. @@ -79,6 +72,7 @@ def __init__( """ # Set attributes explicitly before calling super() self.llm = llm + self.prompt = ContextPrecisionPrompt() # Initialize prompt class once # Call super() for validation (without passing llm in kwargs) super().__init__(name=name, **kwargs) @@ -108,10 +102,12 @@ async def ascore( # Evaluate each retrieved context verdicts = [] for context in retrieved_contexts: - prompt = context_precision_with_reference_prompt( - user_input, context, reference + # Create input data and generate prompt + input_data = ContextPrecisionInput( + question=user_input, context=context, answer=reference ) - result = await self.llm.agenerate(prompt, ContextPrecisionOutput) + prompt_string = self.prompt.to_string(input_data) + result = await self.llm.agenerate(prompt_string, ContextPrecisionOutput) verdicts.append(result.verdict) # Calculate average precision @@ -196,6 +192,7 @@ def __init__( """ # Set attributes explicitly before calling super() self.llm = llm + self.prompt = ContextPrecisionPrompt() # Initialize prompt class once # Call super() for validation (without passing llm in kwargs) super().__init__(name=name, **kwargs) @@ -225,10 +222,12 @@ async def ascore( # Evaluate each retrieved context verdicts = [] for context in retrieved_contexts: - prompt = context_precision_without_reference_prompt( - user_input, context, response + # Create input data and generate prompt + input_data = ContextPrecisionInput( + question=user_input, context=context, answer=response ) - result = await self.llm.agenerate(prompt, ContextPrecisionOutput) + prompt_string = self.prompt.to_string(input_data) + result = await self.llm.agenerate(prompt_string, ContextPrecisionOutput) verdicts.append(result.verdict) # Calculate average precision diff --git a/src/ragas/prompt/metrics/__init__.py b/src/ragas/prompt/metrics/__init__.py index 7025ab1fd..cb889ace1 100644 --- a/src/ragas/prompt/metrics/__init__.py +++ b/src/ragas/prompt/metrics/__init__.py @@ -4,16 +4,16 @@ from ragas.prompt.metrics.answer_relevance import answer_relevancy_prompt from ragas.prompt.metrics.common import nli_statement_prompt, statement_generator_prompt from ragas.prompt.metrics.context_precision import ( - context_precision_prompt, - context_precision_with_reference_prompt, - context_precision_without_reference_prompt, + ContextPrecisionInput, + ContextPrecisionOutput, + ContextPrecisionPrompt, ) __all__ = [ "answer_relevancy_prompt", - "context_precision_prompt", - "context_precision_with_reference_prompt", - "context_precision_without_reference_prompt", + "ContextPrecisionPrompt", + "ContextPrecisionInput", + "ContextPrecisionOutput", "correctness_classifier_prompt", "nli_statement_prompt", "statement_generator_prompt", diff --git a/src/ragas/prompt/metrics/base_prompt.py b/src/ragas/prompt/metrics/base_prompt.py new file mode 100644 index 000000000..15f7fd442 --- /dev/null +++ b/src/ragas/prompt/metrics/base_prompt.py @@ -0,0 +1,152 @@ +"""Base prompt class for metrics with structured input/output models.""" + +import json +import typing as t +from abc import ABC + +from pydantic import BaseModel + +# Type variables for generics +InputModel = t.TypeVar("InputModel", bound=BaseModel) +OutputModel = t.TypeVar("OutputModel", bound=BaseModel) + + +class BasePrompt(ABC, t.Generic[InputModel, OutputModel]): + """ + Base class for structured prompts with type-safe input/output models. + + Attributes: + input_model: Pydantic model class for input validation + output_model: Pydantic model class for output schema generation + instruction: Task description for the LLM + examples: List of (input, output) example pairs for few-shot learning + language: Language for the prompt (default: "english") + """ + + # Must be set by subclasses + input_model: t.Type[InputModel] + output_model: t.Type[OutputModel] + instruction: str + examples: t.List[t.Tuple[InputModel, OutputModel]] + language: str = "english" + + def to_string(self, data: InputModel) -> str: + """ + Convert prompt with input data to complete prompt string for LLM. + + Args: + data: Input data instance (validated by input_model) + + Returns: + Complete prompt string ready for LLM + """ + # Generate JSON schema for output + output_schema = json.dumps(self.output_model.model_json_schema()) + + # Generate examples section + examples_str = self._generate_examples() + + # Convert input data to JSON + input_json = data.model_dump_json(indent=4, exclude_none=True) + + # Build complete prompt (matches existing function format) + return f"""{self.instruction} +Please return the output in a JSON format that complies with the following schema as specified in JSON Schema: +{output_schema}Do not use single quotes in your response but double quotes,properly escaped with a backslash. + +{examples_str} +----------------------------- + +Now perform the same with the following input +input: {input_json} +Output: """ + + def _generate_examples(self) -> str: + """ + Generate examples section of the prompt. + + Returns: + Formatted examples string or empty string if no examples + """ + if not self.examples: + return "" + + example_strings = [] + for idx, (input_data, output_data) in enumerate(self.examples): + example_strings.append( + f"Example {idx + 1}\n" + f"Input: {input_data.model_dump_json(indent=4)}\n" + f"Output: {output_data.model_dump_json(indent=4)}" + ) + + return "--------EXAMPLES-----------\n" + "\n\n".join(example_strings) + + async def adapt( + self, + target_language: str, + llm, + adapt_instruction: bool = False, + ) -> "BasePrompt[InputModel, OutputModel]": + """ + Adapt the prompt to a new language using minimal translation. + + Args: + target_language: Target language (e.g., "spanish", "french") + llm: LLM instance for translation + adapt_instruction: Whether to adapt instruction text (default: False) + + Returns: + New prompt instance adapted to the target language + """ + import copy + + # Create adapted prompt + new_prompt = copy.deepcopy(self) + new_prompt.language = target_language + + # Translate instruction if requested + if adapt_instruction: + instruction_prompt = f"Translate this to {target_language}, keep technical terms: {self.instruction}" + try: + response = await llm.agenerate(instruction_prompt) + new_prompt.instruction = str(response).strip() + except Exception: + # Keep original if translation fails + pass + + # Translate examples (simplified approach) + translated_examples = [] + for input_ex, output_ex in self.examples: + try: + # Simple per-example translation + example_prompt = f"""Translate this example to {target_language}, keep the same structure: + +Input: {input_ex.model_dump_json()} +Output: {output_ex.model_dump_json()} + +Return as: Input: {{translated_input_json}} Output: {{translated_output_json}}""" + + response = await llm.agenerate(example_prompt) + + # Try to extract translated JSON (basic parsing) + response_str = str(response) + if "Input:" in response_str and "Output:" in response_str: + parts = response_str.split("Output:") + input_part = parts[0].replace("Input:", "").strip() + output_part = parts[1].strip() + + translated_input = self.input_model.model_validate_json(input_part) + translated_output = self.output_model.model_validate_json( + output_part + ) + translated_examples.append((translated_input, translated_output)) + else: + # Fallback to original + translated_examples.append((input_ex, output_ex)) + + except Exception: + # Fallback to original example if translation fails + translated_examples.append((input_ex, output_ex)) + + new_prompt.examples = translated_examples + return new_prompt diff --git a/src/ragas/prompt/metrics/context_precision.py b/src/ragas/prompt/metrics/context_precision.py index 23a8b2d9c..41a83324a 100644 --- a/src/ragas/prompt/metrics/context_precision.py +++ b/src/ragas/prompt/metrics/context_precision.py @@ -1,104 +1,67 @@ -"""Context Precision prompts - V1-identical using exact PydanticPrompt.to_string() output.""" - -import json - - -def context_precision_prompt(question: str, context: str, answer: str) -> str: - """ - V1-identical context precision prompt - matches PydanticPrompt.to_string() exactly. - - Args: - question: The question being asked - context: The context to evaluate for usefulness - answer: The answer that was generated - - Returns: - V1-identical prompt string for the LLM - """ - # Format inputs exactly like V1's model_dump_json(indent=4, exclude_none=True) - safe_question = json.dumps(question, ensure_ascii=False) - safe_context = json.dumps(context, ensure_ascii=False) - safe_answer = json.dumps(answer, ensure_ascii=False) - - return f"""Given question, answer and context verify if the context was useful in arriving at the given answer. Give verdict as "1" if useful and "0" if not with json output. -Please return the output in a JSON format that complies with the following schema as specified in JSON Schema: -{{"properties": {{"reason": {{"description": "Reason for verification", "title": "Reason", "type": "string"}}, "verdict": {{"description": "Binary (0/1) verdict of verification", "title": "Verdict", "type": "integer"}}}}, "required": ["reason", "verdict"], "title": "Verification", "type": "object"}}Do not use single quotes in your response but double quotes,properly escaped with a backslash. - ---------EXAMPLES----------- -Example 1 -Input: {{ - "question": "What can you tell me about Albert Einstein?", - "context": "Albert Einstein (14 March 1879 – 18 April 1955) was a German-born theoretical physicist, widely held to be one of the greatest and most influential scientists of all time. Best known for developing the theory of relativity, he also made important contributions to quantum mechanics, and was thus a central figure in the revolutionary reshaping of the scientific understanding of nature that modern physics accomplished in the first decades of the twentieth century. His mass–energy equivalence formula E = mc2, which arises from relativity theory, has been called 'the world's most famous equation'. He received the 1921 Nobel Prize in Physics 'for his services to theoretical physics, and especially for his discovery of the law of the photoelectric effect', a pivotal step in the development of quantum theory. His work is also known for its influence on the philosophy of science. In a 1999 poll of 130 leading physicists worldwide by the British journal Physics World, Einstein was ranked the greatest physicist of all time. His intellectual achievements and originality have made Einstein synonymous with genius.", - "answer": "Albert Einstein, born on 14 March 1879, was a German-born theoretical physicist, widely held to be one of the greatest and most influential scientists of all time. He received the 1921 Nobel Prize in Physics for his services to theoretical physics." -}} -Output: {{ - "reason": "The provided context was indeed useful in arriving at the given answer. The context includes key information about Albert Einstein's life and contributions, which are reflected in the answer.", - "verdict": 1 -}} - -Example 2 -Input: {{ - "question": "who won 2020 icc world cup?", - "context": "The 2022 ICC Men's T20 World Cup, held from October 16 to November 13, 2022, in Australia, was the eighth edition of the tournament. Originally scheduled for 2020, it was postponed due to the COVID-19 pandemic. England emerged victorious, defeating Pakistan by five wickets in the final to clinch their second ICC Men's T20 World Cup title.", - "answer": "England" -}} -Output: {{ - "reason": "the context was useful in clarifying the situation regarding the 2020 ICC World Cup and indicating that England was the winner of the tournament that was intended to be held in 2020 but actually took place in 2022.", - "verdict": 1 -}} - -Example 3 -Input: {{ - "question": "What is the tallest mountain in the world?", - "context": "The Andes is the longest continental mountain range in the world, located in South America. It stretches across seven countries and features many of the highest peaks in the Western Hemisphere. The range is known for its diverse ecosystems, including the high-altitude Andean Plateau and the Amazon rainforest.", - "answer": "Mount Everest." -}} -Output: {{ - "reason": "the provided context discusses the Andes mountain range, which, while impressive, does not include Mount Everest or directly relate to the question about the world's tallest mountain.", - "verdict": 0 -}} ------------------------------ - -Now perform the same with the following input -input: {{ - "question": {safe_question}, - "context": {safe_context}, - "answer": {safe_answer} -}} -Output: """ - - -def context_precision_with_reference_prompt( - question: str, context: str, reference: str -) -> str: - """ - V1-identical context precision prompt for WithReference variant. - - Args: - question: The question being asked - context: The context to evaluate for usefulness - reference: The reference answer to compare against - - Returns: - V1-identical prompt string for the LLM - """ - # This variant uses "reference" as the "answer" parameter in the prompt - return context_precision_prompt(question, context, reference) - - -def context_precision_without_reference_prompt( - question: str, context: str, response: str -) -> str: - """ - V1-identical context precision prompt for WithoutReference variant. - - Args: - question: The question being asked - context: The context to evaluate for usefulness - response: The response that was generated - - Returns: - V1-identical prompt string for the LLM - """ - # This variant uses "response" as the "answer" parameter in the prompt - return context_precision_prompt(question, context, response) +"""Context Precision prompt - Class-based implementation with structured input/output.""" + +from pydantic import BaseModel, Field + +from .base_prompt import BasePrompt + + +class ContextPrecisionInput(BaseModel): + """Input model for context precision evaluation.""" + + question: str = Field(..., description="The question being asked") + context: str = Field(..., description="The context to evaluate for usefulness") + answer: str = Field( + ..., description="The answer/reference/response to compare against" + ) + + +class ContextPrecisionOutput(BaseModel): + """Structured output for context precision evaluation.""" + + reason: str = Field(..., description="Reason for verification") + verdict: int = Field(..., description="Binary (0/1) verdict of verification") + + +class ContextPrecisionPrompt(BasePrompt[ContextPrecisionInput, ContextPrecisionOutput]): + """Context precision evaluation prompt with structured input/output.""" + + input_model = ContextPrecisionInput + output_model = ContextPrecisionOutput + + instruction = 'Given question, answer and context verify if the context was useful in arriving at the given answer. Give verdict as "1" if useful and "0" if not with json output.' + + examples = [ + ( + ContextPrecisionInput( + question="What can you tell me about Albert Einstein?", + context="Albert Einstein (14 March 1879 – 18 April 1955) was a German-born theoretical physicist, widely held to be one of the greatest and most influential scientists of all time. Best known for developing the theory of relativity, he also made important contributions to quantum mechanics, and was thus a central figure in the revolutionary reshaping of the scientific understanding of nature that modern physics accomplished in the first decades of the twentieth century. His mass–energy equivalence formula E = mc2, which arises from relativity theory, has been called 'the world's most famous equation'. He received the 1921 Nobel Prize in Physics 'for his services to theoretical physics, and especially for his discovery of the law of the photoelectric effect', a pivotal step in the development of quantum theory. His work is also known for its influence on the philosophy of science. In a 1999 poll of 130 leading physicists worldwide by the British journal Physics World, Einstein was ranked the greatest physicist of all time. His intellectual achievements and originality have made Einstein synonymous with genius.", + answer="Albert Einstein, born on 14 March 1879, was a German-born theoretical physicist, widely held to be one of the greatest and most influential scientists of all time. He received the 1921 Nobel Prize in Physics for his services to theoretical physics.", + ), + ContextPrecisionOutput( + reason="The provided context was indeed useful in arriving at the given answer. The context includes key information about Albert Einstein's life and contributions, which are reflected in the answer.", + verdict=1, + ), + ), + ( + ContextPrecisionInput( + question="who won 2020 icc world cup?", + context="The 2022 ICC Men's T20 World Cup, held from October 16 to November 13, 2022, in Australia, was the eighth edition of the tournament. Originally scheduled for 2020, it was postponed due to the COVID-19 pandemic. England emerged victorious, defeating Pakistan by five wickets in the final to clinch their second ICC Men's T20 World Cup title.", + answer="England", + ), + ContextPrecisionOutput( + reason="the context was useful in clarifying the situation regarding the 2020 ICC World Cup and indicating that England was the winner of the tournament that was intended to be held in 2020 but actually took place in 2022.", + verdict=1, + ), + ), + ( + ContextPrecisionInput( + question="What is the tallest mountain in the world?", + context="The Andes is the longest continental mountain range in the world, located in South America. It stretches across seven countries and features many of the highest peaks in the Western Hemisphere. The range is known for its diverse ecosystems, including the high-altitude Andean Plateau and the Amazon rainforest.", + answer="Mount Everest.", + ), + ContextPrecisionOutput( + reason="the provided context discusses the Andes mountain range, which, while impressive, does not include Mount Everest or directly relate to the question about the world's tallest mountain.", + verdict=0, + ), + ), + ]