31 changes: 15 additions & 16 deletions src/ragas/metrics/collections/_context_precision.py
@@ -4,26 +4,19 @@
from typing import List

import numpy as np
from pydantic import BaseModel

from ragas.metrics.collections.base import BaseMetric
from ragas.metrics.result import MetricResult
from ragas.prompt.metrics.context_precision import (
context_precision_with_reference_prompt,
context_precision_without_reference_prompt,
ContextPrecisionInput,
ContextPrecisionOutput,
ContextPrecisionPrompt,
)

if t.TYPE_CHECKING:
from ragas.llms.base import InstructorBaseRagasLLM


class ContextPrecisionOutput(BaseModel):
"""Structured output for context precision evaluation."""

reason: str
verdict: int


class ContextPrecisionWithReference(BaseMetric):
"""
Modern v2 implementation of context precision with reference.
@@ -79,6 +72,7 @@ def __init__(
"""
# Set attributes explicitly before calling super()
self.llm = llm
self.prompt = ContextPrecisionPrompt() # Initialize prompt class once

# Call super() for validation (without passing llm in kwargs)
super().__init__(name=name, **kwargs)
@@ -108,10 +102,12 @@ async def ascore(
# Evaluate each retrieved context
verdicts = []
for context in retrieved_contexts:
prompt = context_precision_with_reference_prompt(
user_input, context, reference
# Create input data and generate prompt
input_data = ContextPrecisionInput(
question=user_input, context=context, answer=reference
)
result = await self.llm.agenerate(prompt, ContextPrecisionOutput)
prompt_string = self.prompt.to_string(input_data)
result = await self.llm.agenerate(prompt_string, ContextPrecisionOutput)
verdicts.append(result.verdict)

# Calculate average precision
@@ -196,6 +192,7 @@ def __init__(
"""
# Set attributes explicitly before calling super()
self.llm = llm
self.prompt = ContextPrecisionPrompt() # Initialize prompt class once

# Call super() for validation (without passing llm in kwargs)
super().__init__(name=name, **kwargs)
@@ -225,10 +222,12 @@ async def ascore(
# Evaluate each retrieved context
verdicts = []
for context in retrieved_contexts:
prompt = context_precision_without_reference_prompt(
user_input, context, response
# Create input data and generate prompt
input_data = ContextPrecisionInput(
question=user_input, context=context, answer=response
)
result = await self.llm.agenerate(prompt, ContextPrecisionOutput)
prompt_string = self.prompt.to_string(input_data)
result = await self.llm.agenerate(prompt_string, ContextPrecisionOutput)
verdicts.append(result.verdict)

# Calculate average precision
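The net effect in both metric classes is the same: instead of calling a standalone prompt function, ascore now wraps its inputs in ContextPrecisionInput, renders the prompt string via ContextPrecisionPrompt.to_string(), and passes that string together with ContextPrecisionOutput to llm.agenerate(). A minimal caller-side sketch of the with-reference variant follows; the import path, the default metric name, and my_llm (an instructor-style LLM wrapper) are assumptions and are not shown in this diff.

# Sketch only: import path and LLM wrapper are assumptions, not part of this PR.
from ragas.metrics.collections._context_precision import ContextPrecisionWithReference


async def main(my_llm):
    # my_llm: an InstructorBaseRagasLLM-compatible client (assumed to exist)
    metric = ContextPrecisionWithReference(llm=my_llm)
    result = await metric.ascore(
        user_input="When was the Eiffel Tower completed?",
        retrieved_contexts=["The Eiffel Tower was completed in 1889."],
        reference="It was completed in 1889.",
    )
    # ascore averages the per-context verdicts into a single precision score
    print(result)

# Run with: asyncio.run(main(my_llm)), supplying a real LLM client.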
12 changes: 6 additions & 6 deletions src/ragas/prompt/metrics/__init__.py
@@ -4,16 +4,16 @@
from ragas.prompt.metrics.answer_relevance import answer_relevancy_prompt
from ragas.prompt.metrics.common import nli_statement_prompt, statement_generator_prompt
from ragas.prompt.metrics.context_precision import (
context_precision_prompt,
context_precision_with_reference_prompt,
context_precision_without_reference_prompt,
ContextPrecisionInput,
ContextPrecisionOutput,
ContextPrecisionPrompt,
)

__all__ = [
"answer_relevancy_prompt",
"context_precision_prompt",
"context_precision_with_reference_prompt",
"context_precision_without_reference_prompt",
"ContextPrecisionPrompt",
"ContextPrecisionInput",
"ContextPrecisionOutput",
"correctness_classifier_prompt",
"nli_statement_prompt",
"statement_generator_prompt",
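With these re-exports in place, downstream code can render the context precision prompt directly. A small sketch, assuming ContextPrecisionPrompt follows the BasePrompt interface introduced below and that ContextPrecisionInput keeps the question/context/answer fields used by the metric above:

# Sketch: renders the context precision prompt as a plain string.
from ragas.prompt.metrics import ContextPrecisionInput, ContextPrecisionPrompt

prompt = ContextPrecisionPrompt()
prompt_string = prompt.to_string(
    ContextPrecisionInput(
        question="Who wrote Hamlet?",
        context="Hamlet is a tragedy written by William Shakespeare.",
        answer="William Shakespeare",
    )
)
print(prompt_string)  # instruction + output JSON schema + few-shot examples + this input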
152 changes: 152 additions & 0 deletions src/ragas/prompt/metrics/base_prompt.py
@@ -0,0 +1,152 @@
"""Base prompt class for metrics with structured input/output models."""

import json
import typing as t
from abc import ABC

from pydantic import BaseModel

# Type variables for generics
InputModel = t.TypeVar("InputModel", bound=BaseModel)
OutputModel = t.TypeVar("OutputModel", bound=BaseModel)


class BasePrompt(ABC, t.Generic[InputModel, OutputModel]):
"""
Base class for structured prompts with type-safe input/output models.

Attributes:
input_model: Pydantic model class for input validation
output_model: Pydantic model class for output schema generation
instruction: Task description for the LLM
examples: List of (input, output) example pairs for few-shot learning
language: Language for the prompt (default: "english")
"""

# Must be set by subclasses
input_model: t.Type[InputModel]
output_model: t.Type[OutputModel]
instruction: str
examples: t.List[t.Tuple[InputModel, OutputModel]]
language: str = "english"

def to_string(self, data: InputModel) -> str:
"""
Convert prompt with input data to complete prompt string for LLM.

Args:
data: Input data instance (validated by input_model)

Returns:
Complete prompt string ready for LLM
"""
# Generate JSON schema for output
output_schema = json.dumps(self.output_model.model_json_schema())

# Generate examples section
examples_str = self._generate_examples()

# Convert input data to JSON
input_json = data.model_dump_json(indent=4, exclude_none=True)

# Build complete prompt (matches existing function format)
return f"""{self.instruction}
Please return the output in a JSON format that complies with the following schema as specified in JSON Schema:
{output_schema}Do not use single quotes in your response but double quotes,properly escaped with a backslash.

{examples_str}
-----------------------------

Now perform the same with the following input
input: {input_json}
Output: """

def _generate_examples(self) -> str:
"""
Generate examples section of the prompt.

Returns:
Formatted examples string or empty string if no examples
"""
if not self.examples:
return ""

example_strings = []
for idx, (input_data, output_data) in enumerate(self.examples):
example_strings.append(
f"Example {idx + 1}\n"
f"Input: {input_data.model_dump_json(indent=4)}\n"
f"Output: {output_data.model_dump_json(indent=4)}"
)

return "--------EXAMPLES-----------\n" + "\n\n".join(example_strings)

async def adapt(
self,
target_language: str,
llm,
adapt_instruction: bool = False,
) -> "BasePrompt[InputModel, OutputModel]":
"""
Adapt the prompt to a new language using minimal translation.

Args:
target_language: Target language (e.g., "spanish", "french")
llm: LLM instance for translation
adapt_instruction: Whether to adapt instruction text (default: False)

Returns:
New prompt instance adapted to the target language
"""
import copy

# Create adapted prompt
new_prompt = copy.deepcopy(self)
new_prompt.language = target_language

# Translate instruction if requested
if adapt_instruction:
instruction_prompt = f"Translate this to {target_language}, keep technical terms: {self.instruction}"
try:
response = await llm.agenerate(instruction_prompt)
new_prompt.instruction = str(response).strip()
except Exception:
# Keep original if translation fails
pass

# Translate examples (simplified approach)
translated_examples = []
for input_ex, output_ex in self.examples:
try:
# Simple per-example translation
example_prompt = f"""Translate this example to {target_language}, keep the same structure:

Input: {input_ex.model_dump_json()}
Output: {output_ex.model_dump_json()}

Return as: Input: {{translated_input_json}} Output: {{translated_output_json}}"""

response = await llm.agenerate(example_prompt)

# Try to extract translated JSON (basic parsing)
response_str = str(response)
if "Input:" in response_str and "Output:" in response_str:
parts = response_str.split("Output:")
input_part = parts[0].replace("Input:", "").strip()
output_part = parts[1].strip()

translated_input = self.input_model.model_validate_json(input_part)
translated_output = self.output_model.model_validate_json(
output_part
)
translated_examples.append((translated_input, translated_output))
else:
# Fallback to original
translated_examples.append((input_ex, output_ex))

except Exception:
# Fallback to original example if translation fails
translated_examples.append((input_ex, output_ex))

new_prompt.examples = translated_examples
return new_prompt
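To make the intended subclassing pattern concrete, here is an illustrative sketch of a new metric prompt built on BasePrompt; the Faithfulness* names, field choices, and instruction text are hypothetical and not part of this PR.

# Illustrative only: the models and instruction below are hypothetical.
from pydantic import BaseModel

from ragas.prompt.metrics.base_prompt import BasePrompt


class FaithfulnessInput(BaseModel):
    question: str
    context: str
    answer: str


class FaithfulnessOutput(BaseModel):
    reason: str
    verdict: int


class FaithfulnessPrompt(BasePrompt[FaithfulnessInput, FaithfulnessOutput]):
    input_model = FaithfulnessInput
    output_model = FaithfulnessOutput
    instruction = "Verify whether the answer can be attributed to the given context."
    examples = [
        (
            FaithfulnessInput(
                question="Where is the Eiffel Tower located?",
                context="The Eiffel Tower stands on the Champ de Mars in Paris.",
                answer="Paris",
            ),
            FaithfulnessOutput(
                reason="The context explicitly places the tower in Paris.",
                verdict=1,
            ),
        )
    ]


# to_string() renders the instruction, output schema, examples, and the new input.
text = FaithfulnessPrompt().to_string(
    FaithfulnessInput(
        question="Who painted the Mona Lisa?",
        context="The Mona Lisa was painted by Leonardo da Vinci.",
        answer="Leonardo da Vinci",
    )
)

Because adapt() deep-copies the instance and returns the copy, the caller decides whether to swap out its stored prompt or keep both, which is the open question raised in the review comment below.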
Contributor Author
Do we return a new prompt? Or should we replace the existing one?
