Skip to content

Commit 07655ff

Browse files
authored
refactor: docs and warnings for metric base new structure (#2333)
Follow up on #2320
1 parent 31bf2a8 commit 07655ff

File tree

7 files changed

+83
-43
lines changed

7 files changed

+83
-43
lines changed

src/ragas/metrics/base.py

Lines changed: 61 additions & 28 deletions
Original file line number · Diff line number · Diff line change
@@ -781,11 +781,29 @@ def create_auto_response_model(name: str, **fields):
781781
Name for the model class
782782
**fields
783783
Field definitions in create_model format
784+
Each field is specified as: field_name=(type, default_or_field_info)
784785
785786
Returns:
786787
--------
787788
Type[BaseModel]
788789
Pydantic model class marked as auto-generated
790+
791+
Examples:
792+
---------
793+
>>> from pydantic import Field
794+
>>> # Simple model with required fields
795+
>>> ResponseModel = create_auto_response_model(
796+
... "ResponseModel",
797+
... value=(str, ...),
798+
... reason=(str, ...)
799+
... )
800+
>>>
801+
>>> # Model with Field validators and descriptions
802+
>>> ResponseModel = create_auto_response_model(
803+
... "ResponseModel",
804+
... value=(str, Field(..., description="The predicted value")),
805+
... reason=(str, Field(..., description="Reasoning for the prediction"))
806+
... )
789807
"""
790808
from pydantic import create_model
791809

@@ -927,23 +945,33 @@ def save(self, path: t.Optional[str] = None) -> None:
927945
elif not file_path.suffix:
928946
file_path = file_path.with_suffix(".json")
929947

948+
# Collect warning messages for data loss
949+
warning_messages = []
950+
930951
if hasattr(self, "_response_model") and self._response_model:
931952
# Only warn for custom response models, not auto-generated ones
932953
if not getattr(self._response_model, "__ragas_auto_generated__", False):
933-
warnings.warn(
934-
"Custom response_model cannot be saved and will be lost. "
935-
"You'll need to set it manually after loading."
954+
warning_messages.append(
955+
"- Custom response_model will be lost (set it manually after loading)"
936956
)
937957

938-
# Serialize the prompt
939-
prompt_data = self._serialize_prompt()
958+
# Serialize the prompt (may add embedding_model warning)
959+
prompt_data = self._serialize_prompt(warning_messages)
940960

941961
# Determine the metric type
942962
metric_type = self.__class__.__name__
943963

944964
# Get metric-specific config
945965
config = self._get_metric_config()
946966

967+
# Emit consolidated warning if there's data loss
968+
if warning_messages:
969+
warnings.warn(
970+
"Some metric components cannot be saved and will be lost:\n"
971+
+ "\n".join(warning_messages)
972+
+ "\n\nYou'll need to provide these when loading the metric."
973+
)
974+
947975
data = {
948976
"format_version": "1.0",
949977
"metric_type": metric_type,
@@ -962,22 +990,17 @@ def save(self, path: t.Optional[str] = None) -> None:
962990
except (OSError, IOError) as e:
963991
raise ValueError(f"Cannot save metric to {file_path}: {e}")
964992

965-
def _serialize_prompt(self) -> t.Dict[str, t.Any]:
993+
def _serialize_prompt(self, warning_messages: t.List[str]) -> t.Dict[str, t.Any]:
966994
"""Serialize the prompt for storage."""
967995
from ragas.prompt.dynamic_few_shot import DynamicFewShotPrompt
968996
from ragas.prompt.simple_prompt import Prompt
969997

970998
if isinstance(self.prompt, str):
971999
return {"type": "string", "instruction": self.prompt}
9721000
elif isinstance(self.prompt, DynamicFewShotPrompt):
973-
# Warn about embedding model
9741001
if self.prompt.example_store.embedding_model:
975-
import warnings
976-
977-
warnings.warn(
978-
"embedding_model cannot be saved and will be lost. "
979-
"You'll need to provide it when loading using: "
980-
"load(path, embedding_model=YourModel)"
1002+
warning_messages.append(
1003+
"- embedding_model will be lost (provide it when loading: load(path, embedding_model=YourModel))"
9811004
)
9821005

9831006
return {
@@ -1171,13 +1194,26 @@ def _deserialize_prompt(
11711194
prompt_type = prompt_data.get("type")
11721195

11731196
if prompt_type == "string":
1197+
if "instruction" not in prompt_data:
1198+
raise ValueError(
1199+
"Prompt data missing required 'instruction' field for string prompt"
1200+
)
11741201
return prompt_data["instruction"]
11751202
elif prompt_type == "Prompt":
1203+
if "instruction" not in prompt_data:
1204+
raise ValueError(
1205+
"Prompt data missing required 'instruction' field for Prompt"
1206+
)
11761207
examples = [
11771208
(ex["input"], ex["output"]) for ex in prompt_data.get("examples", [])
11781209
]
11791210
return Prompt(instruction=prompt_data["instruction"], examples=examples)
11801211
elif prompt_type == "DynamicFewShotPrompt":
1212+
if "instruction" not in prompt_data:
1213+
raise ValueError(
1214+
"Prompt data missing required 'instruction' field for DynamicFewShotPrompt"
1215+
)
1216+
11811217
if not embedding_model:
11821218
import warnings
11831219

@@ -1380,35 +1416,32 @@ def __repr__(self) -> str:
13801416
"""Return a clean string representation of the metric."""
13811417
metric_type = self.__class__.__name__
13821418

1383-
# Get allowed values in a clean format
13841419
allowed_values = self.allowed_values
1385-
if isinstance(allowed_values, list):
1386-
allowed_values_str = f", allowed_values={allowed_values}"
1387-
elif isinstance(allowed_values, tuple):
1388-
allowed_values_str = f", allowed_values={allowed_values}"
1389-
elif isinstance(allowed_values, range):
1420+
if isinstance(allowed_values, range):
13901421
allowed_values_str = (
13911422
f", allowed_values=({allowed_values.start}, {allowed_values.stop})"
13921423
)
1393-
else:
1424+
elif isinstance(allowed_values, (list, tuple, int)):
13941425
allowed_values_str = f", allowed_values={allowed_values}"
1426+
else:
1427+
allowed_values_str = f", allowed_values={repr(allowed_values)}"
13951428

1396-
# Get prompt string (truncated)
13971429
prompt_str = ""
13981430
if self.prompt:
1399-
if isinstance(self.prompt, str):
1400-
instruction = self.prompt
1401-
else:
1402-
instruction = (
1431+
instruction = (
1432+
self.prompt
1433+
if isinstance(self.prompt, str)
1434+
else (
14031435
self.prompt.instruction
14041436
if hasattr(self.prompt, "instruction")
14051437
else str(self.prompt)
14061438
)
1439+
)
14071440

14081441
if instruction:
1409-
# Truncate long prompts
1410-
if len(instruction) > 80:
1411-
prompt_str = f", prompt='{instruction[:77]}...'"
1442+
max_len = 80
1443+
if len(instruction) > max_len:
1444+
prompt_str = f", prompt='{instruction[: max_len - 3]}...'"
14121445
else:
14131446
prompt_str = f", prompt='{instruction}'"
14141447

src/ragas/metrics/decorator.py

Lines changed: 4 additions & 11 deletions
Original file line number · Diff line number · Diff line change
@@ -319,22 +319,15 @@ def __call__(self, *args, **kwargs):
319319
return self._func(*args, **kwargs)
320320

321321
def __repr__(self) -> str:
322-
"""Return a clean string representation of the decorator-based metric."""
323-
# Get function signature parameters
322+
from ragas.metrics.validators import get_metric_type_name
323+
324324
param_names = list(sig.parameters.keys())
325325
param_str = ", ".join(param_names)
326326

327-
# Get metric type based on allowed_values
328327
metric_type = "CustomMetric"
329328
if hasattr(self, "allowed_values"):
330-
if isinstance(self.allowed_values, list):
331-
metric_type = "DiscreteMetric"
332-
elif isinstance(self.allowed_values, tuple):
333-
metric_type = "NumericMetric"
334-
elif isinstance(self.allowed_values, int):
335-
metric_type = "RankingMetric"
336-
337-
# Get allowed values string
329+
metric_type = get_metric_type_name(self.allowed_values)
330+
338331
allowed_values_str = ""
339332
if hasattr(self, "allowed_values"):
340333
allowed_values_str = f"[{self.allowed_values!r}]"

src/ragas/metrics/discrete.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -105,4 +105,4 @@ def discrete_metric(
105105
allowed_values = ["pass", "fail"]
106106

107107
decorator_factory = create_metric_decorator()
108-
return decorator_factory(name=name, allowed_values=allowed_values, **metric_params)
108+
return decorator_factory(name=name, allowed_values=allowed_values, **metric_params) # type: ignore[return-value]

src/ragas/metrics/numeric.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -119,4 +119,4 @@ def numeric_metric(
119119
allowed_values = (0.0, 1.0)
120120

121121
decorator_factory = create_metric_decorator()
122-
return decorator_factory(name=name, allowed_values=allowed_values, **metric_params)
122+
return decorator_factory(name=name, allowed_values=allowed_values, **metric_params) # type: ignore[return-value]

src/ragas/metrics/ranking.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -110,4 +110,4 @@ def ranking_metric(
110110
allowed_values = 2
111111

112112
decorator_factory = create_metric_decorator()
113-
return decorator_factory(name=name, allowed_values=allowed_values, **metric_params)
113+
return decorator_factory(name=name, allowed_values=allowed_values, **metric_params) # type: ignore[return-value]

src/ragas/metrics/validators.py

Lines changed: 14 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -5,6 +5,8 @@
55
"NumericValidator",
66
"RankingValidator",
77
"AllowedValuesType",
8+
"get_validator_for_allowed_values",
9+
"get_metric_type_name",
810
]
911

1012
import typing as t
@@ -109,3 +111,15 @@ def get_validator_for_allowed_values(
109111
else:
110112
# Default to discrete if unclear
111113
return DiscreteValidator
114+
115+
116+
def get_metric_type_name(allowed_values: AllowedValuesType) -> str:
117+
"""Get the metric type name based on allowed_values type."""
118+
if isinstance(allowed_values, list):
119+
return "DiscreteMetric"
120+
elif isinstance(allowed_values, (tuple, range)):
121+
return "NumericMetric"
122+
elif isinstance(allowed_values, int):
123+
return "RankingMetric"
124+
else:
125+
return "CustomMetric"

tests/unit/test_simple_llm_metric_persistence.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -206,7 +206,7 @@ async def aembed_query(self, text: str):
206206

207207
try:
208208
# Save (should warn about embedding model)
209-
with pytest.warns(UserWarning, match="embedding_model cannot be saved"):
209+
with pytest.warns(UserWarning, match="embedding_model will be lost"):
210210
original_metric.save(temp_path)
211211

212212
# Load (provide embedding model)

0 commit comments

Comments (0)