
Commit 8f2f1df

refactor: improve metrics code quality (#2337)
## Changes Made

- Add type hints and docstrings, fix typos, and remove the duplicate create_nano_id.
1 parent 49f47f1 commit 8f2f1df

File tree: 5 files changed (+317, -66 lines)


src/ragas/backends/utils.py

Lines changed: 0 additions & 21 deletions
@@ -3,27 +3,6 @@
 from __future__ import annotations

 import random
-import string
-import typing as t
-import uuid
-
-
-def create_nano_id(size: int = 12) -> str:
-    """Create a short, URL-safe unique identifier."""
-    # Define characters to use (alphanumeric)
-    alphabet = string.ascii_letters + string.digits
-
-    # Generate UUID and convert to int
-    uuid_int = t.cast(int, uuid.uuid4().int)
-
-    # Convert to base62
-    result = ""
-    while uuid_int:
-        uuid_int, remainder = divmod(uuid_int, len(alphabet))
-        result = alphabet[remainder] + result
-
-    # Pad if necessary and return desired length
-    return result[:size]


 class MemorableNames:
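
For reference, the deleted helper encodes a UUID's 128-bit integer in base62 and truncates the result to the requested size. A minimal standalone sketch of that divmod-based conversion (illustrative only; the canonical create_nano_id kept elsewhere in the codebase is not shown in this commit):

```python
import string
import uuid

ALPHABET = string.ascii_letters + string.digits  # 62 URL-safe characters


def to_base62(n: int) -> str:
    """Convert a non-negative integer to its base62 representation."""
    if n == 0:
        return ALPHABET[0]
    digits = []
    while n:
        n, remainder = divmod(n, len(ALPHABET))
        digits.append(ALPHABET[remainder])
    return "".join(reversed(digits))


# A UUID's 128-bit integer encodes to roughly 22 base62 characters;
# truncating to 12 mirrors the removed helper's default size.
print(to_base62(uuid.uuid4().int)[:12])
```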

src/ragas/metrics/base.py

Lines changed: 113 additions & 13 deletions
@@ -139,7 +139,16 @@ def get_required_columns(
         return self.required_columns

     @abstractmethod
-    def init(self, run_config: RunConfig): ...
+    def init(self, run_config: RunConfig) -> None:
+        """
+        Initialize the metric with the given run configuration.
+
+        Parameters
+        ----------
+        run_config : RunConfig
+            Configuration for the metric run including timeouts and other settings.
+        """
+        ...

     @deprecated("0.2", removal="0.3", alternative="single_turn_ascore")
     def score(self, row: t.Dict, callbacks: Callbacks = None) -> float:
@@ -229,10 +238,23 @@ class MetricWithLLM(Metric, PromptMixin):
     llm: t.Optional[BaseRagasLLM] = None
     output_type: t.Optional[MetricOutputType] = None

-    def init(self, run_config: RunConfig):
+    def init(self, run_config: RunConfig) -> None:
+        """
+        Initialize the metric with run configuration and validate LLM is present.
+
+        Parameters
+        ----------
+        run_config : RunConfig
+            Configuration for the metric run.
+
+        Raises
+        ------
+        ValueError
+            If no LLM is provided to the metric.
+        """
         if self.llm is None:
             raise ValueError(
-                f"Metric '{self.name}' has no valid LLM provided (self.llm is None). Please initantiate a the metric with an LLM to run."  # noqa
+                f"Metric '{self.name}' has no valid LLM provided (self.llm is None). Please instantiate the metric with an LLM to run."
             )
         self.llm.set_run_config(run_config)

@@ -735,29 +757,106 @@ class ModeMetric(t.Protocol):

 @dataclass
 class SimpleBaseMetric(ABC):
-    """Base class for simple metrics that return MetricResult objects."""
+    """
+    Base class for simple metrics that return MetricResult objects.
+
+    This class provides the foundation for metrics that evaluate inputs
+    and return structured MetricResult objects containing scores and reasoning.
+
+    Attributes
+    ----------
+    name : str
+        The name of the metric.
+    allowed_values : AllowedValuesType
+        Allowed values for the metric output. Can be a list of strings for
+        discrete metrics, a tuple of floats for numeric metrics, or an integer
+        for ranking metrics.
+
+    Examples
+    --------
+    >>> from ragas.metrics import discrete_metric
+    >>>
+    >>> @discrete_metric(name="sentiment", allowed_values=["positive", "negative"])
+    >>> def sentiment_metric(user_input: str, response: str) -> str:
+    ...     return "positive" if "good" in response else "negative"
+    >>>
+    >>> result = sentiment_metric(user_input="How are you?", response="I'm good!")
+    >>> print(result.value)  # "positive"
+    """

     name: str
     allowed_values: AllowedValuesType = field(default_factory=lambda: ["pass", "fail"])

     @abstractmethod
     def score(self, **kwargs) -> "MetricResult":
+        """
+        Synchronously calculate the metric score.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Input parameters required by the specific metric implementation.
+
+        Returns
+        -------
+        MetricResult
+            The evaluation result containing the score and reasoning.
+        """
         pass

     @abstractmethod
     async def ascore(self, **kwargs) -> "MetricResult":
+        """
+        Asynchronously calculate the metric score.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Input parameters required by the specific metric implementation.
+
+        Returns
+        -------
+        MetricResult
+            The evaluation result containing the score and reasoning.
+        """
         pass

     def batch_score(
         self,
         inputs: t.List[t.Dict[str, t.Any]],
     ) -> t.List["MetricResult"]:
+        """
+        Synchronously calculate scores for a batch of inputs.
+
+        Parameters
+        ----------
+        inputs : List[Dict[str, Any]]
+            List of input dictionaries, each containing parameters for the metric.
+
+        Returns
+        -------
+        List[MetricResult]
+            List of evaluation results, one for each input.
+        """
         return [self.score(**input_dict) for input_dict in inputs]

     async def abatch_score(
         self,
         inputs: t.List[t.Dict[str, t.Any]],
     ) -> t.List["MetricResult"]:
+        """
+        Asynchronously calculate scores for a batch of inputs in parallel.
+
+        Parameters
+        ----------
+        inputs : List[Dict[str, Any]]
+            List of input dictionaries, each containing parameters for the metric.
+
+        Returns
+        -------
+        List[MetricResult]
+            List of evaluation results, one for each input.
+        """
         async_tasks = []
         for input_dict in inputs:
             # Process input asynchronously
@@ -767,29 +866,30 @@ async def abatch_score(
         return await asyncio.gather(*async_tasks)


-def create_auto_response_model(name: str, **fields):
-    """Create a response model and mark it as auto-generated by Ragas.
+def create_auto_response_model(name: str, **fields) -> t.Type["BaseModel"]:
+    """
+    Create a response model and mark it as auto-generated by Ragas.

     This function creates a Pydantic model using create_model and marks it
     with a special attribute to indicate it was auto-generated. This allows
     the save() method to distinguish between auto-generated models (which
     are recreated on load) and custom user models.

-    Parameters:
-    -----------
+    Parameters
+    ----------
     name : str
         Name for the model class
     **fields
-        Field definitions in create_model format
+        Field definitions in create_model format.
         Each field is specified as: field_name=(type, default_or_field_info)

-    Returns:
-    --------
+    Returns
+    -------
     Type[BaseModel]
         Pydantic model class marked as auto-generated

-    Examples:
-    ---------
+    Examples
+    --------
     >>> from pydantic import Field
     >>> # Simple model with required fields
     >>> ResponseModel = create_auto_response_model(
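
The Examples section of create_auto_response_model is cut off at the end of this hunk. For orientation, the field-definition format it forwards to Pydantic is the same one create_model accepts, field_name=(type, default_or_field_info). A minimal sketch using create_model directly, with illustrative field names that are not taken from the commit:

```python
from pydantic import Field, create_model

# Each keyword argument follows the field_name=(type, default_or_field_info)
# convention described in the docstring above.
ResponseModel = create_model(
    "ResponseModel",
    reason=(str, Field(..., description="Reasoning for the value")),
    value=(str, Field(..., description="The value predicted")),
)

print(ResponseModel(reason="matches the reference answer", value="pass"))
```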

src/ragas/metrics/discrete.py

Lines changed: 68 additions & 12 deletions
@@ -17,6 +17,33 @@

 @dataclass(repr=False)
 class DiscreteMetric(SimpleLLMMetric, DiscreteValidator):
+    """
+    Metric for categorical/discrete evaluations with predefined allowed values.
+
+    This class is used for metrics that output categorical values like
+    "pass/fail", "good/bad/excellent", or custom discrete categories.
+
+    Attributes
+    ----------
+    allowed_values : List[str]
+        List of allowed categorical values the metric can output.
+        Default is ["pass", "fail"].
+
+    Examples
+    --------
+    >>> from ragas.metrics import DiscreteMetric
+    >>> from ragas.llms import LangchainLLMWrapper
+    >>> from langchain_openai import ChatOpenAI
+    >>>
+    >>> # Create a custom discrete metric
+    >>> llm = LangchainLLMWrapper(ChatOpenAI())
+    >>> metric = DiscreteMetric(
+    ...     name="quality_check",
+    ...     llm=llm,
+    ...     allowed_values=["excellent", "good", "poor"]
+    ... )
+    """
+
     allowed_values: t.List[str] = field(default_factory=lambda: ["pass", "fail"])

     def __post_init__(self):
@@ -27,8 +54,8 @@ def __post_init__(self):

         self._response_model = create_auto_response_model(
             "DiscreteResponseModel",
-            reason=(str, Field(..., description="Reaoning for the value")),
-            value=(t.Literal[values], Field(..., description="the value predicted")),
+            reason=(str, Field(..., description="Reasoning for the value")),
+            value=(t.Literal[values], Field(..., description="The value predicted")),
         )

     def get_correlation(
@@ -88,18 +115,47 @@ def discrete_metric(
     *,
     name: t.Optional[str] = None,
     allowed_values: t.Optional[t.List[str]] = None,
-    **metric_params,
+    **metric_params: t.Any,
 ) -> t.Callable[[t.Callable[..., t.Any]], DiscreteMetricProtocol]:
     """
-    Decorator for creating discrete metrics.
-
-    Args:
-        name: Optional name for the metric (defaults to function name)
-        allowed_values: List of allowed string values for the metric
-        **metric_params: Additional parameters for the metric
-
-    Returns:
-        A decorator that transforms a function into a DiscreteMetric instance
+    Decorator for creating discrete/categorical metrics.
+
+    This decorator transforms a regular function into a DiscreteMetric instance
+    that can be used for evaluation with predefined categorical outputs.
+
+    Parameters
+    ----------
+    name : str, optional
+        Name for the metric. If not provided, uses the function name.
+    allowed_values : List[str], optional
+        List of allowed categorical values for the metric output.
+        Default is ["pass", "fail"].
+    **metric_params : Any
+        Additional parameters to pass to the metric initialization.
+
+    Returns
+    -------
+    Callable[[Callable[..., Any]], DiscreteMetricProtocol]
+        A decorator that transforms a function into a DiscreteMetric instance.
+
+    Examples
+    --------
+    >>> from ragas.metrics import discrete_metric
+    >>>
+    >>> @discrete_metric(name="sentiment", allowed_values=["positive", "neutral", "negative"])
+    >>> def sentiment_analysis(user_input: str, response: str) -> str:
+    ...     '''Analyze sentiment of the response.'''
+    ...     if "great" in response.lower() or "good" in response.lower():
+    ...         return "positive"
+    ...     elif "bad" in response.lower() or "poor" in response.lower():
+    ...         return "negative"
+    ...     return "neutral"
+    >>>
+    >>> result = sentiment_analysis(
+    ...     user_input="How was your day?",
+    ...     response="It was great!"
+    ... )
+    >>> print(result.value)  # "positive"
     """
     if allowed_values is None:
         allowed_values = ["pass", "fail"]
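
The __post_init__ change above builds the response model with a t.Literal over the metric's allowed values, so the predicted value is validated against that list. A hand-rolled sketch of the same constraint using Pydantic directly (the QualityResponse model here is illustrative, not part of the commit):

```python
import typing as t

from pydantic import BaseModel, ValidationError


class QualityResponse(BaseModel):
    # Mirrors the auto-generated DiscreteResponseModel: the Literal field
    # restricts the predicted value to the metric's allowed_values.
    reason: str
    value: t.Literal["excellent", "good", "poor"]


print(QualityResponse(reason="clear and complete", value="good").value)

try:
    QualityResponse(reason="off the scale", value="mediocre")
except ValidationError:
    print("values outside allowed_values fail validation")
```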

0 commit comments
