
Commit c59ba02

fix: docs for discrete, numeric and ranking using instructor (#2397)
1 parent: 09d22fc

File tree: 3 files changed (+60 −10 lines)


src/ragas/metrics/discrete.py

Lines changed: 20 additions & 3 deletions
@@ -22,26 +22,43 @@ class DiscreteMetric(SimpleLLMMetric, DiscreteValidator):
 
     This class is used for metrics that output categorical values like
     "pass/fail", "good/bad/excellent", or custom discrete categories.
+    Uses the instructor library for structured LLM outputs.
 
     Attributes
     ----------
     allowed_values : List[str]
         List of allowed categorical values the metric can output.
         Default is ["pass", "fail"].
+    llm : Optional[BaseRagasLLM]
+        The language model instance for evaluation. Can be created using llm_factory().
+    prompt : Optional[Union[str, Prompt]]
+        The prompt template for the metric. Should contain placeholders for
+        evaluation inputs that will be formatted at runtime.
 
     Examples
     --------
     >>> from ragas.metrics import DiscreteMetric
-    >>> from ragas.llms import LangchainLLMWrapper
-    >>> from langchain_openai import ChatOpenAI
+    >>> from ragas.llms import llm_factory
+    >>> from openai import OpenAI
+    >>>
+    >>> # Create an LLM instance
+    >>> client = OpenAI(api_key="your-api-key")
+    >>> llm = llm_factory("gpt-4o-mini", client=client)
     >>>
     >>> # Create a custom discrete metric
-    >>> llm = LangchainLLMWrapper(ChatOpenAI())
     >>> metric = DiscreteMetric(
     ...     name="quality_check",
     ...     llm=llm,
+    ...     prompt="Check the quality of the response: {response}. Return 'excellent', 'good', or 'poor'.",
     ...     allowed_values=["excellent", "good", "poor"]
     ... )
+    >>>
+    >>> # Score with the metric
+    >>> result = metric.score(
+    ...     llm=llm,
+    ...     response="This is a great response!"
+    ... )
+    >>> print(result.value)  # Output: "excellent" or similar
     """
 
     allowed_values: t.List[str] = field(default_factory=lambda: ["pass", "fail"])
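
Put together as a script rather than a doctest, the updated example reads roughly as below. This is a minimal sketch based only on the docstring in this diff: the model name, prompt text, and the metric.score()/result.value calls are taken from the example as written, while the OPENAI_API_KEY handling and the final assertion are illustrative additions that have not been run against the library.

    import os

    from openai import OpenAI
    from ragas.llms import llm_factory
    from ragas.metrics import DiscreteMetric

    # Build the evaluator LLM as in the docstring example
    # (assumes OPENAI_API_KEY is set in the environment).
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    llm = llm_factory("gpt-4o-mini", client=client)

    # A discrete metric with three custom categories instead of the
    # default ["pass", "fail"].
    metric = DiscreteMetric(
        name="quality_check",
        llm=llm,
        prompt="Check the quality of the response: {response}. Return 'excellent', 'good', or 'poor'.",
        allowed_values=["excellent", "good", "poor"],
    )

    # The {response} placeholder in the prompt is filled from the
    # keyword arguments passed to score().
    result = metric.score(llm=llm, response="This is a great response!")
    print(result.value)  # one of "excellent", "good", or "poor"
    assert result.value in metric.allowed_values  # illustrative check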

src/ragas/metrics/numeric.py

Lines changed: 20 additions & 3 deletions
@@ -20,26 +20,43 @@ class NumericMetric(SimpleLLMMetric, NumericValidator):
 
     This class is used for metrics that output numeric scores within a
     defined range, such as 0.0 to 1.0 for similarity scores or 1-10 ratings.
+    Uses the instructor library for structured LLM outputs.
 
     Attributes
     ----------
     allowed_values : Union[Tuple[float, float], range]
         The valid range for metric outputs. Can be a tuple of (min, max) floats
         or a range object. Default is (0.0, 1.0).
+    llm : Optional[BaseRagasLLM]
+        The language model instance for evaluation. Can be created using llm_factory().
+    prompt : Optional[Union[str, Prompt]]
+        The prompt template for the metric. Should contain placeholders for
+        evaluation inputs that will be formatted at runtime.
 
     Examples
     --------
     >>> from ragas.metrics import NumericMetric
-    >>> from ragas.llms import LangchainLLMWrapper
-    >>> from langchain_openai import ChatOpenAI
+    >>> from ragas.llms import llm_factory
+    >>> from openai import OpenAI
+    >>>
+    >>> # Create an LLM instance
+    >>> client = OpenAI(api_key="your-api-key")
+    >>> llm = llm_factory("gpt-4o-mini", client=client)
     >>>
     >>> # Create a custom numeric metric with 0-10 range
-    >>> llm = LangchainLLMWrapper(ChatOpenAI())
     >>> metric = NumericMetric(
     ...     name="quality_score",
     ...     llm=llm,
+    ...     prompt="Rate the quality of this response on a scale of 0-10: {response}",
     ...     allowed_values=(0.0, 10.0)
     ... )
+    >>>
+    >>> # Score with the metric
+    >>> result = metric.score(
+    ...     llm=llm,
+    ...     response="This is a great response!"
+    ... )
+    >>> print(result.value)  # Output: a float between 0.0 and 10.0
     """
 
     allowed_values: t.Union[t.Tuple[float, float], range] = (0.0, 1.0)
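
The same pattern applies to NumericMetric. The sketch below mirrors the docstring with a 0-10 range and adds a range check on the returned value; as in the previous sketch, the model name and the score() signature are assumed from the docstring example rather than verified.

    import os

    from openai import OpenAI
    from ragas.llms import llm_factory
    from ragas.metrics import NumericMetric

    # Evaluator LLM, as in the docstring example.
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    llm = llm_factory("gpt-4o-mini", client=client)

    # allowed_values is the (min, max) range the score must fall in;
    # the default is (0.0, 1.0), widened here to 0-10.
    metric = NumericMetric(
        name="quality_score",
        llm=llm,
        prompt="Rate the quality of this response on a scale of 0-10: {response}",
        allowed_values=(0.0, 10.0),
    )

    result = metric.score(llm=llm, response="This is a great response!")
    print(result.value)  # a float between 0.0 and 10.0
    low, high = metric.allowed_values
    assert low <= result.value <= high  # illustrative check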

src/ragas/metrics/ranking.py

Lines changed: 20 additions & 4 deletions
@@ -22,26 +22,42 @@ class RankingMetric(SimpleLLMMetric, RankingValidator):
 
     This class is used for metrics that output ordered lists, such as
     ranking search results, prioritizing features, or ordering responses
-    by relevance.
+    by relevance. Uses the instructor library for structured LLM outputs.
 
     Attributes
     ----------
     allowed_values : int
         Expected number of items in the ranking list. Default is 2.
+    llm : Optional[BaseRagasLLM]
+        The language model instance for evaluation. Can be created using llm_factory().
+    prompt : Optional[Union[str, Prompt]]
+        The prompt template for the metric. Should contain placeholders for
+        evaluation inputs that will be formatted at runtime.
 
     Examples
    --------
     >>> from ragas.metrics import RankingMetric
-    >>> from ragas.llms import LangchainLLMWrapper
-    >>> from langchain_openai import ChatOpenAI
+    >>> from ragas.llms import llm_factory
+    >>> from openai import OpenAI
+    >>>
+    >>> # Create an LLM instance
+    >>> client = OpenAI(api_key="your-api-key")
+    >>> llm = llm_factory("gpt-4o-mini", client=client)
     >>>
     >>> # Create a ranking metric that returns top 3 items
-    >>> llm = LangchainLLMWrapper(ChatOpenAI())
     >>> metric = RankingMetric(
     ...     name="relevance_ranking",
     ...     llm=llm,
+    ...     prompt="Rank these results by relevance: {results}",
     ...     allowed_values=3
     ... )
+    >>>
+    >>> # Score with the metric
+    >>> result = metric.score(
+    ...     llm=llm,
+    ...     results="result1, result2, result3"
+    ... )
+    >>> print(result.value)  # Output: a list of 3 ranked items
     """
 
     allowed_values: int = 2
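
And for RankingMetric, where result.value is an ordered list whose length should match allowed_values. Again a sketch under the same assumptions as the previous two: the prompt, model name, and score() call follow the docstring, and the length assertion is an illustrative addition.

    import os

    from openai import OpenAI
    from ragas.llms import llm_factory
    from ragas.metrics import RankingMetric

    # Evaluator LLM, as in the docstring example.
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    llm = llm_factory("gpt-4o-mini", client=client)

    # For ranking metrics, allowed_values is the expected number of
    # items in the returned ranking (default 2, here 3).
    metric = RankingMetric(
        name="relevance_ranking",
        llm=llm,
        prompt="Rank these results by relevance: {results}",
        allowed_values=3,
    )

    result = metric.score(llm=llm, results="result1, result2, result3")
    print(result.value)  # an ordered list of 3 items
    assert len(result.value) == metric.allowed_values  # illustrative check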

0 commit comments
