
Commit 99d1a47

feat: train your own evaluators (#1701)
- [x] Config
- [x] Loss
- [x] Optimizer base
1 parent 643ab66 commit 99d1a47

File tree

7 files changed: 208 additions, 0 deletions

src/ragas/config.py

Lines changed: 27 additions & 0 deletions
import typing as t

from pydantic import BaseModel, Field

from ragas.embeddings import BaseRagasEmbeddings
from ragas.llms import BaseRagasLLM
from ragas.losses import Loss
from ragas.optimizers import Optimizer

DEFAULT_OPTIMIZER_CONFIG = {"max_steps": 100}


class DemonstrationConfig(BaseModel):
    enabled: bool = True
    top_k: int = 3
    technique: t.Literal["random", "similarity"] = "similarity"
    embedding: t.Optional[BaseRagasEmbeddings] = None


class InstructionConfig(BaseModel):
    enabled: bool = True
    loss: t.Optional[Loss] = None
    optimizer: Optimizer
    optimizer_config: t.Dict[str, t.Any] = Field(
        default_factory=lambda: DEFAULT_OPTIMIZER_CONFIG
    )
    llm: t.Optional[BaseRagasLLM] = None
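
A minimal usage sketch for these two config models, not part of the commit. DemonstrationConfig can be built directly from its defaults; InstructionConfig requires a concrete Optimizer implementation, which this commit does not yet ship, so that part is left as a commented placeholder (MyOptimizer is hypothetical):

# Hypothetical usage sketch, assuming ragas with this commit installed.
from ragas.config import DemonstrationConfig, InstructionConfig
from ragas.losses import MSELoss

# Few-shot demonstration selection: keep the 3 most similar annotated examples.
demo_config = DemonstrationConfig(top_k=3, technique="similarity")

# Instruction optimization needs a concrete Optimizer subclass; `MyOptimizer`
# below is a placeholder, so the construction stays commented out.
# instruction_config = InstructionConfig(
#     loss=MSELoss(),
#     optimizer=MyOptimizer(),
#     optimizer_config={"max_steps": 50},
# )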

src/ragas/dataset_schema.py

Lines changed: 1 addition & 0 deletions
The only change here is a single blank line inserted before PromptAnnotation:

@@ -531,6 +531,7 @@ def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str:
         return evaluation_endpoint


+
 class PromptAnnotation(BaseModel):
     prompt_input: t.Dict[str, t.Any]
     prompt_output: t.Dict[str, t.Any]

src/ragas/embeddings/base.py

Lines changed: 14 additions & 0 deletions
@@ -10,11 +10,14 @@
 from langchain_core.embeddings import Embeddings
 from langchain_openai.embeddings import OpenAIEmbeddings
 from pydantic.dataclasses import dataclass
+from pydantic_core import CoreSchema, core_schema

 from ragas.run_config import RunConfig, add_async_retry, add_retry

 if t.TYPE_CHECKING:
     from llama_index.core.base.embeddings.base import BaseEmbedding
+    from pydantic import GetCoreSchemaHandler
+

 DEFAULT_MODEL_NAME = "BAAI/bge-small-en-v1.5"

@@ -64,6 +67,17 @@ def set_run_config(self, run_config: RunConfig):
         """
         self.run_config = run_config

+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, source_type: t.Any, handler: GetCoreSchemaHandler
+    ) -> CoreSchema:
+        """
+        Define how Pydantic generates a schema for BaseRagasEmbeddings.
+        """
+        return core_schema.no_info_after_validator_function(
+            cls, core_schema.is_instance_schema(cls)  # The validator function
+        )
+

 class LangchainEmbeddingsWrapper(BaseRagasEmbeddings):
     """

src/ragas/losses.py

Lines changed: 99 additions & 0 deletions
import typing as t
from abc import ABC, abstractmethod


class Loss(ABC):
    """
    Abstract base class for all loss functions.
    """

    @abstractmethod
    def __call__(self, predicted: t.List, actual: t.List) -> float:
        raise NotImplementedError


class MSELoss(Loss):
    """
    Mean Squared Error loss function.
    """

    reduction: t.Literal["mean", "sum"] = "mean"

    def __call__(self, predicted: t.List[float], actual: t.List[float]) -> float:
        errors = [(p - a) ** 2 for p, a in zip(predicted, actual)]
        if self.reduction == "mean":
            return sum(errors) / len(errors)
        elif self.reduction == "sum":
            return sum(errors)
        else:
            raise ValueError(f"Invalid reduction method: {self.reduction}")


class BinaryMetricLoss(Loss):
    """
    Computes the loss for binary metrics.
    Supports accuracy and F1-score.
    """

    metric: t.Literal["accuracy", "f1_score"] = "accuracy"

    def __call__(self, predicted: t.List[int], actual: t.List[int]) -> float:
        """
        Computes the loss using the specified metric.

        Parameters
        ----------
        predicted : list[int]
            List of predicted binary values (0 or 1).
        actual : list[int]
            List of actual binary values (0 or 1).

        Returns
        -------
        float
            The computed loss based on the chosen metric.
        """
        if len(predicted) != len(actual):
            raise ValueError("Predicted and actual lists must have the same length.")

        if self.metric == "accuracy":
            return self._accuracy(predicted, actual)
        elif self.metric == "f1_score":
            return self._f1_score(predicted, actual)
        else:
            raise ValueError(f"Unsupported metric type: {self.metric}")

    def _accuracy(self, predicted: t.List[int], actual: t.List[int]) -> float:
        """
        Computes accuracy.

        Returns
        -------
        float
            Accuracy (proportion of correct predictions).
        """
        correct = sum(p == a for p, a in zip(predicted, actual))
        return correct / len(actual)

    def _f1_score(self, predicted: t.List[int], actual: t.List[int]) -> float:
        """
        Computes the F1-score.

        Returns
        -------
        float
            The F1-score.
        """
        tp = sum(p == 1 and a == 1 for p, a in zip(predicted, actual))
        fp = sum(p == 1 and a == 0 for p, a in zip(predicted, actual))
        fn = sum(p == 0 and a == 1 for p, a in zip(predicted, actual))

        precision = tp / (tp + fp) if tp + fp > 0 else 0
        recall = tp / (tp + fn) if tp + fn > 0 else 0
        f1 = (
            (2 * precision * recall) / (precision + recall)
            if precision + recall > 0
            else 0
        )
        return f1
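
A quick sanity check of the two losses on toy numbers, worked out by hand (not part of the commit):

from ragas.losses import BinaryMetricLoss, MSELoss

mse = MSELoss()                           # reduction defaults to "mean"
print(mse([0.9, 0.4], [1.0, 0.0]))        # ((-0.1)**2 + 0.4**2) / 2 ≈ 0.085

acc = BinaryMetricLoss()                  # metric defaults to "accuracy"
print(acc([1, 0, 1, 1], [1, 0, 0, 1]))    # 3 of 4 correct -> 0.75

f1 = BinaryMetricLoss()
f1.metric = "f1_score"                    # class-level attribute, no __init__ argument
print(f1([1, 1, 0], [1, 0, 0]))           # tp=1, fp=1, fn=0 -> P=0.5, R=1.0, F1 ≈ 0.667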

src/ragas/metrics/base.py

Lines changed: 12 additions & 0 deletions
@@ -26,8 +26,10 @@
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks

+    from ragas.config import DemonstrationConfig, InstructionConfig
     from ragas.embeddings import BaseRagasEmbeddings
     from ragas.llms import BaseRagasLLM
+
 logger = logging.getLogger(__name__)

@@ -227,6 +229,16 @@ def init(self, run_config: RunConfig):
             )
         self.llm.set_run_config(run_config)

+    def train(
+        self,
+        path: str,
+        demonstration_config: DemonstrationConfig,
+        instruction_config: InstructionConfig,
+        callbacks: Callbacks,
+    ) -> None:
+        raise NotImplementedError("Training is not implemented for this metric.")
+

 @dataclass
 class MetricWithEmbeddings(Metric):
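
In this commit the new train() hook only fixes the call shape; the base implementation raises NotImplementedError, so a metric has to override it before training can run. A hedged sketch of how a caller would eventually invoke it (the metric object, the annotation file path, and instruction_config are placeholders):

from ragas.config import DemonstrationConfig, InstructionConfig

demo_config = DemonstrationConfig()
# instruction_config = InstructionConfig(optimizer=...)  # needs a concrete Optimizer

# metric.train(
#     path="annotated_samples.json",        # hypothetical path to annotated data
#     demonstration_config=demo_config,
#     instruction_config=instruction_config,
#     callbacks=None,
# )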

src/ragas/optimizers/__init__.py

Lines changed: 3 additions & 0 deletions
from .base import Optimizer

__all__ = ["Optimizer"]

src/ragas/optimizers/base.py

Lines changed: 52 additions & 0 deletions
import typing as t
from abc import ABC, abstractmethod
from dataclasses import dataclass

from langchain_core.callbacks import Callbacks

from ragas.dataset_schema import SingleMetricAnnotation
from ragas.llms.base import BaseRagasLLM
from ragas.losses import Loss
from ragas.metrics.base import MetricWithLLM
from ragas.run_config import RunConfig


@dataclass
class Optimizer(ABC):
    """
    Abstract base class for all optimizers.
    """

    metric: t.Optional[MetricWithLLM] = None
    llm: t.Optional[BaseRagasLLM] = None

    @abstractmethod
    def optimize(
        self,
        dataset: SingleMetricAnnotation,
        loss: Loss,
        config: t.Dict[t.Any, t.Any],
        run_config: t.Optional[RunConfig] = None,
        batch_size: t.Optional[int] = None,
        callbacks: t.Optional[Callbacks] = None,
        with_debugging_logs: bool = False,
        raise_exceptions: bool = True,
    ) -> t.Dict[str, str]:
        """
        Optimizes the prompts for the given metric.

        Parameters
        ----------
        dataset : SingleMetricAnnotation
            The annotated training data.
        loss : Loss
            The loss function to minimize.
        config : dict
            The optimizer configuration.

        Returns
        -------
        Dict[str, str]
            The optimized prompts for the given chain.
        """
        pass
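
A minimal, hypothetical concrete subclass, only to show what implementers must provide; it is not part of the commit and simply returns no updated prompts:

import typing as t

from langchain_core.callbacks import Callbacks

from ragas.losses import Loss
from ragas.optimizers import Optimizer
from ragas.run_config import RunConfig


class NoOpOptimizer(Optimizer):
    """Placeholder optimizer that performs no search and changes no prompts."""

    def optimize(
        self,
        dataset,
        loss: Loss,
        config: t.Dict[t.Any, t.Any],
        run_config: t.Optional[RunConfig] = None,
        batch_size: t.Optional[int] = None,
        callbacks: t.Optional[Callbacks] = None,
        with_debugging_logs: bool = False,
        raise_exceptions: bool = True,
    ) -> t.Dict[str, str]:
        # A real optimizer would score candidate instructions against `dataset`
        # using `loss` and return the best instruction per prompt name.
        return {}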
