11 | 11 | from pydantic import BaseModel, ConfigDict, Field |
12 | 12 | from ragas.evaluation import EvaluationDataset, EvaluationResult, RunConfig, evaluate |
13 | 13 | from ragas.metrics import Metric |
14 | | -from ragas.metrics._domain_specific_rubrics import ( # the rubrics we must instantiate are located inside of a file marked as private |
15 | | - DEFAULT_WITH_REFERENCE_RUBRICS, |
16 | | - RubricsScore, |
17 | | -) |
| 14 | +from ragas.metrics._domain_specific_rubrics import RubricsScore |
18 | 15 |
19 | 16 | # Local |
20 | 17 | from .evaluator import Evaluator |
21 | 18 | from .logger_config import setup_logger |
22 | 19 |
23 | 20 | logger = setup_logger(__name__) |
24 | 21 |
| 22 | +# DEFAULT_WITH_REFERENCE_RUBRICS, copied from ragas v0.2.11. |
| 23 | +# The rubric set is hardcoded here so that future changes to ragas' DEFAULT_WITH_REFERENCE_RUBRICS do not silently alter scoring behavior. |
| 24 | +SCORING_RUBRICS = { |
| 25 | + "score1_description": "The response is entirely incorrect, irrelevant, or does not align with the reference in any meaningful way.", |
| 26 | + "score2_description": "The response partially matches the reference but contains major errors, significant omissions, or irrelevant information.", |
| 27 | + "score3_description": "The response aligns with the reference overall but lacks sufficient detail, clarity, or contains minor inaccuracies.", |
| 28 | + "score4_description": "The response is mostly accurate, aligns closely with the reference, and contains only minor issues or omissions.", |
| 29 | + "score5_description": "The response is fully accurate, completely aligns with the reference, and is clear, thorough, and detailed.", |
| 30 | +} |
| 31 | + |
25 | 32 |
26 | 33 | class Sample(TypedDict): |
27 | 34 | """ |
@@ -256,9 +263,8 @@ def _generate_answers_from_model( |
256 | 263 |
257 | 264 | @staticmethod |
258 | 265 | def _get_metrics() -> List[Metric]: |
259 | | - # default set of metrics |
260 | 266 | return [ |
261 | 267 | RubricsScore( |
262 | | - rubrics=DEFAULT_WITH_REFERENCE_RUBRICS, |
| 268 | + rubrics=SCORING_RUBRICS, |
263 | 269 | ) |
264 | 270 | ] |
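
For reference, below is a minimal sketch of how the hardcoded rubric flows into an evaluation run. It is not part of this change: the `score_samples` helper, the `judge_llm` argument, and the `user_input`/`response`/`reference` record keys are illustrative assumptions based on the ragas v0.2.x API that this file already imports (`EvaluationDataset`, `EvaluationResult`, `evaluate`, `RubricsScore`), and `SCORING_RUBRICS` refers to the dict added in this diff.

```python
# Illustrative sketch only (assumes ragas v0.2.x, matching the imports in this file).
from ragas.evaluation import EvaluationDataset, EvaluationResult, evaluate
from ragas.metrics._domain_specific_rubrics import RubricsScore


def score_samples(samples: list[dict], judge_llm) -> EvaluationResult:
    """Score question/answer records against references using the hardcoded rubric.

    Each record is assumed to carry `user_input`, `response`, and `reference`
    keys (SingleTurnSample-style dicts); `judge_llm` is a ragas-compatible LLM
    wrapper supplied by the caller. Both are assumptions for illustration.
    """
    dataset = EvaluationDataset.from_list(samples)
    # SCORING_RUBRICS is the score1..score5 description dict defined above.
    metric = RubricsScore(rubrics=SCORING_RUBRICS)
    return evaluate(dataset=dataset, metrics=[metric], llm=judge_llm)
```

Pinning the rubric locally keeps the judge prompts stable even if a future ragas release revises its default reference-based rubric.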