Commit a69e557

ragas: bump ragas version, pass old rubric in RubricsScore

Before ragas v0.2.11, RubricsScore.rubrics was not being applied properly, so this commit sets v0.2.11 as the minimum version for this library. v0.2.11 also changed the prompt used for domain-specific knowledge evaluation with reference; that prompt is hardcoded here in case ragas changes its prompts again in the future.

Signed-off-by: Ali Maredia <amaredia@redhat.com>
1 parent 03afb6c commit a69e557

File tree

2 files changed: +13 −5 lines changed


requirements.txt

Lines changed: 1 addition & 1 deletion

@@ -10,4 +10,4 @@ pandas
 pandas-stubs
 lm-eval>=0.4.4
 httpx
-ragas
+ragas>=0.2.11
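The pin exists because RubricsScore silently ignored its rubrics before v0.2.11. A minimal sketch of what a `>=` pin like this means, using a hypothetical `meets_pin` helper (naive numeric-tuple comparison, no ragas import; real resolvers such as pip handle pre-releases and epochs, which this sketch ignores):

```python
def meets_pin(installed: str, minimum: str = "0.2.11") -> bool:
    """Return True if `installed` satisfies a >=`minimum` version pin.

    Hypothetical helper for illustration: compares dotted version
    strings as integer tuples, e.g. "0.2.10" -> (0, 2, 10).
    """
    as_tuple = lambda v: tuple(int(part) for part in v.split("."))
    return as_tuple(installed) >= as_tuple(minimum)

print(meets_pin("0.2.10"))  # False: rubrics were not applied properly
print(meets_pin("0.2.11"))  # True: the minimum pinned by this commit
```

Anything below the pin is rejected, so an environment with the broken RubricsScore behavior cannot satisfy the new requirements file.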

src/instructlab/eval/ragas.py

Lines changed: 12 additions & 4 deletions

@@ -11,8 +11,7 @@
 from pydantic import BaseModel, ConfigDict, Field
 from ragas.evaluation import EvaluationDataset, EvaluationResult, RunConfig, evaluate
 from ragas.metrics import Metric
-from ragas.metrics._domain_specific_rubrics import (  # the rubrics we must instantiate are located inside of a file marked as private
-    DEFAULT_WITH_REFERENCE_RUBRICS,
+from ragas.metrics._domain_specific_rubrics import (
     RubricsScore,
 )
 
@@ -22,6 +21,16 @@
 
 logger = setup_logger(__name__)
 
+# DEFAULT_WITH_REFERENCE_RUBRICS from ragas v0.2.11.
+# This rubric is hardcoded in case ragas makes any changes to their DEFAULT_WITH_REFERENCE_RUBRICS in the future
+SCORING_RUBRICS = {
+    "score1_description": "The response is entirely incorrect, irrelevant, or does not align with the reference in any meaningful way.",
+    "score2_description": "The response partially matches the reference but contains major errors, significant omissions, or irrelevant information.",
+    "score3_description": "The response aligns with the reference overall but lacks sufficient detail, clarity, or contains minor inaccuracies.",
+    "score4_description": "The response is mostly accurate, aligns closely with the reference, and contains only minor issues or omissions.",
+    "score5_description": "The response is fully accurate, completely aligns with the reference, and is clear, thorough, and detailed.",
+}
+
 
 class Sample(TypedDict):
     """
@@ -256,9 +265,8 @@ def _generate_answers_from_model(
 
     @staticmethod
     def _get_metrics() -> List[Metric]:
-        # default set of metrics
         return [
             RubricsScore(
-                rubrics=DEFAULT_WITH_REFERENCE_RUBRICS,
+                rubrics=SCORING_RUBRICS,
             )
         ]
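The hardcoded rubric is a plain dict: one description per score level, keyed `score<N>_description` for N in 1 through 5, which is the shape RubricsScore expects for its `rubrics` argument. A minimal sketch of that shape (descriptions abbreviated from the diff above; the comment shows how the commit wires it in):

```python
# Shape of the rubric dict hardcoded by this commit: one description
# per score level, keyed "score<N>_description" for N in 1..5.
# (Descriptions abbreviated here; the full text is in the diff above.)
SCORING_RUBRICS = {
    "score1_description": "The response is entirely incorrect or irrelevant.",
    "score2_description": "The response partially matches the reference with major errors.",
    "score3_description": "The response aligns overall but lacks detail or has minor inaccuracies.",
    "score4_description": "The response is mostly accurate with only minor issues.",
    "score5_description": "The response is fully accurate, clear, and thorough.",
}

# With ragas>=0.2.11 installed, the commit passes this dict as
#   RubricsScore(rubrics=SCORING_RUBRICS)
# so scoring uses the pinned rubric rather than whatever
# DEFAULT_WITH_REFERENCE_RUBRICS ships with a future ragas release.

expected_keys = {f"score{i}_description" for i in range(1, 6)}
assert set(SCORING_RUBRICS) == expected_keys
```

Freezing the dict in this repo decouples the evaluation prompt from upstream: a future ragas release can rewrite its default rubrics without silently changing this library's scores.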
