Skip to content

Commit 5d99909

Browse files
authored
fix: add reference to simple scoring (#1758)
fixes: #1758
1 parent a9a0330 commit 5d99909

File tree

2 files changed

+9
-13
lines changed

docs/howtos/applications/_metrics_llm_calls.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
-## Debug LLM based metrics using tracing
+# Explain or debug LLM based metrics using tracing
 
 While evaluating using LLM based metrics, each metric may make one or more calls to the LLM. These traces are important to understand the results of the metrics and to debug any issues.
 This notebook demonstrates how to export the LLM traces and analyze them.

src/ragas/metrics/_simple_criteria.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,7 @@ class SingleTurnSimpleCriteriaInput(BaseModel):
 
 
 class MultiTurnSimpleCriteriaInput(BaseModel):
-    user_input: t.Optional[str] = Field(
-        description="The input to the model", default=None
-    )
+    user_input: str = Field(description="The input to the model")
     reference: t.Optional[str] = Field(
         description="The reference response", default=None
     )
@@ -172,20 +170,18 @@ async def _single_turn_ascore(
     async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         assert self.llm is not None, "set LLM before use"
 
-        user_input, context, response = (
-            row["user_input"],
+        user_input, response, retrieved_contexts, reference = (
+            row.get("user_input"),
+            row.get("response"),
             row.get("retrieved_contexts"),
-            row["response"],
+            row.get("reference"),
         )
 
-        if context is not None:
-            if isinstance(context, list):
-                context = "\n".join(context)
-            user_input = f"Question: {user_input} Answer using context: {context}"
-
         prompt_input = SingleTurnSimpleCriteriaInput(
             user_input=user_input,
             response=response,
+            retrieved_contexts=retrieved_contexts,
+            reference=reference,
         )
 
         response = await self.single_turn_prompt.generate(
@@ -200,11 +196,11 @@ async def _multi_turn_ascore(
         self, sample: MultiTurnSample, callbacks: Callbacks
     ) -> float:
         assert self.llm is not None, "LLM is not set"
-        assert sample.reference is not None, "Reference is not set"
 
         interaction = sample.pretty_repr()
         prompt_input = MultiTurnSimpleCriteriaInput(
             user_input=interaction,
+            reference=sample.reference,
         )
         response = await self.multi_turn_prompt.generate(
             data=prompt_input,

0 commit comments

Comments (0)