fix: add reference tool call to required cols (#1580)

shahules786 · web-flow · commit 2319dce61eb5 · 2024-10-25T23:31:13.000+05:30
diff --git a/docs/concepts/metrics/available_metrics/agents.md b/docs/concepts/metrics/available_metrics/agents.md
@@ -71,10 +71,9 @@ scorer = TopicAdherenceScore(mode="recall")
 `ToolCallAccuracy` is a metric that can be used to evaluate the performance of the LLM in identifying and calling the required tools to complete a given task. This metric needs `user_input` and `reference_tool_calls` as inputs. The metric is computed by comparing the `reference_tool_calls` with the tool calls made by the AI. The values range between 0 and 1, with higher values indicating better performance.
 
 ```python
+from ragas.metrics import ToolCallAccuracy
 from ragas.dataset_schema import  MultiTurnSample
 from ragas.messages import HumanMessage,AIMessage,ToolMessage,ToolCall
-from ragas.metrics import ToolCallAccuracy
-
 
 sample = [
     HumanMessage(content="What's the weather like in New York right now?"),
@@ -89,7 +88,7 @@ sample = [
     AIMessage(content="75°F is approximately 23.9°C.")
 ]
 
-sampl2 = MultiTurnSample(
+sample = MultiTurnSample(
     user_input=sample,
     reference_tool_calls=[
         ToolCall(name="weather_check", args={"location": "New York"}),
@@ -98,7 +97,7 @@ sampl2 = MultiTurnSample(
 )
 
 scorer = ToolCallAccuracy()
-await metric.multi_turn_ascore(sample)
+await scorer.multi_turn_ascore(sample)
 ```
 
 The tool call sequence specified in `reference_tool_calls` is used as the ideal outcome. If the tool calls made by the AI do not match the order or sequence of the `reference_tool_calls`, the metric will return a score of 0. This helps to ensure that the AI is able to identify and call the required tools in the correct order to complete a given task.
diff --git a/src/ragas/metrics/_tool_call_accuracy.py b/src/ragas/metrics/_tool_call_accuracy.py
@@ -20,7 +20,7 @@ class ToolCallAccuracy(MultiTurnMetric):
         default_factory=lambda: {
             MetricType.MULTI_TURN: {
                 "user_input",
-                "reference",
+                "reference_tool_calls",
             }
         }
     )
@@ -61,7 +61,7 @@ def is_sequence_aligned(
     async def _multi_turn_ascore(
         self, sample: MultiTurnSample, callbacks: Callbacks
     ) -> float:
-        assert sample.reference_tool_calls is not None, "Reference is not set"
+        assert sample.reference_tool_calls is not None, "Reference tool calls is not set"
 
         pred_tool_calls = []
         for item in sample.user_input:

Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@ class ToolCallAccuracy(MultiTurnMetric):`
`20`	`20`	`default_factory=lambda: {`
`21`	`21`	`MetricType.MULTI_TURN: {`
`22`	`22`	`"user_input",`
`23`		`- "reference",`
	`23`	`+ "reference_tool_calls",`
`24`	`24`	`}`
`25`	`25`	`}`
`26`	`26`	`)`
`@@ -61,7 +61,7 @@ def is_sequence_aligned(`
`61`	`61`	`async def _multi_turn_ascore(`
`62`	`62`	`self, sample: MultiTurnSample, callbacks: Callbacks`
`63`	`63`	`) -> float:`
`64`		`- assert sample.reference_tool_calls is not None, "Reference is not set"`
	`64`	`+ assert sample.reference_tool_calls is not None, "Reference tool calls is not set"`
`65`	`65`
`66`	`66`	`pred_tool_calls = []`
`67`	`67`	`for item in sample.user_input:`