@@ -297,7 +297,7 @@ def test_evaluate_tools_one_eval_fails(self, mock_model_config):
297297
298298 assert "Invalid score value" in str (exc_info .value )
299299
300- def test_evaluate_tools_some_not_applicable (self , mock_model_config ):
300+ def test_evaluate_tools_some_missing_tool_definitions (self , mock_model_config ):
301301 evaluator = ToolCallAccuracyEvaluator (model_config = mock_model_config )
302302 evaluator ._flow = MagicMock (side_effect = flow_side_effect )
303303
@@ -331,21 +331,7 @@ def test_evaluate_tools_some_not_applicable(self, mock_model_config):
331331 }
332332 },
333333 },
334- },
335- {
336- "name" : "buy_jacket" ,
337- "type" : "another_built_in" , # This tool will be filtered out
338- "description" : "Buy a jacket of the given type." ,
339- "parameters" : {
340- "type" : "object" ,
341- "properties" : {
342- "type" : {
343- "type" : "string" ,
344- "description" : "The type of jacket to buy." ,
345- }
346- },
347- },
348- },
334+ }, # buy_jacket definition is missing
349335 ]
350336 result = evaluator (query = query , tool_calls = tool_calls , tool_definitions = tool_definitions )
351337
@@ -357,7 +343,7 @@ def test_evaluate_tools_some_not_applicable(self, mock_model_config):
357343 assert result [f"{ key } _reason" ] == ToolCallAccuracyEvaluator ._TOOL_DEFINITIONS_MISSING_MESSAGE
358344 assert result [f"{ key } _details" ] == {}
359345
360- def test_evaluate_tools_all_not_applicable (self , mock_model_config ):
346+ def test_evaluate_tools_built_in_tool_definition (self , mock_model_config ):
361347 evaluator = ToolCallAccuracyEvaluator (model_config = mock_model_config )
362348 evaluator ._flow = MagicMock (side_effect = flow_side_effect )
363349
@@ -374,7 +360,7 @@ def test_evaluate_tools_all_not_applicable(self, mock_model_config):
374360 tool_definitions = [
375361 {
376362 "name" : "fetch_weather" ,
377- "type" : "some_built_in" , # Not a 'function' type
363+ "type" : "some_built_in" , # Not a 'function' type but shouldn't be filtered out
378364 "description" : "Fetches the weather information for the specified location." ,
379365 "parameters" : {
380366 "type" : "object" ,
@@ -391,11 +377,13 @@ def test_evaluate_tools_all_not_applicable(self, mock_model_config):
391377
392378 key = ToolCallAccuracyEvaluator ._RESULT_KEY
393379 assert result is not None
394- assert result [key ] == ToolCallAccuracyEvaluator ._NOT_APPLICABLE_RESULT
380+ assert key in result and f"{ key } _result" in result and f"{ key } _threshold" in result
381+ assert result [key ] == 5.0 # All good gets score 5
395382 assert result [f"{ key } _result" ] == "pass"
396383 assert result [f"{ key } _threshold" ] == ToolCallAccuracyEvaluator ._DEFAULT_TOOL_CALL_ACCURACY_SCORE
397- assert result [f"{ key } _reason" ] == ToolCallAccuracyEvaluator ._TOOL_DEFINITIONS_MISSING_MESSAGE
398- assert result [f"{ key } _details" ] == {}
384+ assert f"{ key } _reason" in result
385+ assert result [f"{ key } _reason" ] == "Evaluated 1 tool calls with 1 correct calls."
386+ assert f"{ key } _details" in result
399387
400388 def test_evaluate_tools_no_tools (self , mock_model_config ):
401389 evaluator = ToolCallAccuracyEvaluator (model_config = mock_model_config )
0 commit comments