diff --git a/examples/smolagents_benchmark/run.py b/examples/smolagents_benchmark/run.py index 424ed4a77..aa8e48570 100644 --- a/examples/smolagents_benchmark/run.py +++ b/examples/smolagents_benchmark/run.py @@ -60,7 +60,6 @@ def parse_arguments(): parser.add_argument( "--provider", type=str, - default="hf-inference", help="The provider for InferenceClientModel - will not be used for LiteLLMModel", ) parser.add_argument( @@ -135,9 +134,12 @@ def answer_single_question(example, model, answers_file, action_type): ) elif action_type == "tool-calling": agent = ToolCallingAgent( - tools=[GoogleSearchTool(provider="serper"), VisitWebpageTool(), PythonInterpreterTool()], + tools=[ + GoogleSearchTool(provider="serper"), + VisitWebpageTool(), + PythonInterpreterTool(authorized_imports=["numpy", "sympy"]), + ], model=model, - additional_authorized_imports=["numpy", "sympy"], max_steps=10, ) @@ -158,7 +160,7 @@ def answer_single_question(example, model, answers_file, action_type): # Run agent 🚀 answer = str(agent.run(augmented_question)) token_counts = agent.monitor.get_total_token_counts() - intermediate_steps = [dict(message) for message in agent.write_memory_to_messages()] + intermediate_steps = [message.dict() for message in agent.write_memory_to_messages()] end_time = time.time() except Exception as e: diff --git a/examples/smolagents_benchmark/score.ipynb b/examples/smolagents_benchmark/score.ipynb index d81906481..7d8709498 100644 --- a/examples/smolagents_benchmark/score.ipynb +++ b/examples/smolagents_benchmark/score.ipynb @@ -45,15 +45,15 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "import datetime\n", "import re\n", "import string\n", "import warnings\n", "from concurrent.futures import ThreadPoolExecutor, as_completed\n", - "from datetime import datetime\n", "\n", "import numpy as np\n", "from tqdm import tqdm\n",