diff --git a/examples/smolagents_benchmark/run.py b/examples/smolagents_benchmark/run.py
index 424ed4a77..aa8e48570 100644
--- a/examples/smolagents_benchmark/run.py
+++ b/examples/smolagents_benchmark/run.py
@@ -60,7 +60,6 @@ def parse_arguments():
     parser.add_argument(
         "--provider",
         type=str,
-        default="hf-inference",
         help="The provider for InferenceClientModel - will not be used for LiteLLMModel",
     )
     parser.add_argument(
@@ -135,9 +134,12 @@ def answer_single_question(example, model, answers_file, action_type):
         )
     elif action_type == "tool-calling":
         agent = ToolCallingAgent(
-            tools=[GoogleSearchTool(provider="serper"), VisitWebpageTool(), PythonInterpreterTool()],
+            tools=[
+                GoogleSearchTool(provider="serper"),
+                VisitWebpageTool(),
+                PythonInterpreterTool(authorized_imports=["numpy", "sympy"]),
+            ],
             model=model,
-            additional_authorized_imports=["numpy", "sympy"],
             max_steps=10,
         )
 
@@ -158,7 +160,7 @@ def answer_single_question(example, model, answers_file, action_type):
             # Run agent 🚀
             answer = str(agent.run(augmented_question))
             token_counts = agent.monitor.get_total_token_counts()
-            intermediate_steps = [dict(message) for message in agent.write_memory_to_messages()]
+            intermediate_steps = [message.dict() for message in agent.write_memory_to_messages()]
 
         end_time = time.time()
     except Exception as e:
diff --git a/examples/smolagents_benchmark/score.ipynb b/examples/smolagents_benchmark/score.ipynb
index d81906481..7d8709498 100644
--- a/examples/smolagents_benchmark/score.ipynb
+++ b/examples/smolagents_benchmark/score.ipynb
@@ -45,15 +45,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
+    "import datetime\n",
     "import re\n",
     "import string\n",
     "import warnings\n",
     "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
-    "from datetime import datetime\n",
     "\n",
     "import numpy as np\n",
     "from tqdm import tqdm\n",