diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
index 774fa5296..b8808567e 100644
--- a/.github/workflows/quality.yml
+++ b/.github/workflows/quality.yml
@@ -2,6 +2,9 @@ name: Quality Check
 
 on: [pull_request]
 
+env:
+  UV_SYSTEM_PYTHON: 1
+
 jobs:
   check_code_quality:
     runs-on: ubuntu-latest
@@ -16,15 +19,13 @@ jobs:
           python-version: "3.12"
 
       # Setup venv
-      - name: Setup venv + uv
+      - name: Setup uv
         run: |
           pip install --upgrade uv
-          uv venv
 
       - name: Install dependencies
         run: uv pip install "smolagents[quality] @ ."
 
       # Equivalent of "make quality" but step by step
-      - run: uv run ruff check examples src tests utils # linter
-      - run: uv run ruff format --check examples src tests utils # formatter
-      - run: uv run python utils/check_tests_in_ci.py
\ No newline at end of file
+      - run: ruff check examples src tests  # linter
+      - run: ruff format --check examples src tests  # formatter
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c16a90a72..12a794c7b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,6 +1,13 @@
 name: Python tests
 
-on: [pull_request]
+on:
+  pull_request:
+  push:
+    branches:
+      - ci-*
+
+env:
+  UV_SYSTEM_PYTHON: 1
 
 jobs:
   build-ubuntu:
@@ -21,99 +28,16 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       # Setup venv
-      - name: Setup venv + uv
+      - name: Setup uv
         run: |
           pip install --upgrade uv
-          uv venv
 
       # Install dependencies
       - name: Install dependencies
         run: |
           uv pip install "smolagents[test] @ ."
 
-      # Run all tests separately for individual feedback
-      # Use 'if success() || failure()' so that all tests are run even if one failed
-      # See https://stackoverflow.com/a/62112985
-      - name: Import tests
-        run: |
-          uv run pytest ./tests/test_import.py
-        if: ${{ success() || failure() }}
-
-      - name: Agent tests
-        run: |
-          uv run pytest ./tests/test_agents.py
-        if: ${{ success() || failure() }}
-
-      - name: Default tools tests
-        run: |
-          uv run pytest ./tests/test_default_tools.py
-        if: ${{ success() || failure() }}
-
-      # - name: Docs tests # Disabled for now (slow test + requires API keys)
-      #   run: |
-      #     uv run pytest ./tests/test_all_docs.py
-
-      - name: Final answer tests
-        run: |
-          uv run pytest ./tests/test_final_answer.py
-        if: ${{ success() || failure() }}
-
-      - name: Models tests
-        run: |
-          uv run pytest ./tests/test_models.py
-        if: ${{ success() || failure() }}
-
-      - name: Memory tests
-        run: |
-          uv run pytest ./tests/test_memory.py
-        if: ${{ success() || failure() }}
-
-      - name: Monitoring tests
-        run: |
-          uv run pytest ./tests/test_monitoring.py
-        if: ${{ success() || failure() }}
-
-      - name: Local Python executor tests
-        run: |
-          uv run pytest ./tests/test_local_python_executor.py
-        if: ${{ success() || failure() }}
-
-      - name: E2B executor tests
-        run: |
-          uv run pytest ./tests/test_e2b_executor.py
-        if: ${{ success() || failure() }}
-
-      - name: Search tests
-        run: |
-          uv run pytest ./tests/test_search.py
-        if: ${{ success() || failure() }}
-
-      - name: Tools tests
-        run: |
-          uv run pytest ./tests/test_tools.py
-        if: ${{ success() || failure() }}
-
-      - name: Tool validation tests
-        run: |
-          uv run pytest ./tests/test_tool_validation.py
-        if: ${{ success() || failure() }}
-
-      - name: Types tests
-        run: |
-          uv run pytest ./tests/test_types.py
-        if: ${{ success() || failure() }}
-
-      - name: Utils tests
-        run: |
-          uv run pytest ./tests/test_utils.py
-        if: ${{ success() || failure() }}
-
-      - name: Gradio UI tests
-        run: |
-          uv run pytest ./tests/test_gradio_ui.py
-        if: ${{ success() || failure() }}
-
-      - name: Function type hints utils tests
+      # Run tests
+      - name: Test with pytest
         run: |
-          uv run pytest ./tests/test_function_type_hints_utils.py
-        if: ${{ success() || failure() }}
+          pytest ./tests/
diff --git a/.gitignore b/.gitignore
index 59bba3ae6..b18528112 100644
--- a/.gitignore
+++ b/.gitignore
@@ -150,3 +150,6 @@ archive/
 savedir/
 output/
 tool_output/
+
+# Gradio runtime
+.gradio/
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index a4ff4b8b2..000000000
--- a/Dockerfile
+++ /dev/null
@@ -1,29 +0,0 @@
-# Base Python image
-FROM python:3.12-slim
-
-# Set working directory
-WORKDIR /app
-
-# Install build dependencies
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    zlib1g-dev \
-    libjpeg-dev \
-    libpng-dev \
-    && rm -rf /var/lib/apt/lists/*
-
-# Copy package files
-COPY . /app/
-
-# Install dependencies
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Install the package
-RUN pip install -e .
-
-COPY server.py /app/server.py
-
-# Expose the port your server will run on
-EXPOSE 65432
-
-CMD ["python", "/app/server.py"]
diff --git a/Makefile b/Makefile
index c8e7c04f6..01bb05690 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: quality style test docs utils
+.PHONY: quality style test docs
 
 check_dirs := examples src tests utils
 
@@ -6,7 +6,6 @@ check_dirs := examples src tests utils
 quality:
 	ruff check $(check_dirs)
 	ruff format --check $(check_dirs)
-	python utils/check_tests_in_ci.py
 
 # Format source code automatically
 style:
diff --git a/README.md b/README.md
index fb853b06e..9d90637fb 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ limitations under the License.
 <h3 align="center">
   <div style="display:flex;flex-direction:row;">
     <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/smolagents.png" alt="Hugging Face mascot as James Bond" width=400px>
-    <p>A smol library to build great agents!</p>
+    <p>Agents that think in code!</p>
   </div>
 </h3>
 
@@ -34,15 +34,15 @@ limitations under the License.
 
 ✨ **Simplicity**: the logic for agents fits in ~1,000 lines of code (see [agents.py](https://github.com/huggingface/smolagents/blob/main/src/smolagents/agents.py)). We kept abstractions to their minimal shape above raw code!
 
-🧑‍💻 **First-class support for Code Agents**. Our [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) writes its actions in code (as opposed to "agents being used to write code"). To make it secure, we support executing in sandboxed environments via [E2B](https://e2b.dev/).
+🧑‍💻 **First-class support for Code Agents**. Our [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) writes its actions in code (as opposed to "agents being used to write code"). To make it secure, we support executing in sandboxed environments via [E2B](https://e2b.dev/) or via Docker.
 
-🤗 **Hub integrations**: you can [share/pull tools to/from the Hub](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_hub), and more is to come!
+🤗 **Hub integrations**: you can [share/pull tools or agents to/from the Hub](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_hub) for instant sharing of the most efficient agents!
 
 🌐 **Model-agnostic**: smolagents supports any LLM. It can be a local `transformers` or `ollama` model, one of [many providers on the Hub](https://huggingface.co/blog/inference-providers), or any model from OpenAI, Anthropic and many others via our [LiteLLM](https://www.litellm.ai/) integration.
 
 👁️ **Modality-agnostic**: Agents support text, vision, video, even audio inputs! Cf [this tutorial](https://huggingface.co/docs/smolagents/examples/web_browser) for vision.
 
-🛠️ **Tool-agnostic**: you can use tools from [LangChain](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_langchain), [Anthropic's MCP](https://huggingface.co/docs/smolagents/reference/tools#smolagents.ToolCollection.from_mcp), you can even use a [Hub Space](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_space) as a tool.
+🛠️ **Tool-agnostic**: you can use tools from [LangChain](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_langchain), [MCP](https://huggingface.co/docs/smolagents/reference/tools#smolagents.ToolCollection.from_mcp), you can even use a [Hub Space](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_space) as a tool.
 
 Full documentation can be found [here](https://huggingface.co/docs/smolagents/index).
 
@@ -57,9 +57,9 @@ pip install smolagents
 ```
 Then define your agent, give it the tools it needs and run it!
 ```py
-from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel
+from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel
 
-model = HfApiModel()
+model = InferenceClientModel()
 agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
 
 agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
@@ -67,7 +67,7 @@ agent.run("How many seconds would it take for a leopard at full speed to run thr
 
 https://github.com/user-attachments/assets/cd0226e2-7479-4102-aea0-57c22ca47884
 
-You can even share your agent to hub:
+You can even share your agent to the Hub, as a Space repository:
 ```py
 agent.push_to_hub("m-ric/my_agent")
 
@@ -77,12 +77,12 @@ agent.push_to_hub("m-ric/my_agent")
 Our library is LLM-agnostic: you could switch the example above to any inference provider.
 
 <details>
-<summary> <b>HfApiModel, gateway for 4 inference providers</b></summary>
+<summary> <b>InferenceClientModel, gateway for all <a href="https://huggingface.co/docs/inference-providers/index">inference providers</a> supported on HF</b></summary>
 
 ```py
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel
 
-model = HfApiModel(
+model = InferenceClientModel(
     model_id="deepseek-ai/DeepSeek-R1",
     provider="together",
 )
@@ -95,7 +95,7 @@ model = HfApiModel(
 from smolagents import LiteLLMModel
 
 model = LiteLLMModel(
-    "anthropic/claude-3-5-sonnet-latest",
+    model_id="anthropic/claude-3-5-sonnet-latest",
     temperature=0.2,
     api_key=os.environ["ANTHROPIC_API_KEY"]
 )
@@ -143,6 +143,18 @@ model = AzureOpenAIServerModel(
 )
 ```
 </details>
+<details>
+<summary> <b>Amazon Bedrock models</b></summary>
+
+```py
+import os
+from smolagents import AmazonBedrockServerModel
+
+model = AmazonBedrockServerModel(
+    model_id=os.environ.get("AMAZON_BEDROCK_MODEL_ID")
+)
+```
+</details>
 
 ## CLI
 
@@ -151,7 +163,7 @@ You can run agents from CLI using two commands: `smolagent` and `webagent`.
 `smolagent` is a generalist command to run a multi-step `CodeAgent` that can be equipped with various tools.
 
 ```bash
-smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7."  --model-type "HfApiModel" --model-id "Qwen/Qwen2.5-Coder-32B-Instruct" --imports "pandas numpy" --tools "web_search"
+smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7."  --model-type "InferenceClientModel" --model-id "Qwen/Qwen2.5-Coder-32B-Instruct" --imports "pandas numpy" --tools "web_search"
 ```
 
 Meanwhile `webagent` is a specific web-browsing agent using [helium](https://github.com/mherrmann/helium) (read more [here](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)).
@@ -201,7 +213,7 @@ Writing actions as code snippets is demonstrated to work better than the current
 
 Especially, since code execution can be a security concern (arbitrary code execution!), we provide options at runtime:
   - a secure python interpreter to run code more safely in your environment (more secure than raw code execution but still risky)
-  - a sandboxed environment using [E2B](https://e2b.dev/) (removes the risk to your own system).
+  - a sandboxed environment using [E2B](https://e2b.dev/) or Docker (removes the risk to your own system).
 
 On top of this [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) class, we still support the standard [`ToolCallingAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.ToolCallingAgent) that writes actions as JSON/text blobs. But we recommend always using `CodeAgent`.
 
@@ -216,7 +228,7 @@ By the way, why use a framework at all? Well, because a big part of this stuff i
 
 We've created [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) instances with some leading models, and compared them on [this benchmark](https://huggingface.co/datasets/m-ric/agents_medium_benchmark_2) that gathers questions from a few different benchmarks to propose a varied blend of challenges.
 
-[Find the benchmarking code here](https://github.com/huggingface/smolagents/blob/main/examples/benchmark.ipynb) for more detail on the agentic setup used, and see a comparison of using LLMs code agents compared to vanilla (spoilers: code agents works better).
+[Find the benchmarking code here](https://github.com/huggingface/smolagents/blob/main/examples/smolagents_benchmark/run.py) for more detail on the agentic setup used, and see a comparison of using LLMs code agents compared to vanilla (spoilers: code agents work better).
 
 <p align="center">
     <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/benchmark_code_agents.jpeg" alt="benchmark of different models on agentic workflows. Open model DeepSeek-R1 beats closed-source models." width=60% max-width=500px>
@@ -224,6 +236,14 @@ We've created [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/age
 
 This comparison shows that open-source models can now take on the best closed models!
 
+## Security
+
+Security is a critical consideration when working with code-executing agents. Our library provides:
+- Sandboxed execution options using [E2B](https://e2b.dev/) or Docker
+- Best practices for running agent code securely
+
+For security policies, vulnerability reporting, and more information on secure agent execution, please see our [Security Policy](SECURITY.md).
+
 ## Contribute
 
 Everyone is welcome to contribute, get started with our [contribution guide](https://github.com/huggingface/smolagents/blob/main/CONTRIBUTING.md).
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 000000000..0a55a5631
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,9 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+To report a security vulnerability, please contact: security@huggingface.co
+
+## Learning More About Security
+
+To learn more about running agents more securely, please see the [Secure Code Execution tutorial](docs/source/en/tutorials/secure_code_execution.mdx) which covers sandboxing with E2B and Docker.
diff --git a/docs/README.md b/docs/README.md
index be716450b..af4b61c6c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -121,10 +121,6 @@ Adding a new tutorial or section is done in two steps:
 
 Make sure to put your new file under the proper section. If you have a doubt, feel free to ask in a Github Issue or PR.
 
-### Translating
-
-When translating, refer to the guide at [./TRANSLATING.md](https://github.com/huggingface/smolagents/blob/main/docs/TRANSLATING.md).
-
 ### Writing source documentation
 
 Values that should be put in `code` should either be surrounded by backticks: \`like so\`. Note that argument names
@@ -271,4 +267,5 @@ is to be used in inference and also include the expected (ideally sensible)
 output.
 Often, readers will try out the example before even going through the function
 or class definitions. Therefore, it is of utmost importance that the example
-works as expected.
\ No newline at end of file
+works as expected.
+
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index c1efd31dc..c5c2a9a93 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -13,7 +13,7 @@
   - local: tutorials/tools
     title: 🛠️ Tools - in-depth guide
   - local: tutorials/secure_code_execution
-    title: 🛡️ Secure your code execution with E2B
+    title: 🛡️ Secure code execution
   - local: tutorials/memory
     title: 📚 Manage your agent's memory
 - title: Conceptual guides
@@ -27,7 +27,7 @@
   - local: examples/text_to_sql
     title: Self-correcting Text-to-SQL
   - local: examples/rag
-    title: Master you knowledge base with agentic RAG
+    title: Master your knowledge base with agentic RAG
   - local: examples/multiagents
     title: Orchestrate a multi-agent system
   - local: examples/web_browser
diff --git a/docs/source/en/conceptual_guides/intro_agents.mdx b/docs/source/en/conceptual_guides/intro_agents.mdx
index ca5ad31c5..ef76b103e 100644
--- a/docs/source/en/conceptual_guides/intro_agents.mdx
+++ b/docs/source/en/conceptual_guides/intro_agents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Introduction to Agents
 
 ## 🤔 What are agents?
@@ -28,13 +13,14 @@ Note that with this definition, "agent" is not a discrete, 0 or 1 definition: in
 
 See in the table below how agency can vary across systems:
 
-| Agency Level | Description                                             | How that's called | Example Pattern                                    |
-| ------------ | ------------------------------------------------------- | ----------------- | -------------------------------------------------- |
-| ☆☆☆          | LLM output has no impact on program flow                | Simple Processor  | `process_llm_output(llm_response)`                 |
-| ★☆☆          | LLM output determines an if/else switch                 | Router            | `if llm_decision(): path_a() else: path_b()`       |
-| ★★☆          | LLM output determines function execution                | Tool Caller       | `run_function(llm_chosen_tool, llm_chosen_args)`   |
-| ★★★          | LLM output controls iteration and program continuation  | Multi-step Agent  | `while llm_should_continue(): execute_next_step()` |
-| ★★★          | One agentic workflow can start another agentic workflow | Multi-Agent       | `if llm_trigger(): execute_agent()`                |
+| Agency Level | Description                                            | Short name       | Example Code                                       |
+| ------------ | ------------------------------------------------------ | ---------------- | -------------------------------------------------- |
+| ☆☆☆ | LLM output has no impact on program flow                        | Simple processor | `process_llm_output(llm_response)`                 |
+| ★☆☆ | LLM output controls an if/else switch                           | Router           | `if llm_decision(): path_a() else: path_b()`       |
+| ★★☆ | LLM output controls function execution                          | Tool call        | `run_function(llm_chosen_tool, llm_chosen_args)`   |
+| ★★☆ | LLM output controls iteration and program continuation          | Multi-step Agent | `while llm_should_continue(): execute_next_step()` |
+| ★★★ | One agentic workflow can start another agentic workflow         | Multi-Agent      | `if llm_trigger(): execute_agent()`                |
+| ★★★ | LLM acts in code, can define its own tools / start other agents | Code Agents      | `def custom_tool(args): ...`                       |
 
 The multi-step agent has this code structure:
 
diff --git a/docs/source/en/conceptual_guides/react.mdx b/docs/source/en/conceptual_guides/react.mdx
index b86c438e2..6358c78fd 100644
--- a/docs/source/en/conceptual_guides/react.mdx
+++ b/docs/source/en/conceptual_guides/react.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # How do multi-step agents work?
 
 The ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) is currently the main approach to building agents.
diff --git a/docs/source/en/examples/multiagents.mdx b/docs/source/en/examples/multiagents.mdx
index 4f41fe8e6..4e43f99f5 100644
--- a/docs/source/en/examples/multiagents.mdx
+++ b/docs/source/en/examples/multiagents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Orchestrate a multi-agent system 🤖🤝🤖
 
 [[open-in-colab]]
@@ -39,19 +24,19 @@ Let's set up this system.
 
 Run the line below to install the required dependencies:
 
-```
-!pip install markdownify duckduckgo-search smolagents --upgrade -q
+```py
+! pip install markdownify duckduckgo-search smolagents --upgrade -q
 ```
 
-Let's login in order to call the HF Inference API:
+Let's log in to HF in order to call Inference Providers:
 
-```
+```py
 from huggingface_hub import login
 
 login()
 ```
 
-⚡️ Our agent will be powered by [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) using `HfApiModel` class that uses HF's Inference API: the Inference API allows to quickly and easily run any OS model.
+⚡️ Our agent will be powered by [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) using the `InferenceClientModel` class that uses HF's Inference API: the Inference API allows you to quickly and easily run any OS model.
 
 _Note:_ The Inference API hosts models based on various criteria, and deployed models may be updated or replaced without prior notice. Learn more about it [here](https://huggingface.co/docs/api-inference/supported-models).
 
@@ -123,19 +108,19 @@ Which configuration to choose for this agent?
 from smolagents import (
     CodeAgent,
     ToolCallingAgent,
-    HfApiModel,
+    InferenceClientModel,
     DuckDuckGoSearchTool,
     LiteLLMModel,
 )
 
-model = HfApiModel(model_id)
+model = InferenceClientModel(model_id=model_id)
 
 web_agent = ToolCallingAgent(
     tools=[DuckDuckGoSearchTool(), visit_webpage],
     model=model,
     max_steps=10,
-    name="search",
-    description="Runs web searches for you. Give it your query as an argument.",
+    name="web_search_agent",
+    description="Runs web searches for you.",
 )
 ```
 
diff --git a/docs/source/en/examples/rag.mdx b/docs/source/en/examples/rag.mdx
index eb1c4c27f..212d38cb7 100644
--- a/docs/source/en/examples/rag.mdx
+++ b/docs/source/en/examples/rag.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Agentic RAG
 
 [[open-in-colab]]
@@ -37,7 +22,7 @@ Run the line below to install required dependencies:
 ```bash
 !pip install smolagents pandas langchain langchain-community sentence-transformers datasets python-dotenv rank_bm25 --upgrade -q
 ```
-To call the HF Inference API, you will need a valid token as your environment variable `HF_TOKEN`.
+To call Inference Providers, you will need a valid token as your environment variable `HF_TOKEN`.
 We use python-dotenv to load it.
 ```py
 from dotenv import load_dotenv
@@ -127,13 +112,13 @@ The agent will need these arguments upon initialization:
 - `model`: the LLM that powers the agent.
 Our `model` must be a callable that takes as input a list of messages and returns text. It also needs to accept a stop_sequences argument that indicates when to stop its generation. For convenience, we directly use the HfEngine class provided in the package to get a LLM engine that calls Hugging Face's Inference API.
 
->[!NOTE] To use a specific model, pass it like this: `HfApiModel("meta-llama/Llama-3.3-70B-Instruct")`. The Inference API hosts models based on various criteria, and deployed models may be updated or replaced without prior notice. Learn more about it [here](https://huggingface.co/docs/api-inference/supported-models).
+>[!NOTE] To use a specific model, pass it like this: `InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct")`. The Inference API hosts models based on various criteria, and deployed models may be updated or replaced without prior notice. Learn more about it [here](https://huggingface.co/docs/api-inference/supported-models).
 
 ```py
-from smolagents import HfApiModel, CodeAgent
+from smolagents import InferenceClientModel, CodeAgent
 
 agent = CodeAgent(
-    tools=[retriever_tool], model=HfApiModel(), max_steps=4, verbosity_level=2
+    tools=[retriever_tool], model=InferenceClientModel(), max_steps=4, verbosity_level=2
 )
 ```
 Upon initializing the CodeAgent, it has been automatically given a default system prompt that tells the LLM engine to process step-by-step and generate tool calls as code snippets, but you could replace this prompt template with your own as needed.
diff --git a/docs/source/en/examples/text_to_sql.mdx b/docs/source/en/examples/text_to_sql.mdx
index 600d8d95c..5cd93479c 100644
--- a/docs/source/en/examples/text_to_sql.mdx
+++ b/docs/source/en/examples/text_to_sql.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Text-to-SQL
 
 [[open-in-colab]]
@@ -31,7 +16,7 @@ Run the line below to install required dependencies:
 ```bash
 !pip install smolagents python-dotenv sqlalchemy --upgrade -q
 ```
-To call the HF Inference API, you will need a valid token as your environment variable `HF_TOKEN`.
+To call Inference Providers, you will need a valid token as your environment variable `HF_TOKEN`.
 We use python-dotenv to load it.
 ```py
 from dotenv import load_dotenv
@@ -137,14 +122,14 @@ Now let us create an agent that leverages this tool.
 
 We use the `CodeAgent`, which is smolagents’ main agent class: an agent that writes actions in code and can iterate on previous output according to the ReAct framework.
 
-The model is the LLM that powers the agent system. `HfApiModel` allows you to call LLMs using HF’s Inference API, either via Serverless or Dedicated endpoint, but you could also use any proprietary API.
+The model is the LLM that powers the agent system. `InferenceClientModel` allows you to call LLMs using HF’s Inference API, either via Serverless or Dedicated endpoint, but you could also use any proprietary API.
 
 ```py
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"),
+    model=InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"),
 )
 agent.run("Can you give me the name of the client who got the most expensive receipt?")
 ```
@@ -197,7 +182,7 @@ sql_engine.description = updated_description
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
 )
 
 agent.run("Which waiter got more total money from tips?")
diff --git a/docs/source/en/examples/web_browser.mdx b/docs/source/en/examples/web_browser.mdx
index fe2fc67de..1f464be9a 100644
--- a/docs/source/en/examples/web_browser.mdx
+++ b/docs/source/en/examples/web_browser.mdx
@@ -111,11 +111,11 @@ def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
 Now let's create our web automation agent:
 
 ```python
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel
 
 # Initialize the model
 model_id = "meta-llama/Llama-3.3-70B-Instruct"  # You can change this to your preferred model
-model = HfApiModel(model_id)
+model = InferenceClientModel(model_id=model_id)
 
 # Create the agent
 agent = CodeAgent(
diff --git a/docs/source/en/guided_tour.mdx b/docs/source/en/guided_tour.mdx
index 5eca7fc21..01e247357 100644
--- a/docs/source/en/guided_tour.mdx
+++ b/docs/source/en/guided_tour.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Agents - Guided tour
 
 [[open-in-colab]]
@@ -25,28 +10,29 @@ To initialize a minimal agent, you need at least these two arguments:
 
 - `model`, a text-generation model to power your agent - because the agent is different from a simple LLM, it is a system that uses a LLM as its engine. You can use any of these options:
     - [`TransformersModel`] takes a pre-initialized `transformers` pipeline to run inference on your local machine using `transformers`.
-    - [`HfApiModel`] leverages a `huggingface_hub.InferenceClient` under the hood and supports all Inference Providers on the Hub.
+    - [`InferenceClientModel`] leverages a `huggingface_hub.InferenceClient` under the hood and supports all Inference Providers on the Hub: Cerebras, Cohere, Fal, Fireworks, HF-Inference, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, and more.
     - [`LiteLLMModel`] similarly lets you call 100+ different models and providers through [LiteLLM](https://docs.litellm.ai/)!
     - [`AzureOpenAIServerModel`] allows you to use OpenAI models deployed in [Azure](https://azure.microsoft.com/en-us/products/ai-services/openai-service).
+    - [`AmazonBedrockServerModel`] allows you to use Amazon Bedrock in [AWS](https://aws.amazon.com/bedrock/?nc1=h_ls).
     - [`MLXModel`] creates a [mlx-lm](https://pypi.org/project/mlx-lm/) pipeline to run inference on your local machine.
 
 - `tools`, a list of `Tools` that the agent can use to solve the task. It can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`.
 
-Once you have these two arguments, `tools` and `model`,  you can create an agent and run it. You can use any LLM you'd like, either through [Inference Providers](https://huggingface.co/blog/inference-providers), [transformers](https://github.com/huggingface/transformers/), [ollama](https://ollama.com/), [LiteLLM](https://www.litellm.ai/), [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service), or [mlx-lm](https://pypi.org/project/mlx-lm/).
+Once you have these two arguments, `tools` and `model`,  you can create an agent and run it. You can use any LLM you'd like, either through [Inference Providers](https://huggingface.co/blog/inference-providers), [transformers](https://github.com/huggingface/transformers/), [ollama](https://ollama.com/), [LiteLLM](https://www.litellm.ai/), [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service), [Amazon Bedrock](https://aws.amazon.com/bedrock/?nc1=h_ls), or [mlx-lm](https://pypi.org/project/mlx-lm/).
 
 <hfoptions id="Pick a LLM">
-<hfoption id="HF Inference API">
+<hfoption id="Inference Providers">
 
-HF Inference API is free to use without a token, but then it will have a rate limit.
+Inference Providers need a `HF_TOKEN` to authenticate, but a free HF account already comes with included credits. Upgrade to PRO to raise your included credits.
 
-To access gated models or rise your rate limits with a PRO account, you need to set the environment variable `HF_TOKEN` or pass `token` variable upon initialization of `HfApiModel`. You can get your token from your [settings page](https://huggingface.co/settings/tokens)
+To access gated models or raise your rate limits with a PRO account, you need to set the environment variable `HF_TOKEN` or pass the `token` argument upon initialization of `InferenceClientModel`. You can get your token from your [settings page](https://huggingface.co/settings/tokens).
 
 ```python
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 model_id = "meta-llama/Llama-3.3-70B-Instruct" 
 
-model = HfApiModel(model_id=model_id, token="<YOUR_HUGGINGFACEHUB_API_TOKEN>") # You can choose to not pass any model_id to HfApiModel to use a default free model
+model = InferenceClientModel(model_id=model_id, token="<YOUR_HUGGINGFACEHUB_API_TOKEN>") # You can choose to not pass any model_id to InferenceClientModel to use a default model
 # you can also specify a particular provider e.g. provider="together" or provider="sambanova"
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 
@@ -149,6 +135,76 @@ agent.run(
 )
 ```
 
+</hfoption>
+<hfoption id="Amazon Bedrock">
+
+The `AmazonBedrockServerModel` class provides native integration with Amazon Bedrock, allowing for direct API calls and comprehensive configuration.
+
+#### Basic Usage
+
+```python
+# !pip install smolagents[aws_sdk]
+from smolagents import CodeAgent, AmazonBedrockServerModel
+
+model = AmazonBedrockServerModel(model_id="anthropic.claude-3-sonnet-20240229-v1:0")
+agent = CodeAgent(tools=[], model=model, add_base_tools=True)
+
+agent.run(
+    "Could you give me the 118th number in the Fibonacci sequence?",
+)
+```
+
+#### Advanced Configuration
+
+```python
+import boto3
+from smolagents import AmazonBedrockServerModel
+
+# Create a custom Bedrock client
+bedrock_client = boto3.client(
+    'bedrock-runtime',
+    region_name='us-east-1',
+    aws_access_key_id='YOUR_ACCESS_KEY',
+    aws_secret_access_key='YOUR_SECRET_KEY'
+)
+
+additional_api_config = {
+    "inferenceConfig": {
+        "maxTokens": 3000
+    },
+    "guardrailConfig": {
+        "guardrailIdentifier": "identify1",
+        "guardrailVersion": 'v1'
+    },
+}
+
+# Initialize with comprehensive configuration
+model = AmazonBedrockServerModel(
+    model_id="us.amazon.nova-pro-v1:0",
+    client=bedrock_client,  # Use custom client
+    **additional_api_config
+)
+
+agent = CodeAgent(tools=[], model=model, add_base_tools=True)
+
+agent.run(
+    "Could you give me the 118th number in the Fibonacci sequence?",
+)
+```
+
+#### Using LiteLLMModel
+
+Alternatively, you can use `LiteLLMModel` with Bedrock models:
+
+```python
+from smolagents import LiteLLMModel, CodeAgent
+
+model = LiteLLMModel(model_id="bedrock/anthropic.claude-3-sonnet-20240229-v1:0")
+agent = CodeAgent(tools=[], model=model)
+
+agent.run("Explain the concept of quantum computing")
+```
+
 </hfoption>
 <hfoption id="mlx-lm">
 
@@ -176,17 +232,22 @@ The Python interpreter also doesn't allow imports by default outside of a safe l
 You can authorize additional imports by passing the authorized modules as a list of strings in argument `additional_authorized_imports` upon initialization of your [`CodeAgent`]:
 
 ```py
-model = HfApiModel()
+model = InferenceClientModel()
 agent = CodeAgent(tools=[], model=model, additional_authorized_imports=['requests', 'bs4'])
 agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
 ```
 
+Additionally, as an extra security layer, access to submodules is forbidden by default, unless explicitly authorized within the import list.
+For instance, to access the `numpy.random` submodule, you need to add `'numpy.random'` to the `additional_authorized_imports` list.
+This could also be authorized by using `numpy.*`, which will allow `numpy` as well as any subpackage like `numpy.random` and its own subpackages.
+
 > [!WARNING]
 > The LLM can generate arbitrary code that will then be executed: do not add any unsafe imports!
 
 The execution will stop at any code trying to perform an illegal operation or if there is a regular Python error with the code generated by the agent.
 
-You can also use [E2B code executor](https://e2b.dev/docs#what-is-e2-b) instead of a local Python interpreter by first [setting the `E2B_API_KEY` environment variable](https://e2b.dev/dashboard?tab=keys) and then passing `use_e2b_executor=True` upon agent initialization.
+You can also use [E2B code executor](https://e2b.dev/docs#what-is-e2-b) or Docker instead of a local Python interpreter. For E2B, first [set the `E2B_API_KEY` environment variable](https://e2b.dev/dashboard?tab=keys) and then pass `executor_type="e2b"` upon agent initialization. For Docker, pass `executor_type="docker"` during initialization.
+
 
 > [!TIP]
 > Learn more about code execution [in this tutorial](tutorials/secure_code_execution).
@@ -220,7 +281,7 @@ When the agent is initialized, the tool attributes are used to generate a tool d
 
 ### Default toolbox
 
-`smolagents` comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools = True`:
+`smolagents` comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools=True`:
 
 - **DuckDuckGo web search***: performs a web search using DuckDuckGo browser.
 - **Python code interpreter**: runs your LLM generated Python code in a secure environment. This tool will only be added to [`ToolCallingAgent`] if you initialize it with `add_base_tools=True`, since code-based agent can already natively execute Python code
@@ -279,6 +340,7 @@ The function needs:
 - A clear name. The name should be descriptive enough of what this tool does to help the LLM brain powering the agent. Since this tool returns the model with the most downloads for a task, let's name it `model_download_tool`.
 - Type hints on both inputs and output
 - A description, that includes an 'Args:' part where each argument is described (without a type indication this time, it will be pulled from the type hint). Same as for the tool name, this description is an instruction manual for the LLM powering you agent, so do not neglect it.
+
 All these elements will be automatically baked into the agent's system prompt upon initialization: so strive to make them as clear as possible!
 
 > [!TIP]
@@ -312,8 +374,8 @@ All these attributes will be automatically baked into the agent's system prompt
 
 Then you can directly initialize your agent:
 ```py
-from smolagents import CodeAgent, HfApiModel
-agent = CodeAgent(tools=[model_download_tool], model=HfApiModel())
+from smolagents import CodeAgent, InferenceClientModel
+agent = CodeAgent(tools=[model_download_tool], model=InferenceClientModel())
 agent.run(
     "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
 )
@@ -326,7 +388,7 @@ You get the following logs:
 │ Can you give me the name of the model that has the most downloads in the 'text-to-video' │
 │ task on the Hugging Face Hub?                                                            │
 │                                                                                          │
-╰─ HfApiModel - Qwen/Qwen2.5-Coder-32B-Instruct ───────────────────────────────────────────╯
+╰─ InferenceClientModel - Qwen/Qwen2.5-Coder-32B-Instruct ───────────────────────────────────────────╯
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 0 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 ╭─ Executing this code: ───────────────────────────────────────────────────────────────────╮
 │   1 model_name = model_download_tool(task="text-to-video")                               │
@@ -364,9 +426,9 @@ Then you can pass this managed agent in the parameter managed_agents upon initia
 Here's an example of making an agent that managed a specific web search agent using our [`DuckDuckGoSearchTool`]:
 
 ```py
-from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool
+from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
 
-model = HfApiModel()
+model = InferenceClientModel()
 
 web_agent = CodeAgent(
     tools=[DuckDuckGoSearchTool()],
@@ -394,14 +456,14 @@ You can use `GradioUI` to interactively submit tasks to your agent and observe i
 from smolagents import (
     load_tool,
     CodeAgent,
-    HfApiModel,
+    InferenceClientModel,
     GradioUI
 )
 
 # Import tool from Hub
 image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True)
 
-model = HfApiModel(model_id)
+model = InferenceClientModel(model_id=model_id)
 
 # Initialize the agent with the image generation tool
 agent = CodeAgent(tools=[image_generation_tool], model=model)
@@ -414,6 +476,9 @@ The `reset=False` flag means the agent's memory is not flushed before launching
 
 You can also use this `reset=False` argument to keep the conversation going in any other agentic application.
 
+In Gradio UIs, if you want to allow users to interrupt a running agent, you can do this with a button that triggers the `agent.interrupt()` method.
+This will stop the agent at the end of its current step, then raise an error.
+
 ## Next steps
 
 Finally, when you've configured your agent to your needs, you can share it to the Hub!
diff --git a/docs/source/en/index.mdx b/docs/source/en/index.mdx
index 14f80ff5b..97cc905fc 100644
--- a/docs/source/en/index.mdx
+++ b/docs/source/en/index.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
--->
-
 # `smolagents`
 
 <div class="flex justify-center">
@@ -25,7 +10,7 @@ This library offers:
 
 ✨ **Simplicity**: the logic for agents fits in ~thousand lines of code. We kept abstractions to their minimal shape above raw code!
 
-🌐 **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through our inference API and Inference providers, but also models from OpenAI, Anthropic... it's really easy to power an agent with any LLM.
+🌐 **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through [Inference providers](https://huggingface.co/docs/inference-providers/index): Cerebras, Cohere, Fal, Fireworks, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, etc. It also supports models from OpenAI, Anthropic... it's really easy to power an agent with any LLM.
 
 🧑‍💻 **First-class support for Code Agents**, i.e. agents that write their actions in code (as opposed to "agents being used to write code"), [read more here](tutorials/secure_code_execution).
 
diff --git a/docs/source/en/reference/agents.mdx b/docs/source/en/reference/agents.mdx
index a6f57183e..d8f975e34 100644
--- a/docs/source/en/reference/agents.mdx
+++ b/docs/source/en/reference/agents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Agents
 
 <Tip warning={true}>
diff --git a/docs/source/en/reference/models.mdx b/docs/source/en/reference/models.mdx
index 2a7f8f45d..59816c60e 100644
--- a/docs/source/en/reference/models.mdx
+++ b/docs/source/en/reference/models.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Models
 
 <Tip warning={true}>
@@ -27,13 +12,17 @@ contains the API docs for the underlying classes.
 
 ## Models
 
+### Your custom Model
+
 You're free to create and use your own models to power your agent.
 
-You could use any `model` callable for your agent, as long as:
-1. It follows the [messages format](./chat_templating) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`.
-2. It stops generating outputs *before* the sequences passed in the argument `stop_sequences`
+You could subclass the base `Model` class to create a model for your agent.
+The main requirement is to override the `generate` method, which must satisfy these two criteria:
+1. It follows the [messages format](./chat_templating) (`List[Dict[str, str]]`) for its input `messages`, and it returns an object with a `.content` attribute.
+2. It stops generating outputs at the sequences passed in the argument `stop_sequences`.
 
-For defining your LLM, you can make a `custom_model` method which accepts a list of [messages](./chat_templating) and returns an object with a .content attribute containing the text. This callable also needs to accept a `stop_sequences` argument that indicates when to stop generating.
+For defining your LLM, you can make a `CustomModel` class that inherits from the base `Model` class.
+It should have a `generate` method that takes a list of [messages](./chat_templating) and returns an object with a `.content` attribute containing the text. The `generate` method also needs to accept a `stop_sequences` argument that indicates when to stop generating.
 
 ```python
 from huggingface_hub import login, InferenceClient
@@ -44,13 +33,16 @@ model_id = "meta-llama/Llama-3.3-70B-Instruct"
 
 client = InferenceClient(model=model_id)
 
-def custom_model(messages, stop_sequences=["Task"]):
-    response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
-    answer = response.choices[0].message
-    return answer
+class CustomModel(Model):
+    def generate(self, messages, stop_sequences=["Task"]):
+        response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1024)
+        answer = response.choices[0].message
+        return answer
+
+custom_model = CustomModel()
 ```
 
-Additionally, `custom_model` can also take a `grammar` argument. In the case where you specify a `grammar` upon agent initialization, this argument will be passed to the calls to model, with the `grammar` that you defined upon initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) in order to force properly-formatted agent outputs.
+Additionally, `generate` can also take a `grammar` argument. In the case where you specify a `grammar` upon agent initialization, this argument will be passed to the calls to model, with the `grammar` that you defined upon initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) in order to force properly-formatted agent outputs.
 
 ### TransformersModel
 
@@ -72,24 +64,24 @@ print(model([{"role": "user", "content": [{"type": "text", "text": "Ok!"}]}], st
 
 [[autodoc]] TransformersModel
 
-### HfApiModel
+### InferenceClientModel
 
-The `HfApiModel` wraps huggingface_hub's [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference) for the execution of the LLM. It supports both HF's own [Inference API](https://huggingface.co/docs/api-inference/index) as well as all [Inference Providers](https://huggingface.co/blog/inference-providers) available on the Hub.
+The `InferenceClientModel` wraps huggingface_hub's [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference) for the execution of the LLM. It supports all [Inference Providers](https://huggingface.co/docs/inference-providers/index) available on the Hub: Cerebras, Cohere, Fal, Fireworks, HF-Inference, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, and more.
 
 ```python
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel
 
 messages = [
   {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]}
 ]
 
-model = HfApiModel()
+model = InferenceClientModel(provider="novita")
 print(model(messages))
 ```
 ```text
 >>> Of course! If you change your mind, feel free to reach out. Take care!
 ```
-[[autodoc]] HfApiModel
+[[autodoc]] InferenceClientModel
 
 ### LiteLLMModel
 
@@ -103,12 +95,46 @@ messages = [
   {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]}
 ]
 
-model = LiteLLMModel("anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10)
+model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10)
 print(model(messages))
 ```
 
 [[autodoc]] LiteLLMModel
 
+### LiteLLMRouterModel
+
+The `LiteLLMRouterModel` is a wrapper around the [LiteLLM Router](https://docs.litellm.ai/docs/routing) that leverages
+advanced routing strategies: load-balancing across multiple deployments, prioritizing critical requests via queueing,
+and implementing basic reliability measures such as cooldowns, fallbacks, and exponential backoff retries.
+
+```python
+from smolagents import LiteLLMRouterModel
+
+messages = [
+  {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]}
+]
+
+model = LiteLLMRouterModel(
+    model_id="llama-3.3-70b",
+    model_list=[
+        {
+            "model_name": "llama-3.3-70b",
+            "litellm_params": {"model": "groq/llama-3.3-70b", "api_key": os.getenv("GROQ_API_KEY")},
+        },
+        {
+            "model_name": "llama-3.3-70b",
+            "litellm_params": {"model": "cerebras/llama-3.3-70b", "api_key": os.getenv("CEREBRAS_API_KEY")},
+        },
+    ],
+    client_kwargs={
+        "routing_strategy": "simple-shuffle",
+    },
+)
+print(model(messages))
+```
+
+[[autodoc]] LiteLLMRouterModel
+
 ### OpenAIServerModel
 
 This class lets you call any OpenAIServer compatible model.
@@ -149,6 +175,24 @@ model = AzureOpenAIServerModel(
 
 [[autodoc]] AzureOpenAIServerModel
 
+### AmazonBedrockServerModel
+
+`AmazonBedrockServerModel` helps you connect to Amazon Bedrock and run your agent with any available models.
+
+Below is an example setup. This class also offers additional options for customization.
+
+```py
+import os
+
+from smolagents import AmazonBedrockServerModel
+
+model = AmazonBedrockServerModel(
+    model_id = os.environ.get("AMAZON_BEDROCK_MODEL_ID"),
+)
+```
+
+[[autodoc]] AmazonBedrockServerModel
+
 ### MLXModel
 
 
@@ -167,3 +211,20 @@ print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"]))
 > You must have `mlx-lm` installed on your machine. Please run `pip install smolagents[mlx-lm]` if it's not the case.
 
 [[autodoc]] MLXModel
+
+### VLLMModel
+
+Model to use [vLLM](https://docs.vllm.ai/) for fast LLM inference and serving.
+
+```python
+from smolagents import VLLMModel
+
+model = VLLMModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct")
+
+print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"]))
+```
+
+> [!TIP]
+> You must have `vllm` installed on your machine. Please run `pip install smolagents[vllm]` if it's not the case.
+
+[[autodoc]] VLLMModel
diff --git a/docs/source/en/reference/tools.mdx b/docs/source/en/reference/tools.mdx
index 68c70b897..a5d217bb8 100644
--- a/docs/source/en/reference/tools.mdx
+++ b/docs/source/en/reference/tools.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Tools
 
 <Tip warning={true}>
@@ -77,6 +62,10 @@ contains the API docs for the underlying classes.
 
 [[autodoc]] ToolCollection
 
+## MCP Client
+
+[[autodoc]] smolagents.mcp_client.MCPClient
+
 ## Agent Types
 
 Agents can handle any type of object in-between tools; tools, being completely multimodal, can accept and return
diff --git a/docs/source/en/tutorials/building_good_agents.mdx b/docs/source/en/tutorials/building_good_agents.mdx
index 8c17de1af..53bda8f92 100644
--- a/docs/source/en/tutorials/building_good_agents.mdx
+++ b/docs/source/en/tutorials/building_good_agents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Building good agents
 
 [[open-in-colab]]
@@ -43,7 +28,7 @@ This leads to a few takeaways:
 
 ### Improve the information flow to the LLM engine
 
-Remember that your LLM engine is like an *intelligent* robot, tapped into a room with the only communication with the outside world being notes passed under a door.
+Remember that your LLM engine is like an *intelligent* robot, trapped in a room with the only communication with the outside world being notes passed under a door.
 
 It won't know of anything that happened if you don't explicitly put that into its prompt.
 
@@ -120,11 +105,11 @@ In general, to ease the load on your LLM, the good question to ask yourself is:
 To pass some additional objects to your agent beyond the simple string describing the task, you can use the `additional_args` argument to pass any type of object:
 
 ```py
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 model_id = "meta-llama/Llama-3.3-70B-Instruct"
 
-agent = CodeAgent(tools=[], model=HfApiModel(model_id=model_id), add_base_tools=True)
+agent = CodeAgent(tools=[], model=InferenceClientModel(model_id=model_id), add_base_tools=True)
 
 agent.run(
     "Why does Mike not know many people in New York?",
@@ -210,13 +195,153 @@ In the end you have to return a final answer using the `final_answer` tool.
 
 Here are a few examples using notional tools:
 ---
-{examples}
+Task: "Generate an image of the oldest person in this document."
 
-Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools:
+Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
+Code:
+```py
+answer = document_qa(document=document, question="Who is the oldest person mentioned?")
+print(answer)
+```<end_code>
+Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
+
+Thought: I will now generate an image showcasing the oldest person.
+Code:
+```py
+image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.")
+final_answer(image)
+```<end_code>
+
+---
+Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
+
+Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool
+Code:
+```py
+result = 5 + 3 + 1294.678
+final_answer(result)
+```<end_code>
+
+---
+Task:
+"Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French.
+You have been provided with these additional arguments, that you can access using the keys as variables in your python code:
+{'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}"
+
+Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.
+Code:
+```py
+translated_question = translator(question=question, src_lang="French", tgt_lang="English")
+print(f"The translated question is {translated_question}.")
+answer = image_qa(image=image, question=translated_question)
+final_answer(f"The answer is {answer}")
+```<end_code>
+
+---
+Task:
+In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.
+What does he say was the consequence of Einstein learning too much math on his creativity, in one word?
 
-{{tool_descriptions}}
+Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.
+Code:
+```py
+pages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")
+print(pages)
+```<end_code>
+Observation:
+No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".
+
+Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query.
+Code:
+```py
+pages = search(query="1979 interview Stanislaus Ulam")
+print(pages)
+```<end_code>
+Observation:
+Found 6 pages:
+[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)
+
+[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)
+
+(truncated)
+
+Thought: I will read the first 2 pages to know more.
+Code:
+```py
+for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:
+    whole_page = visit_webpage(url)
+    print(whole_page)
+    print("\n" + "="*80 + "\n")  # Print separator between pages
+```<end_code>
+Observation:
+Manhattan Project Locations:
+Los Alamos, NM
+Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at
+(truncated)
+
+Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word.
+Code:
+```py
+final_answer("diminished")
+```<end_code>
 
-{{managed_agents_descriptions}}
+---
+Task: "Which city has the highest population: Guangzhou or Shanghai?"
+
+Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.
+Code:
+```py
+for city in ["Guangzhou", "Shanghai"]:
+    print(f"Population {city}:", search(f"{city} population"))
+```<end_code>
+Observation:
+Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
+Population Shanghai: '26 million (2019)'
+
+Thought: Now I know that Shanghai has the highest population.
+Code:
+```py
+final_answer("Shanghai")
+```<end_code>
+
+---
+Task: "What is the current age of the pope, raised to the power 0.36?"
+
+Thought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search.
+Code:
+```py
+pope_age_wiki = wiki(query="current pope age")
+print("Pope age as per wikipedia:", pope_age_wiki)
+pope_age_search = web_search(query="current pope age")
+print("Pope age as per google search:", pope_age_search)
+```<end_code>
+Observation:
+Pope age: "The pope Francis is currently 88 years old."
+
+Thought: I know that the pope is 88 years old. Let's compute the result using python code.
+Code:
+```py
+pope_current_age = 88 ** 0.36
+final_answer(pope_current_age)
+```<end_code>
+
+Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools:
+{%- for tool in tools.values() %}
+- {{ tool.name }}: {{ tool.description }}
+    Takes inputs: {{tool.inputs}}
+    Returns an output of type: {{tool.output_type}}
+{%- endfor %}
+
+{%- if managed_agents and managed_agents.values() | list %}
+You can also give tasks to team members.
+Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
+Given that this team member is a real human, you should be very verbose in your task.
+Here is a list of the team members that you can call:
+{%- for agent in managed_agents.values() %}
+- {{ agent.name }}: {{ agent.description }}
+{%- endfor %}
+{%- else %}
+{%- endif %}
 
 Here are the rules you should always follow to solve your task:
 1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_code>' sequence, else you will fail.
@@ -225,7 +350,7 @@ Here are the rules you should always follow to solve your task:
 4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.
 5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.
 6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'.
-7. Never create any notional variables in our code, as having these in your logs might derail you from the true variables.
+7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.
 8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
 9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
 10. Don't give up! You're in charge of solving the task, not providing directions to solve it.
@@ -233,12 +358,30 @@ Here are the rules you should always follow to solve your task:
 Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
 ```
 
-As you can see, there are placeholders like `"{{tool_descriptions}}"`: these will be used upon agent initialization to insert certain automatically generated descriptions of tools or managed agents.
-
-So while you can overwrite this system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter, your new system prompt must contain the following placeholders:
-- `"{{tool_descriptions}}"` to insert tool descriptions.
-- `"{{managed_agents_description}}"` to insert the description for managed agents if there are any.
-- For `CodeAgent` only: `"{{authorized_imports}}"` to insert the list of authorized imports.
+As you can see, there are placeholders like `"{{ tool.description }}"`: these will be used upon agent initialization to insert certain automatically generated descriptions of tools or managed agents.
+
+So while you can overwrite this system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter, your new system prompt can contain the following placeholders:
+- To insert tool descriptions:
+  ```
+  {%- for tool in tools.values() %}
+  - {{ tool.name }}: {{ tool.description }}
+      Takes inputs: {{tool.inputs}}
+      Returns an output of type: {{tool.output_type}}
+  {%- endfor %}
+  ```
+- To insert the descriptions for managed agents if there are any:
+  ```
+  {%- if managed_agents and managed_agents.values() | list %}
+  You can also give tasks to team members.
+  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
+  Given that this team member is a real human, you should be very verbose in your task.
+  Here is a list of the team members that you can call:
+  {%- for agent in managed_agents.values() %}
+  - {{ agent.name }}: {{ agent.description }}
+  {%- endfor %}
+  {%- endif %}
+  ```
+- For `CodeAgent` only, to insert the list of authorized imports: `"{{authorized_imports}}"`
 
 Then you can change the system prompt as follows:
 
@@ -254,7 +397,7 @@ This also works with the [`ToolCallingAgent`].
 We provide a model for a supplementary planning step, that an agent can run regularly in-between normal action steps. In this step, there is no tool call, the LLM is simply asked to update a list of facts it knows and to reflect on what steps it should take next based on those facts.
 
 ```py
-from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool
+from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -266,7 +409,7 @@ search_tool = DuckDuckGoSearchTool()
 
 agent = CodeAgent(
     tools=[search_tool, image_generation_tool],
-    model=HfApiModel("Qwen/Qwen2.5-72B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen2.5-72B-Instruct"),
     planning_interval=3 # This is where you activate planning!
 )
 
diff --git a/docs/source/en/tutorials/inspect_runs.mdx b/docs/source/en/tutorials/inspect_runs.mdx
index 4ade8427b..333db728b 100644
--- a/docs/source/en/tutorials/inspect_runs.mdx
+++ b/docs/source/en/tutorials/inspect_runs.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Inspecting runs with OpenTelemetry
 
 [[open-in-colab]]
@@ -71,10 +56,10 @@ from smolagents import (
     ToolCallingAgent,
     DuckDuckGoSearchTool,
     VisitWebpageTool,
-    HfApiModel,
+    InferenceClientModel,
 )
 
-model = HfApiModel()
+model = InferenceClientModel()
 
 search_agent = ToolCallingAgent(
     tools=[DuckDuckGoSearchTool(), VisitWebpageTool()],
@@ -160,10 +145,10 @@ from smolagents import (
     ToolCallingAgent,
     DuckDuckGoSearchTool,
     VisitWebpageTool,
-    HfApiModel,
+    InferenceClientModel,
 )
 
-model = HfApiModel(
+model = InferenceClientModel(
     model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
 )
 
diff --git a/docs/source/en/tutorials/memory.mdx b/docs/source/en/tutorials/memory.mdx
index 0732d9596..df982da82 100644
--- a/docs/source/en/tutorials/memory.mdx
+++ b/docs/source/en/tutorials/memory.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # 📚 Manage your agent's memory
 
 [[open-in-colab]]
@@ -30,9 +15,9 @@ You can also use `agent.replay()`, as follows:
 
 After the agent has run:
 ```py
-from smolagents import HfApiModel, CodeAgent
+from smolagents import InferenceClientModel, CodeAgent
 
-agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=0)
+agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=0)
 
 result = agent.run("What's the 20th Fibonacci number?")
 ```
@@ -73,7 +58,7 @@ You can also use step callbacks to dynamically change the agent's memory.
 
 Step callbacks can access the `agent` itself in their arguments, so they can access any memory step as highlighted above, and change it if needed. For instance, let's say you are observing screenshots of each step performed by a web browser agent. You want to log the newest screenshot, and remove the images from ancient steps to save on token costs.
 
-You culd run something like the following.
+You could run something like the following.
 _Note: this code is incomplete, some imports and object definitions have been removed for the sake of concision, visit [the original script](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to get the full working code._
 
 ```py
@@ -115,9 +100,10 @@ This can be useful in case you have tool calls that take days: you can just run
 This will also let you update the memory on each step.
 
 ```py
-from smolagents import HfApiModel, CodeAgent, ActionStep, TaskStep
+from smolagents import InferenceClientModel, CodeAgent, ActionStep, TaskStep
 
-agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=1)
+agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=1)
+agent.python_executor.send_tools({**agent.tools})
 print(agent.memory.system_prompt)
 
 task = "What is the 20th Fibonacci number?"
@@ -145,4 +131,4 @@ while final_answer is None and step_number <= 10:
     # agent.memory.steps[-1] = ...
 
 print("The final answer is:", final_answer)
-```
\ No newline at end of file
+```
diff --git a/docs/source/en/tutorials/secure_code_execution.mdx b/docs/source/en/tutorials/secure_code_execution.mdx
index daa8ee900..8716f63c6 100644
--- a/docs/source/en/tutorials/secure_code_execution.mdx
+++ b/docs/source/en/tutorials/secure_code_execution.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Secure code execution
 
 [[open-in-colab]]
@@ -24,12 +9,12 @@ rendered properly in your Markdown viewer.
 
 [Multiple](https://huggingface.co/papers/2402.01030) [research](https://huggingface.co/papers/2411.01747) [papers](https://huggingface.co/papers/2401.00812) have shown that having the LLM write its actions (the tool calls) in code is much better than the current standard format for tool calling, which is across the industry different shades of "writing actions as a JSON of tools names and arguments to use".
 
-Why is code better? Well, because we crafted our code languages specifically to be great at expressing actions performed by a computer. If JSON snippets was a better way, this package would have been written in JSON snippets and the devil would be laughing at us.
+Why is code better? Well, because we crafted our code languages specifically to be great at expressing actions performed by a computer. If JSON snippets were a better way, this package would have been written in JSON snippets and the devil would be laughing at us.
 
 Code is just a better way to express actions on a computer. It has better:
 - **Composability:** could you nest JSON actions within each other, or define a set of JSON actions to re-use later, the same way you could just define a python function?
 - **Object management:** how do you store the output of an action like `generate_image` in JSON?
-- **Generality:** code is built to express simply anything you can do have a computer do.
+- **Generality:** code is built to express simply anything you can have a computer do.
 - **Representation in LLM training corpus:** why not leverage this benediction of the sky that plenty of quality actions have already been included in LLM training corpus?
 
 This is illustrated on the figure below, taken from [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030).
@@ -38,45 +23,392 @@ This is illustrated on the figure below, taken from [Executable Code Actions Eli
 
 This is why we put emphasis on proposing code agents, in this case python agents, which meant putting higher effort on building secure python interpreters.
 
-### Local python interpreter
+### Local code execution
 
 By default, the `CodeAgent` runs LLM-generated code in your environment.
-This execution is not done by the vanilla Python interpreter: we've re-built a more secure `LocalPythonInterpreter` from the ground up.
-This interpreter is designed for security by:
- - Restricting the imports to a list explicitly passed by the user
- - Capping the number of operations to prevent infinite loops and resource bloating.
- - Will not perform any operation that's not pre-defined.
 
-We've used this on many use cases, without ever observing any damage to the environment. 
+This is inherently risky: LLM-generated code could be harmful to your environment.
+
+Malicious code execution can occur in several ways:
+- **Plain LLM error:** LLMs are still far from perfect and may unintentionally generate harmful commands while attempting to be helpful. While this risk is low, instances have been observed where an LLM attempted to execute potentially dangerous code.  
+- **Supply chain attack:** Running an untrusted or compromised LLM could expose a system to harmful code generation. While this risk is extremely low when using well-known models on secure inference infrastructure, it remains a theoretical possibility.  
+- **Prompt injection:** An agent browsing the web could arrive on a malicious website that contains harmful instructions, thus injecting an attack into the agent's memory.
+- **Exploitation of publicly accessible agents:** Agents exposed to the public can be misused by malicious actors to execute harmful code. Attackers may craft adversarial inputs to exploit the agent's execution capabilities, leading to unintended consequences.
+Once malicious code is executed, whether accidentally or intentionally, it can damage the file system, exploit local or cloud-based resources, abuse API services, and even compromise network security.
+
+One could argue that on the [spectrum of agency](../conceptual_guides/intro_agents), code agents give much higher agency to the LLM on your system than other less agentic setups: this goes hand-in-hand with higher risk.
+
+So you need to be very mindful of security.
+
+To improve safety, we propose a range of measures that offer increasing levels of security, at a higher setup cost.
+
+We advise you to keep in mind that no solution will be 100% safe.
+
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/code_execution_safety_diagram.png">
+
+### Our local Python executor
+
+To add a first layer of security, code execution in `smolagents` is not performed by the vanilla Python interpreter.
+We have re-built a more secure `LocalPythonExecutor` from the ground up.
+
+To be precise, this interpreter works by loading the Abstract Syntax Tree (AST) from your code and executing it operation by operation, making sure to always follow certain rules:
+- By default, imports are disallowed unless they have been explicitly added to an authorization list by the user.
+- Furthermore, access to submodules is disabled by default, and each must be explicitly authorized in the import list as well, or you can pass for instance `numpy.*` to allow both `numpy` and all its subpackages, like `numpy.random` or `numpy.a.b`.
+   - Note that some seemingly innocuous packages like `random` can give access to potentially harmful submodules, as in `random._os`.
+- The total count of elementary operations processed is capped to prevent infinite loops and resource bloating.
+- Any operation that has not been explicitly defined in our custom interpreter will raise an error.
+
+You could try these safeguards as follows:
+
+```py
+from smolagents.local_python_executor import LocalPythonExecutor
+
+# Set up custom executor, authorize package "numpy"
+custom_executor = LocalPythonExecutor(["numpy"])
+
+# Utility for pretty printing errors
+def run_capture_exception(command: str):
+    try:
+        custom_executor(command)
+    except Exception as e:
+        print("ERROR:\n", e)
+
+# Undefined commands just do not work
+harmful_command="!echo Bad command"
+run_capture_exception(harmful_command)
+# >>> ERROR: invalid syntax (<unknown>, line 1)
+
+
+# Imports like os will not be performed unless explicitly added to `additional_authorized_imports`
+harmful_command="import os; exit_code = os.system('echo Bad command')"
+run_capture_exception(harmful_command)
+# >>> ERROR: Code execution failed at line 'import os' due to: InterpreterError: Import of os is not allowed. Authorized imports are: ['statistics', 'numpy', 'itertools', 'time', 'queue', 'collections', 'math', 'random', 're', 'datetime', 'stat', 'unicodedata']
+
+# Even in authorized imports, potentially harmful packages will not be imported
+harmful_command="import random; random._os.system('echo Bad command')"
+run_capture_exception(harmful_command)
+# >>> ERROR: Code execution failed at line 'random._os.system('echo Bad command')' due to: InterpreterError: Forbidden access to module: os
+
+# Infinite loops are interrupted after N operations
+harmful_command="""
+while True:
+    pass
+"""
+run_capture_exception(harmful_command)
+# >>> ERROR: Code execution failed at line 'while True: pass' due to: InterpreterError: Maximum number of 1000000 iterations in While loop exceeded
+```
+
+These safeguards make our interpreter safer.
+We have used it on a diversity of use cases, without ever observing any damage to the environment.
+
+> [!WARNING]
+> It's important to understand that no local python sandbox can ever be completely secure. While our interpreter provides significant safety improvements over the standard Python interpreter, it is still possible for a determined attacker or a fine-tuned malicious LLM to find vulnerabilities and potentially harm your environment. 
+> 
+> For example, if you've allowed packages like `Pillow` to process images, the LLM could generate code that creates thousands of large image files to fill your hard drive. Other advanced escape techniques might exploit deeper vulnerabilities in authorized packages.
+> 
+> Running LLM-generated code in your local environment always carries some inherent risk. The only way to run LLM-generated code with truly robust security isolation is to use remote execution options like E2B or Docker, as detailed below.
+
+The risk of a malicious attack is low when using well-known LLMs from trusted inference providers, but it is not zero.
+For high-security applications or when using less trusted models, you should consider using a remote execution sandbox.
+
+## Sandbox approaches for secure code execution
+
+When working with AI agents that execute code, security is paramount. There are two main approaches to sandboxing code execution in smolagents, each with different security properties and capabilities:
+
+
+![Sandbox approaches comparison](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/remote_execution.png)
+
+1. **Running individual code snippets in a sandbox**: This approach (left side of diagram) only executes the agent-generated Python code snippets in a sandbox while keeping the rest of the agentic system in your local environment. It's simpler to set up using `executor_type="e2b"` or `executor_type="docker"`, but it doesn't support multi-agents and still requires passing state data between your environment and the sandbox.
+
+2. **Running the entire agentic system in a sandbox**: This approach (right side of diagram) runs the entire agentic system, including the agent, model, and tools, within a sandbox environment. This provides better isolation but requires more manual setup and may require passing sensitive credentials (like API keys) to the sandbox environment.
+
+This guide describes how to set up and use both types of sandbox approaches for your agent applications.
+
+### E2B setup
+
+#### Installation
+
+1. Create an E2B account at [e2b.dev](https://e2b.dev)
+2. Install the required packages:
+```bash
+pip install 'smolagents[e2b]'
+```
+
+#### Running your agent in E2B: quick start
+
+We provide a simple way to use an E2B Sandbox: simply add `executor_type="e2b"` to the agent initialization, as follows:
+
+```py
+from smolagents import InferenceClientModel, CodeAgent
+
+agent = CodeAgent(model=InferenceClientModel(), tools=[], executor_type="e2b")
+
+agent.run("Can you give me the 100th Fibonacci number?")
+```
+
+This solution sends the agent state to the server at the start of each `agent.run()`.
+Then the models are called from the local environment, but the generated code will be sent to the sandbox for execution, and only the output will be returned.
+
+This is illustrated in the figure below.
+
+<p align="center">
+    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/sandboxed_execution.png" alt="sandboxed code execution" width=60% max-width=500px>
+</p>
+
 
-However this solution is not watertight: one could imagine occasions where LLMs fine-tuned for malignant actions could still hurt your environment. For instance if you've allowed an innocuous package like `Pillow` to process images, the LLM could generate thousands of saves of images to bloat your hard drive.
-It's certainly not likely if you've chosen the LLM engine yourself, but it could happen.
+However, any call to a [managed agent](../examples/multiagents) would require model calls, and since we do not transfer secrets to the remote sandbox, those model calls would lack credentials.
+Hence this solution does not work (yet) with more complicated multi-agent setups.
 
-So if you want to be extra cautious, you can use the remote code execution option described below.
+#### Running your agent in E2B: multi-agents
 
-### E2B code executor
+To use multi-agents in an E2B sandbox, you need to run your agents completely from within E2B.
 
-For maximum security, you can use our integration with E2B to run code in a sandboxed environment. This is a remote execution service that runs your code in an isolated container, making it impossible for the code to affect your local environment.
+Here is how to do it:
 
-For this, you will need to setup your E2B account and set your `E2B_API_KEY` in your environment variables. Head to [E2B's quickstart documentation](https://e2b.dev/docs/quickstart) for more information.
+```python
+from e2b_code_interpreter import Sandbox
+import os
 
-Then you can install it with `pip install "smolagents[e2b]"`.
+# Create the sandbox
+sandbox = Sandbox()
 
-Now you're set!
+# Install required packages
+sandbox.commands.run("pip install smolagents")
 
-To set the code executor to E2B, simply pass the flag `use_e2b_executor=True` when initializing your `CodeAgent`.
-Note that you should add all the tool's dependencies in `additional_authorized_imports`, so that the executor installs them.
+def run_code_raise_errors(sandbox, code: str, verbose: bool = False) -> str:
+    execution = sandbox.run_code(
+        code,
+        envs={'HF_TOKEN': os.getenv('HF_TOKEN')}
+    )
+    if execution.error:
+        execution_logs = "\n".join([str(log) for log in execution.logs.stdout])
+        logs = execution_logs
+        logs += execution.error.traceback
+        raise ValueError(logs)
+    return "\n".join([str(log) for log in execution.logs.stdout])
 
+# Define your agent application
+agent_code = """
+import os
+from smolagents import CodeAgent, InferenceClientModel
+
+# Initialize the agents
+agent = CodeAgent(
+    model=InferenceClientModel(token=os.getenv("HF_TOKEN"), provider="together"),
+    tools=[],
+    name="coder_agent",
+    description="This agent takes care of your difficult algorithmic problems using code."
+)
+
+manager_agent = CodeAgent(
+    model=InferenceClientModel(token=os.getenv("HF_TOKEN"), provider="together"),
+    tools=[],
+    managed_agents=[agent],
+)
+
+# Run the agent
+response = manager_agent.run("What's the 20th Fibonacci number?")
+print(response)
+"""
+
+# Run the agent code in the sandbox
+execution_logs = run_code_raise_errors(sandbox, agent_code)
+print(execution_logs)
+```
+
+### Docker setup
+
+#### Installation
+
+1. [Install Docker on your system](https://docs.docker.com/get-started/get-docker/)
+2. Install the required packages:
+```bash
+pip install 'smolagents[docker]'
+```
+
+#### Running your agent in Docker: quick start
+
+Similar to the E2B Sandbox above, to quickly get started with Docker, simply add `executor_type="docker"` to the agent initialization, like:
 ```py
-from smolagents import CodeAgent, VisitWebpageTool, HfApiModel
+from smolagents import InferenceClientModel, CodeAgent
+
+agent = CodeAgent(model=InferenceClientModel(), tools=[], executor_type="docker")
+
+agent.run("Can you give me the 100th Fibonacci number?")
+```
+
+#### Advanced docker usage
+
+If you want to run multi-agent systems in Docker, you'll need to set up a custom interpreter in a sandbox.
+
+Here is how to set up a Dockerfile:
+
+```dockerfile
+FROM python:3.10-bullseye
+
+# Install build dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        python3-dev && \
+    pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir smolagents && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Run with limited privileges
+USER nobody
+
+# Default command
+CMD ["python", "-c", "print('Container ready')"]
+```
+
+Create a sandbox manager to run code:
+
+```python
+import docker
+import os
+from typing import Optional
+
+class DockerSandbox:
+    def __init__(self):
+        self.client = docker.from_env()
+        self.container = None
+
+    def create_container(self):
+        try:
+            image, build_logs = self.client.images.build(
+                path=".",
+                tag="agent-sandbox",
+                rm=True,
+                forcerm=True,
+                buildargs={},
+                # decode=True
+            )
+        except docker.errors.BuildError as e:
+            print("Build error logs:")
+            for log in e.build_log:
+                if 'stream' in log:
+                    print(log['stream'].strip())
+            raise
+
+        # Create container with security constraints and proper logging
+        self.container = self.client.containers.run(
+            "agent-sandbox",
+            command="tail -f /dev/null",  # Keep container running
+            detach=True,
+            tty=True,
+            mem_limit="512m",
+            cpu_quota=50000,
+            pids_limit=100,
+            security_opt=["no-new-privileges"],
+            cap_drop=["ALL"],
+            environment={
+                "HF_TOKEN": os.getenv("HF_TOKEN")
+            },
+        )
+
+    def run_code(self, code: str) -> Optional[str]:
+        if not self.container:
+            self.create_container()
+
+        # Execute code in container
+        exec_result = self.container.exec_run(
+            cmd=["python", "-c", code],
+            user="nobody"
+        )
+
+        # Collect all output
+        return exec_result.output.decode() if exec_result.output else None
+
+
+    def cleanup(self):
+        if self.container:
+            try:
+                self.container.stop()
+            except docker.errors.NotFound:
+                # Container already removed, this is expected
+                pass
+            except Exception as e:
+                print(f"Error during cleanup: {e}")
+            finally:
+                self.container = None  # Clear the reference
+
+# Example usage:
+sandbox = DockerSandbox()
+
+try:
+    # Define your agent code
+    agent_code = """
+import os
+from smolagents import CodeAgent, InferenceClientModel
+
+# Initialize the agent
 agent = CodeAgent(
-    tools = [VisitWebpageTool()],
-    model=HfApiModel(),
-    additional_authorized_imports=["requests", "markdownify"],
-    use_e2b_executor=True
+    model=InferenceClientModel(token=os.getenv("HF_TOKEN"), provider="together"),
+    tools=[]
 )
 
-agent.run("What was Abraham Lincoln's preferred pet?")
+# Run the agent
+response = agent.run("What's the 20th Fibonacci number?")
+print(response)
+"""
+
+    # Run the code in the sandbox
+    output = sandbox.run_code(agent_code)
+    print(output)
+
+finally:
+    sandbox.cleanup()
 ```
 
-E2B code execution is not compatible with multi-agents at the moment - because having an agent call in a code blob that should be executed remotely is a mess. But we're working on adding it!
+### Best practices for sandboxes
+
+These key practices apply to both E2B and Docker sandboxes:
+
+- Resource management
+  - Set memory and CPU limits
+  - Implement execution timeouts
+  - Monitor resource usage
+- Security
+  - Run with minimal privileges
+  - Disable unnecessary network access
+  - Use environment variables for secrets
+- Environment
+  - Keep dependencies minimal
+  - Use fixed package versions
+  - If you use base images, update them regularly
+
+- Cleanup
+  - Always ensure proper cleanup of resources, especially for Docker containers, to avoid having dangling containers eating up resources.
+
+✨ By following these practices and implementing proper cleanup procedures, you can ensure your agent runs safely and efficiently in a sandboxed environment.
+
+## Comparing security approaches
+
+As illustrated in the diagram earlier, both sandboxing approaches have different security implications:
+
+### Approach 1: Running just the code snippets in a sandbox
+- **Pros**: 
+  - Easier to set up with a simple parameter (`executor_type="e2b"` or `executor_type="docker"`)
+  - No need to transfer API keys to the sandbox
+  - Better protection for your local environment
+- **Cons**:
+  - Doesn't support multi-agents (managed agents)
+  - Still requires transferring state between your environment and the sandbox
+  - Limited to specific code execution
+
+### Approach 2: Running the entire agentic system in a sandbox
+- **Pros**:
+  - Supports multi-agents
+  - Complete isolation of the entire agent system
+  - More flexible for complex agent architectures
+- **Cons**:
+  - Requires more manual setup
+  - May require transferring sensitive API keys to the sandbox
+  - Potentially higher latency due to more complex operations
+
+Choose the approach that best balances your security needs with your application's requirements. For most applications with simpler agent architectures, Approach 1 provides a good balance of security and ease of use. For more complex multi-agent systems where you need full isolation, Approach 2, while more involved to set up, offers better security guarantees.
\ No newline at end of file
diff --git a/docs/source/en/tutorials/tools.mdx b/docs/source/en/tutorials/tools.mdx
index d9da1e94f..a6b24d280 100644
--- a/docs/source/en/tutorials/tools.mdx
+++ b/docs/source/en/tutorials/tools.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Tools
 
 [[open-in-colab]]
@@ -82,7 +67,7 @@ In this case, you can build your tool by subclassing [`Tool`] as described above
 
 ### Share your tool to the Hub
 
-You can share your custom tool to the Hub by calling [`~Tool.push_to_hub`] on the tool. Make sure you've created a repository for it on the Hub and are using a token with read access.
+You can share your custom tool to the Hub as a Space repository by calling [`~Tool.push_to_hub`] on the tool. Make sure you've created a repository for it on the Hub and are using a token with write access.
 
 ```python
 model_downloads_tool.push_to_hub("{your_username}/hf-model-downloads", token="<YOUR_HUGGINGFACEHUB_API_TOKEN>")
@@ -112,7 +97,7 @@ model_download_tool = load_tool(
 
 ### Import a Space as a tool
 
-You can directly import a Space from the Hub as a tool using the [`Tool.from_space`] method!
+You can directly import a Gradio Space from the Hub as a tool using the [`Tool.from_space`] method!
 
 You only need to provide the id of the Space on the Hub, its name, and a description that will help you agent understand what the tool does. Under the hood, this will use [`gradio-client`](https://pypi.org/project/gradio-client/) library to call the Space.
 
@@ -131,12 +116,12 @@ And voilà, here's your image! 🏖️
 
 <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/sunny_beach.webp">
 
-Then you can use this tool just like any other tool.  For example, let's improve the prompt  `a rabbit wearing a space suit` and generate an image of it. This example also shows how you can pass additional arguments to the agent.
+Then you can use this tool just like any other tool.  For example, let's improve the prompt `a rabbit wearing a space suit` and generate an image of it. This example also shows how you can pass additional arguments to the agent.
 
 ```python
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
-model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 agent = CodeAgent(tools=[image_generation_tool], model=model)
 
 agent.run(
@@ -182,9 +167,9 @@ You can manage an agent's toolbox by adding or replacing a tool in attribute `ag
 Let's add the `model_download_tool` to an existing agent initialized with only the default toolbox.
 
 ```python
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel
 
-model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 agent.tools[model_download_tool.name] = model_download_tool
@@ -204,7 +189,7 @@ agent.run(
 
 ### Use a collection of tools
 
-You can leverage tool collections by using the `ToolCollection` object. It supports loading either a collection from the Hub or an MCP server tools.
+You can leverage tool collections by using [`ToolCollection`]. It supports loading either a collection from the Hub or the tools of an MCP server.
 
 #### Tool Collection from a collection in the Hub
 
@@ -229,19 +214,119 @@ To speed up the start, tools are loaded only if called by the agent.
 
 Leverage tools from the hundreds of MCP servers available on [glama.ai](https://glama.ai/mcp/servers) or [smithery.ai](https://smithery.ai/).
 
-The MCP servers tools can be loaded in a `ToolCollection` object as follow:
+> [!WARNING]
+> **Security Warning:** Using MCP servers comes with security risks:
+> - **Trust is essential:** Only use MCP servers from trusted sources. Malicious servers can execute harmful code on your machine.
+> - **Stdio-based MCP servers** will always execute code on your machine (that's their intended functionality).
+> - **SSE-based MCP servers** run remotely and will not be able to execute code on your machine, but you should still proceed with caution.
+>
+> Always verify the source and integrity of any MCP server before connecting to it, especially for production environments.
+
+The tools of an MCP server can be loaded with [`ToolCollection.from_mcp`].
 
+For stdio-based MCP servers, pass the server parameters as an instance of `mcp.StdioServerParameters`:
 ```py
 from smolagents import ToolCollection, CodeAgent
 from mcp import StdioServerParameters
 
 server_parameters = StdioServerParameters(
-    command="uv",
+    command="uvx",
     args=["--quiet", "pubmedmcp@0.1.3"],
     env={"UV_PYTHON": "3.12", **os.environ},
 )
 
-with ToolCollection.from_mcp(server_parameters) as tool_collection:
+with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as tool_collection:
+    agent = CodeAgent(tools=[*tool_collection.tools], model=model, add_base_tools=True)
+    agent.run("Please find a remedy for hangover.")
+```
+
+For SSE-based MCP servers, simply pass a dict containing the parameters for `mcp.client.sse.sse_client`:
+```py
+from smolagents import ToolCollection, CodeAgent
+
+with ToolCollection.from_mcp({"url": "http://127.0.0.1:8000/sse"}, trust_remote_code=True) as tool_collection:
     agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True)
     agent.run("Please find a remedy for hangover.")
-```
\ No newline at end of file
+```
+
+### Use MCP tools with MCPClient directly
+
+You can also work with MCP tools by using the `MCPClient` directly, which gives you more control over the connection and tool management:
+
+For stdio-based MCP servers:
+```python
+from smolagents import MCPClient, CodeAgent
+from mcp import StdioServerParameters
+import os
+
+server_parameters = StdioServerParameters(
+    command="uvx",  # Using uvx ensures dependencies are available
+    args=["--quiet", "pubmedmcp@0.1.3"],
+    env={"UV_PYTHON": "3.12", **os.environ},
+)
+
+with MCPClient(server_parameters) as tools:
+    agent = CodeAgent(tools=tools, model=model, add_base_tools=True)
+    agent.run("Please find the latest research on COVID-19 treatment.")
+```
+
+For SSE-based MCP servers:
+```python
+from smolagents import MCPClient, CodeAgent
+
+with MCPClient({"url": "http://127.0.0.1:8000/sse"}) as tools:
+    agent = CodeAgent(tools=tools, model=model, add_base_tools=True)
+    agent.run("Please find a remedy for hangover.")
+```
+
+You can also manually manage the connection lifecycle with the try...finally pattern:
+
+```python
+from smolagents import MCPClient, CodeAgent
+from mcp import StdioServerParameters
+import os
+
+# Initialize server parameters
+server_parameters = StdioServerParameters(
+    command="uvx",
+    args=["--quiet", "pubmedmcp@0.1.3"],
+    env={"UV_PYTHON": "3.12", **os.environ},
+)
+
+# Manually manage the connection
+mcp_client = MCPClient(server_parameters)
+try:
+    tools = mcp_client.get_tools()
+
+    # Use the tools with your agent
+    agent = CodeAgent(tools=tools, model=model, add_base_tools=True)
+    result = agent.run("What are the recent therapeutic approaches for Alzheimer's disease?")
+
+    # Process the result as needed
+    print(f"Agent response: {result}")
+finally:
+    # Always ensure the connection is properly closed
+    mcp_client.disconnect()
+```
+
+You can also connect to multiple MCP servers at once by passing a list of server parameters:
+```python
+from smolagents import MCPClient, CodeAgent
+from mcp import StdioServerParameters
+import os
+
+server_params1 = StdioServerParameters(
+    command="uvx",
+    args=["--quiet", "pubmedmcp@0.1.3"],
+    env={"UV_PYTHON": "3.12", **os.environ},
+)
+
+server_params2 = {"url": "http://127.0.0.1:8000/sse"}
+
+with MCPClient([server_params1, server_params2]) as tools:
+    agent = CodeAgent(tools=tools, model=model, add_base_tools=True)
+    agent.run("Please analyze the latest research and suggest remedies for headaches.")
+```
+
+> [!WARNING]
+> **Security Warning:** The same security warnings mentioned for `ToolCollection.from_mcp` apply when using `MCPClient` directly.
diff --git a/docs/source/hi/conceptual_guides/intro_agents.mdx b/docs/source/hi/conceptual_guides/intro_agents.mdx
index 15b93798e..071df435d 100644
--- a/docs/source/hi/conceptual_guides/intro_agents.mdx
+++ b/docs/source/hi/conceptual_guides/intro_agents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Agents का परिचय
 
 ## 🤔 Agents क्या हैं?
diff --git a/docs/source/hi/conceptual_guides/react.mdx b/docs/source/hi/conceptual_guides/react.mdx
index 0f17901e8..8c0ce0f27 100644
--- a/docs/source/hi/conceptual_guides/react.mdx
+++ b/docs/source/hi/conceptual_guides/react.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # मल्टी-स्टेप एजेंट्स कैसे काम करते हैं?
 
 ReAct फ्रेमवर्क ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) वर्तमान में एजेंट्स बनाने का मुख्य दृष्टिकोण है।
diff --git a/docs/source/hi/examples/multiagents.mdx b/docs/source/hi/examples/multiagents.mdx
index 1e9fcc745..7ee85f92d 100644
--- a/docs/source/hi/examples/multiagents.mdx
+++ b/docs/source/hi/examples/multiagents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # मल्टी-एजेंट सिस्टम का आयोजन करें 🤖🤝🤖
 
 [[open-in-colab]]
@@ -54,7 +39,7 @@ from huggingface_hub import login
 login()
 ```
 
-⚡️ हमारा एजेंट [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) द्वारा संचालित होगा जो `HfApiModel` क्लास का उपयोग करता है जो HF के Inference API का उपयोग करता है: Inference API किसी भी OS मॉडल को जल्दी और आसानी से चलाने की अनुमति देता है।
+⚡️ हमारा एजेंट [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) द्वारा संचालित होगा जो `InferenceClientModel` क्लास का उपयोग करता है जो HF के Inference API का उपयोग करता है: Inference API किसी भी OS मॉडल को जल्दी और आसानी से चलाने की अनुमति देता है।
 
 _नोट:_ The Inference API विभिन्न मानदंडों के आधार पर मॉडल होस्ट करता है, और डिप्लॉय किए गए मॉडल बिना पूर्व सूचना के अपडेट या बदले जा सकते हैं। इसके बारे में अधिक जानें [यहां](https://huggingface.co/docs/api-inference/supported-models)।
 
@@ -126,13 +111,13 @@ print(visit_webpage("https://en.wikipedia.org/wiki/Hugging_Face")[:500])
 from smolagents import (
     CodeAgent,
     ToolCallingAgent,
-    HfApiModel,
+    InferenceClientModel,
     ManagedAgent,
     DuckDuckGoSearchTool,
     LiteLLMModel,
 )
 
-model = HfApiModel(model_id)
+model = InferenceClientModel(model_id=model_id)
 
 web_agent = ToolCallingAgent(
     tools=[DuckDuckGoSearchTool(), visit_webpage],
diff --git a/docs/source/hi/examples/rag.mdx b/docs/source/hi/examples/rag.mdx
index 9e7a0e595..478080d8b 100644
--- a/docs/source/hi/examples/rag.mdx
+++ b/docs/source/hi/examples/rag.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # एजेंटिक RAG
 
 [[open-in-colab]]
@@ -135,10 +120,10 @@ retriever_tool = RetrieverTool(docs_processed)
 _नोट:_ Inference API विभिन्न मानदंडों के आधार पर मॉडल होस्ट करता है, और डिप्लॉय किए गए मॉडल बिना पूर्व सूचना के अपडेट या बदले जा सकते हैं। इसके बारे में अधिक जानें [यहां](https://huggingface.co/docs/api-inference/supported-models) पढ़ें।
 
 ```py
-from smolagents import HfApiModel, CodeAgent
+from smolagents import InferenceClientModel, CodeAgent
 
 agent = CodeAgent(
-    tools=[retriever_tool], model=HfApiModel("meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbosity_level=2
+    tools=[retriever_tool], model=InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbosity_level=2
 )
 ```
 
diff --git a/docs/source/hi/examples/text_to_sql.mdx b/docs/source/hi/examples/text_to_sql.mdx
index 213821ac8..69fc9820c 100644
--- a/docs/source/hi/examples/text_to_sql.mdx
+++ b/docs/source/hi/examples/text_to_sql.mdx
@@ -1,19 +1,4 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
-# Text-to-SQL 
+# Text-to-SQL
 
 [[open-in-colab]]
 
@@ -125,14 +110,14 @@ def sql_engine(query: str) -> str:
 
 हम `CodeAgent` का उपयोग करते हैं, जो smolagents का मुख्य एजेंट क्लास है: एक एजेंट जो कोड में एक्शन लिखता है और ReAct फ्रेमवर्क के अनुसार पिछले आउटपुट पर पुनरावृत्ति कर सकता है।
 
-मॉडल वह LLM है जो एजेंट सिस्टम को संचालित करता है। `HfApiModel` आपको HF के Inference API का उपयोग करके LLM को कॉल करने की अनुमति देता है, या तो सर्वरलेस या डेडिकेटेड एंडपॉइंट के माध्यम से, लेकिन आप किसी भी प्रोप्राइटरी API का भी उपयोग कर सकते हैं।
+मॉडल वह LLM है जो एजेंट सिस्टम को संचालित करता है। `InferenceClientModel` आपको HF के Inference API का उपयोग करके LLM को कॉल करने की अनुमति देता है, या तो सर्वरलेस या डेडिकेटेड एंडपॉइंट के माध्यम से, लेकिन आप किसी भी प्रोप्राइटरी API का भी उपयोग कर सकते हैं।
 
 ```py
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"),
+    model=InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"),
 )
 agent.run("Can you give me the name of the client who got the most expensive receipt?")
 ```
@@ -188,7 +173,7 @@ sql_engine.description = updated_description
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
 )
 
 agent.run("Which waiter got more total money from tips?")
diff --git a/docs/source/hi/guided_tour.mdx b/docs/source/hi/guided_tour.mdx
index 745b6643a..1c7f5742e 100644
--- a/docs/source/hi/guided_tour.mdx
+++ b/docs/source/hi/guided_tour.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Agents - गाइडेड टूर
 
 [[open-in-colab]]
@@ -25,7 +10,7 @@ rendered properly in your Markdown viewer.
 
 - `model`, आपके एजेंट को पावर देने के लिए एक टेक्स्ट-जनरेशन मॉडल - क्योंकि एजेंट एक सिंपल LLM से अलग है, यह एक सिस्टम है जो LLM को अपने इंजन के रूप में उपयोग करता है। आप इनमें से कोई भी विकल्प उपयोग कर सकते हैं:
     - [`TransformersModel`] `transformers` पाइपलाइन को पहले से इनिशियलाइज़ करता है जो `transformers` का उपयोग करके आपकी लोकल मशीन पर इन्फरेंस चलाने के लिए होता है।
-    - [`HfApiModel`] अंदर से `huggingface_hub.InferenceClient` का लाभ उठाता है।
+    - [`InferenceClientModel`] अंदर से `huggingface_hub.InferenceClient` का लाभ उठाता है।
     - [`LiteLLMModel`] आपको [LiteLLM](https://docs.litellm.ai/) के माध्यम से 100+ अलग-अलग मॉडल्स को कॉल करने देता है!
 
 - `tools`, `Tools` की एक लिस्ट जिसे एजेंट टास्क को हल करने के लिए उपयोग कर सकता है। यह एक खाली लिस्ट हो सकती है। आप ऑप्शनल आर्ग्यूमेंट `add_base_tools=True` को परिभाषित करके अपनी `tools` लिस्ट के ऊपर डिफ़ॉल्ट टूलबॉक्स भी जोड़ सकते हैं।
@@ -37,14 +22,14 @@ rendered properly in your Markdown viewer.
 
 Hugging Face API टोकन के बिना उपयोग करने के लिए मुफ्त है, लेकिन फिर इसमें रेट लिमिटेशन होगी।
 
-गेटेड मॉडल्स तक पहुंचने या PRO अकाउंट के साथ अपनी रेट लिमिट्स बढ़ाने के लिए, आपको एनवायरनमेंट वेरिएबल `HF_TOKEN` सेट करना होगा या `HfApiModel` के इनिशियलाइजेशन पर `token` वेरिएबल पास करना होगा।
+गेटेड मॉडल्स तक पहुंचने या PRO अकाउंट के साथ अपनी रेट लिमिट्स बढ़ाने के लिए, आपको एनवायरनमेंट वेरिएबल `HF_TOKEN` सेट करना होगा या `InferenceClientModel` के इनिशियलाइजेशन पर `token` वेरिएबल पास करना होगा।
 
 ```python
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 model_id = "meta-llama/Llama-3.3-70B-Instruct"
 
-model = HfApiModel(model_id=model_id, token="<YOUR_HUGGINGFACEHUB_API_TOKEN>")
+model = InferenceClientModel(model_id=model_id, token="<YOUR_HUGGINGFACEHUB_API_TOKEN>")
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 
 agent.run(
@@ -114,7 +99,7 @@ agent.run(
 आप अपने [`CodeAgent`] के इनिशियलाइजेशन पर आर्ग्यूमेंट `additional_authorized_imports` में स्ट्रिंग्स की लिस्ट के रूप में अतिरिक्त मॉड्यूल्स को अधिकृत कर सकते हैं।
 
 ```py
-model = HfApiModel()
+model = InferenceClientModel()
 agent = CodeAgent(tools=[], model=model, additional_authorized_imports=['requests', 'bs4'])
 agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
 ```
@@ -124,7 +109,7 @@ agent.run("Could you get me the title of the page at url 'https://huggingface.co
 
 एक्जीक्यूशन किसी भी कोड पर रुक जाएगा जो एक अवैध ऑपरेशन करने का प्रयास करता है या यदि एजेंट द्वारा जनरेट किए गए कोड में एक रेगुलर पायथन एरर है।
 
-आप [E2B कोड एक्जीक्यूटर](https://e2b.dev/docs#what-is-e2-b) का उपयोग लोकल पायथन इंटरप्रेटर के बजाय कर सकते हैं, पहले [`E2B_API_KEY` एनवायरनमेंट वेरिएबल सेट करके](https://e2b.dev/dashboard?tab=keys) और फिर एजेंट इनिशियलाइजेशन पर `use_e2b_executor=True` पास करके।
+आप [E2B कोड एक्जीक्यूटर](https://e2b.dev/docs#what-is-e2-b) या Docker का उपयोग लोकल पायथन इंटरप्रेटर के बजाय कर सकते हैं। E2B के लिए, पहले [`E2B_API_KEY` एनवायरनमेंट वेरिएबल सेट करें](https://e2b.dev/dashboard?tab=keys) और फिर एजेंट इनिशियलाइजेशन पर `executor_type="e2b"` पास करें। Docker के लिए, इनिशियलाइजेशन के दौरान `executor_type="docker"` पास करें।
 
 > [!TIP]
 > कोड एक्जीक्यूशन के बारे में और जानें [इस ट्यूटोरियल में](tutorials/secure_code_execution)।
@@ -158,7 +143,7 @@ agent.run("Could you get me the title of the page at url 'https://huggingface.co
 
 ### डिफ़ॉल्ट टूलबॉक्स
 
-`smolagents` एजेंट्स को सशक्त बनाने के लिए एक डिफ़ॉल्ट टूलबॉक्स के साथ आता है, जिसे आप आर्ग्यूमेंट `add_base_tools = True` के साथ अपने एजेंट में इनिशियलाइजेशन पर जोड़ सकते हैं:
+`smolagents` एजेंट्स को सशक्त बनाने के लिए एक डिफ़ॉल्ट टूलबॉक्स के साथ आता है, जिसे आप आर्ग्यूमेंट `add_base_tools=True` के साथ अपने एजेंट में इनिशियलाइजेशन पर जोड़ सकते हैं:
 
 - **DuckDuckGo वेब सर्च**: DuckDuckGo ब्राउज़र का उपयोग करके वेब सर्च करता है।
 - **पायथन कोड इंटरप्रेटर**: आपका LLM जनरेटेड पायथन कोड एक सुरक्षित एनवायरनमेंट में चलाता है। यह टूल [`ToolCallingAgent`] में केवल तभी जोड़ा जाएगा जब आप इसे `add_base_tools=True` के साथ इनिशियलाइज़ करते हैं, क्योंकि कोड-बेस्ड एजेंट पहले से ही नेटिव रूप से पायथन कोड एक्जीक्यूट कर सकता है
@@ -250,8 +235,8 @@ class ModelDownloadTool(Tool):
 
 आप सीधे अपने एजेंट को इनिशियलाइज़ कर सकते हैं:  
 ```py
-from smolagents import CodeAgent, HfApiModel
-agent = CodeAgent(tools=[model_download_tool], model=HfApiModel())
+from smolagents import CodeAgent, InferenceClientModel
+agent = CodeAgent(tools=[model_download_tool], model=InferenceClientModel())
 agent.run(
     "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
 )
@@ -264,7 +249,7 @@ agent.run(
 │ Can you give me the name of the model that has the most downloads in the 'text-to-video' │
 │ task on the Hugging Face Hub?                                                            │
 │                                                                                          │
-╰─ HfApiModel - Qwen/Qwen2.5-Coder-32B-Instruct ───────────────────────────────────────────╯
+╰─ InferenceClientModel - Qwen/Qwen2.5-Coder-32B-Instruct ─────────────────────────────────╯
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 0 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 ╭─ Executing this code: ───────────────────────────────────────────────────────────────────╮
 │   1 model_name = model_download_tool(task="text-to-video")                               │
@@ -301,9 +286,9 @@ Microsoft के फ्रेमवर्क [Autogen](https://huggingface.co/pa
 यहां एक एजेंट बनाने का उदाहरण दिया गया है जो हमारे [`DuckDuckGoSearchTool`] का उपयोग करके एक विशिष्ट वेब खोज एजेंट को प्रबंधित करता है।
 
 ```py
-from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, ManagedAgent
+from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, ManagedAgent
 
-model = HfApiModel()
+model = InferenceClientModel()
 
 web_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
 
@@ -332,14 +317,14 @@ manager_agent.run("Who is the CEO of Hugging Face?")
 from smolagents import (
     load_tool,
     CodeAgent,
-    HfApiModel,
+    InferenceClientModel,
     GradioUI
 )
 
 # Import tool from Hub
 image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True)
 
-model = HfApiModel(model_id)
+model = InferenceClientModel(model_id=model_id)
 
 # Initialize the agent with the image generation tool
 agent = CodeAgent(tools=[image_generation_tool], model=model)
diff --git a/docs/source/hi/index.mdx b/docs/source/hi/index.mdx
index 533b3b62d..40c938b55 100644
--- a/docs/source/hi/index.mdx
+++ b/docs/source/hi/index.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
--->
-
 # `smolagents`
 
 <div class="flex justify-center">
diff --git a/docs/source/hi/reference/agents.mdx b/docs/source/hi/reference/agents.mdx
index 2e070cf03..95e097560 100644
--- a/docs/source/hi/reference/agents.mdx
+++ b/docs/source/hi/reference/agents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Agents
 
 <Tip warning={true}>
@@ -98,12 +83,12 @@ print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"]))
 
 [[autodoc]] TransformersModel
 
-### HfApiModel
+### InferenceClientModel
 
-`HfApiModel` LLM के एक्जीक्यूशन के लिए [HF Inference API](https://huggingface.co/docs/api-inference/index) क्लाइंट को रैप करता है।
+`InferenceClientModel` LLM के एक्जीक्यूशन के लिए [HF Inference API](https://huggingface.co/docs/api-inference/index) क्लाइंट को रैप करता है।
 
 ```python
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel
 
 messages = [
   {"role": "user", "content": "Hello, how are you?"},
@@ -111,13 +96,13 @@ messages = [
   {"role": "user", "content": "No need to help, take it easy."},
 ]
 
-model = HfApiModel()
+model = InferenceClientModel()
 print(model(messages))
 ```
 ```text
 >>> Of course! If you change your mind, feel free to reach out. Take care!
 ```
-[[autodoc]] HfApiModel
+[[autodoc]] InferenceClientModel
 
 ### LiteLLMModel
 
@@ -133,7 +118,7 @@ messages = [
   {"role": "user", "content": "No need to help, take it easy."},
 ]
 
-model = LiteLLMModel("anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10)
+model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10)
 print(model(messages))
 ```
 
diff --git a/docs/source/hi/reference/tools.mdx b/docs/source/hi/reference/tools.mdx
index 6c270321e..d7e0de98c 100644
--- a/docs/source/hi/reference/tools.mdx
+++ b/docs/source/hi/reference/tools.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Tools
 
 <Tip warning={true}>
diff --git a/docs/source/hi/tutorials/building_good_agents.mdx b/docs/source/hi/tutorials/building_good_agents.mdx
index 92587ef35..0baa206f6 100644
--- a/docs/source/hi/tutorials/building_good_agents.mdx
+++ b/docs/source/hi/tutorials/building_good_agents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # अच्छे Agents का निर्माण
 
 [[open-in-colab]]
@@ -122,11 +107,11 @@ def get_weather_api(location: str, date_time: str) -> str:
 
 
 ```py
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 model_id = "meta-llama/Llama-3.3-70B-Instruct"
 
-agent = CodeAgent(tools=[], model=HfApiModel(model_id=model_id), add_base_tools=True)
+agent = CodeAgent(tools=[], model=InferenceClientModel(model_id=model_id), add_base_tools=True)
 
 agent.run(
     "Why does Mike not know many people in New York?",
@@ -211,13 +196,152 @@ In the end you have to return a final answer using the `final_answer` tool.
 
 Here are a few examples using notional tools:
 ---
-{examples}
+Task: "Generate an image of the oldest person in this document."
 
-Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools:
+Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
+Code:
+```py
+answer = document_qa(document=document, question="Who is the oldest person mentioned?")
+print(answer)
+```<end_code>
+Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
 
-{{tool_descriptions}}
+Thought: I will now generate an image showcasing the oldest person.
+Code:
+```py
+image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.")
+final_answer(image)
+```<end_code>
+
+---
+Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
 
-{{managed_agents_descriptions}}
+Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool
+Code:
+```py
+result = 5 + 3 + 1294.678
+final_answer(result)
+```<end_code>
+
+---
+Task:
+"Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French.
+You have been provided with these additional arguments, that you can access using the keys as variables in your python code:
+{'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}"
+
+Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.
+Code:
+```py
+translated_question = translator(question=question, src_lang="French", tgt_lang="English")
+print(f"The translated question is {translated_question}.")
+answer = image_qa(image=image, question=translated_question)
+final_answer(f"The answer is {answer}")
+```<end_code>
+
+---
+Task:
+In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.
+What does he say was the consequence of Einstein learning too much math on his creativity, in one word?
+
+Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.
+Code:
+```py
+pages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")
+print(pages)
+```<end_code>
+Observation:
+No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".
+
+Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query.
+Code:
+```py
+pages = search(query="1979 interview Stanislaus Ulam")
+print(pages)
+```<end_code>
+Observation:
+Found 6 pages:
+[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)
+
+[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)
+
+(truncated)
+
+Thought: I will read the first 2 pages to know more.
+Code:
+```py
+for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:
+    whole_page = visit_webpage(url)
+    print(whole_page)
+    print("\n" + "="*80 + "\n")  # Print separator between pages
+```<end_code>
+Observation:
+Manhattan Project Locations:
+Los Alamos, NM
+Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at
+(truncated)
+
+Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word.
+Code:
+```py
+final_answer("diminished")
+```<end_code>
+
+---
+Task: "Which city has the highest population: Guangzhou or Shanghai?"
+
+Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.
+Code:
+```py
+for city in ["Guangzhou", "Shanghai"]:
+    print(f"Population {city}:", search(f"{city} population"))
+```<end_code>
+Observation:
+Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
+Population Shanghai: '26 million (2019)'
+
+Thought: Now I know that Shanghai has the highest population.
+Code:
+```py
+final_answer("Shanghai")
+```<end_code>
+
+---
+Task: "What is the current age of the pope, raised to the power 0.36?"
+
+Thought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search.
+Code:
+```py
+pope_age_wiki = wiki(query="current pope age")
+print("Pope age as per wikipedia:", pope_age_wiki)
+pope_age_search = web_search(query="current pope age")
+print("Pope age as per google search:", pope_age_search)
+```<end_code>
+Observation:
+Pope age: "The pope Francis is currently 88 years old."
+
+Thought: I know that the pope is 88 years old. Let's compute the result using python code.
+Code:
+```py
+pope_current_age = 88 ** 0.36
+final_answer(pope_current_age)
+```<end_code>
+
+Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools:
+{%- for tool in tools.values() %}
+- {{ tool.name }}: {{ tool.description }}
+    Takes inputs: {{tool.inputs}}
+    Returns an output of type: {{tool.output_type}}
+{%- endfor %}
+
+{%- if managed_agents and managed_agents.values() | list %}
+You can also give tasks to team members.
+Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
+Given that this team member is a real human, you should be very verbose in your task.
+Here is a list of the team members that you can call:
+{%- for agent in managed_agents.values() %}
+- {{ agent.name }}: {{ agent.description }}
+{%- endfor %}
+{%- endif %}
 
 Here are the rules you should always follow to solve your task:
 1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_code>' sequence, else you will fail.
@@ -226,7 +350,7 @@ Here are the rules you should always follow to solve your task:
 4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.
 5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.
 6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'.
-7. Never create any notional variables in our code, as having these in your logs might derail you from the true variables.
+7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.
 8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
 9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
 10. Don't give up! You're in charge of solving the task, not providing directions to solve it.
@@ -234,11 +358,29 @@ Here are the rules you should always follow to solve your task:
 Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
 ```
 
-जैसा कि आप देख सकते हैं, `"{{tool_descriptions}}"` जैसे प्लेसहोल्डर्स हैं: इनका उपयोग एजेंट इनिशियलाइजेशन के समय टूल्स या मैनेज्ड एजेंट्स के कुछ स्वचालित रूप से जनरेट किए गए विवरणों को डालने के लिए किया जाएगा।
+जैसा कि आप देख सकते हैं, `"{{ tool.description }}"` जैसे प्लेसहोल्डर्स हैं: इनका उपयोग एजेंट इनिशियलाइजेशन के समय टूल्स या मैनेज्ड एजेंट्स के कुछ स्वचालित रूप से जनरेट किए गए विवरणों को डालने के लिए किया जाएगा।
 
 इसलिए जबकि आप `system_prompt` पैरामीटर में अपने कस्टम प्रॉम्प्ट को आर्गुमेंट के रूप में पास करके इस सिस्टम प्रॉम्प्ट टेम्पलेट को ओवरराइट कर सकते हैं, आपके नए सिस्टम प्रॉम्प्ट में निम्नलिखित प्लेसहोल्डर्स होने चाहिए:
-- टूल विवरण डालने के लिए `"{{tool_descriptions}}"`।
-- यदि कोई मैनेज्ड एजेंट्स हैं तो उनके लिए विवरण डालने के लिए `"{{managed_agents_description}}"`।
+- टूल विवरण डालने के लिए:
+  ```
+  {%- for tool in tools.values() %}
+  - {{ tool.name }}: {{ tool.description }}
+      Takes inputs: {{tool.inputs}}
+      Returns an output of type: {{tool.output_type}}
+  {%- endfor %}
+  ```
+- यदि कोई मैनेज्ड एजेंट्स हैं तो उनके लिए विवरण डालने के लिए:
+  ```
+  {%- if managed_agents and managed_agents.values() | list %}
+  You can also give tasks to team members.
+  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
+  Given that this team member is a real human, you should be very verbose in your task.
+  Here is a list of the team members that you can call:
+  {%- for agent in managed_agents.values() %}
+  - {{ agent.name }}: {{ agent.description }}
+  {%- endfor %}
+  {%- endif %}
+  ```
 - केवल `CodeAgent` के लिए: अधिकृत इम्पोर्ट्स की सूची डालने के लिए `"{{authorized_imports}}"`।
 
 फिर आप सिस्टम प्रॉम्प्ट को निम्नानुसार बदल सकते हैं:
@@ -255,7 +397,7 @@ This also works with the [`ToolCallingAgent`].
 हम पूरक योजना चरण के लिए एक मॉडल प्रदान करते हैं, जिसे एजेंट सामान्य क्रियाओं के चरणों के बीच नियमित रूप से चला सकता है। इस चरण में कोई टूल कॉल नहीं होती है, LLM से केवल उन तथ्यों की सूची को अपडेट करने के लिए कहा जाता है जो उसे ज्ञात हैं और इन तथ्यों के आधार पर उसे अगले कदमों के बारे में विचार करना होता है।
 
 ```py
-from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool
+from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -267,7 +409,7 @@ search_tool = DuckDuckGoSearchTool()
 
 agent = CodeAgent(
     tools=[search_tool],
-    model=HfApiModel("Qwen/Qwen2.5-72B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen2.5-72B-Instruct"),
     planning_interval=3 # This is where you activate planning!
 )
 
diff --git a/docs/source/hi/tutorials/inspect_runs.mdx b/docs/source/hi/tutorials/inspect_runs.mdx
index 0669c4dcc..127bca148 100644
--- a/docs/source/hi/tutorials/inspect_runs.mdx
+++ b/docs/source/hi/tutorials/inspect_runs.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # OpenTelemetry के साथ runs का निरीक्षण
 
 [[open-in-colab]]
@@ -73,10 +58,10 @@ from smolagents import (
     ToolCallingAgent,
     DuckDuckGoSearchTool,
     VisitWebpageTool,
-    HfApiModel,
+    InferenceClientModel,
 )
 
-model = HfApiModel()
+model = InferenceClientModel()
 
 managed_agent = ToolCallingAgent(
     tools=[DuckDuckGoSearchTool(), VisitWebpageTool()],
diff --git a/docs/source/hi/tutorials/secure_code_execution.mdx b/docs/source/hi/tutorials/secure_code_execution.mdx
index ad2cd8c34..73719e842 100644
--- a/docs/source/hi/tutorials/secure_code_execution.mdx
+++ b/docs/source/hi/tutorials/secure_code_execution.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # सुरक्षित कोड एक्जीक्यूशन
 
 [[open-in-colab]]
@@ -41,7 +26,7 @@ rendered properly in your Markdown viewer.
 ### लोकल पायथन इंटरप्रेटर
 
 डिफ़ॉल्ट रूप से, `CodeAgent` LLM-जनरेटेड कोड को आपके एनवायरनमेंट में चलाता है।
-यह एक्जीक्यूशन वैनिला पायथन इंटरप्रेटर द्वारा नहीं किया जाता: हमने एक अधिक सुरक्षित `LocalPythonInterpreter` को शुरू से फिर से बनाया है।
+यह एक्जीक्यूशन वैनिला पायथन इंटरप्रेटर द्वारा नहीं किया जाता: हमने एक अधिक सुरक्षित `LocalPythonExecutor` को शुरू से फिर से बनाया है।
 यह इंटरप्रेटर सुरक्षा के लिए डिज़ाइन किया गया है:
  - इम्पोर्ट्स को उपयोगकर्ता द्वारा स्पष्ट रूप से पास की गई सूची तक सीमित करना
  - इनफिनिट लूप्स और रिसोर्स ब्लोटिंग को रोकने के लिए ऑपरेशंस की संख्या को कैप करना
@@ -64,16 +49,16 @@ rendered properly in your Markdown viewer.
 
 अब आप तैयार हैं!
 
-कोड एक्जीक्यूटर को E2B पर सेट करने के लिए, बस अपने `CodeAgent` को इनिशियलाइज़ करते समय `use_e2b_executor=True` फ्लैग पास करें।
+कोड एक्जीक्यूटर को E2B पर सेट करने के लिए, बस अपने `CodeAgent` को इनिशियलाइज़ करते समय `executor_type="e2b"` आर्ग्यूमेंट पास करें।
 ध्यान दें कि आपको `additional_authorized_imports` में सभी टूल की डिपेंडेंसीज़ जोड़नी चाहिए, ताकि एक्जीक्यूटर उन्हें इंस्टॉल करे।
 
 ```py
-from smolagents import CodeAgent, VisitWebpageTool, HfApiModel
+from smolagents import CodeAgent, VisitWebpageTool, InferenceClientModel
 agent = CodeAgent(
     tools = [VisitWebpageTool()],
-    model=HfApiModel(),
+    model=InferenceClientModel(),
     additional_authorized_imports=["requests", "markdownify"],
-    use_e2b_executor=True
+    executor_type="e2b"
 )
 
 agent.run("What was Abraham Lincoln's preferred pet?")
diff --git a/docs/source/hi/tutorials/tools.mdx b/docs/source/hi/tutorials/tools.mdx
index bb56d7bfc..2695217d2 100644
--- a/docs/source/hi/tutorials/tools.mdx
+++ b/docs/source/hi/tutorials/tools.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Tools
 
 [[open-in-colab]]
@@ -134,9 +119,9 @@ image_generation_tool("A sunny beach")
 फिर आप इस टूल का उपयोग किसी अन्य टूल की तरह कर सकते हैं। उदाहरण के लिए, चलिए प्रॉम्प्ट `a rabbit wearing a space suit` को सुधारें और इसकी एक इमेज जनरेट करें। यह उदाहरण यह भी दिखाता है कि आप एजेंट को अतिरिक्त आर्ग्यूमेंट्स कैसे पास कर सकते हैं।
 
 ```python
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
-model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 agent = CodeAgent(tools=[image_generation_tool], model=model)
 
 agent.run(
@@ -182,9 +167,9 @@ agent.run("How many more blocks (also denoted as layers) are in BERT base encode
 चलिए केवल डिफ़ॉल्ट टूलबॉक्स के साथ इनिशियलाइज़ किए गए मौजूदा एजेंट में `model_download_tool` जोड़ें।
 
 ```python
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel
 
-model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 agent.tools[model_download_tool.name] = model_download_tool
@@ -241,7 +226,7 @@ server_parameters = StdioServerParameters(
     env={"UV_PYTHON": "3.12", **os.environ},
 )
 
-with ToolCollection.from_mcp(server_parameters) as tool_collection:
+with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as tool_collection:
     agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True)
     agent.run("Please find a remedy for hangover.")
 ```
\ No newline at end of file
diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml
index 4da8f4859..5ebe325c9 100644
--- a/docs/source/zh/_toctree.yml
+++ b/docs/source/zh/_toctree.yml
@@ -8,10 +8,14 @@
   sections:
   - local: tutorials/building_good_agents
     title: ✨ 构建好用的 agents
+  - local: tutorials/inspect_runs
+    title: 📊 监控 Agent 的运行
   - local: tutorials/tools
     title: 🛠️ 工具 - 深度指南
   - local: tutorials/secure_code_execution
     title: 🛡️ 使用 E2B 保护你的代码执行
+  - local: tutorials/memory
+    title: 📚 管理 Agent 的记忆
 - title: Conceptual guides
   sections:
   - local: conceptual_guides/intro_agents
@@ -21,14 +25,18 @@
 - title: Examples
   sections:
   - local: examples/text_to_sql
-    title: Self-correcting Text-to-SQL
+    title: 自我修正 Text-to-SQL
   - local: examples/rag
-    title: Master you knowledge base with agentic RAG
+    title: 借助 agentic RAG 掌控知识库
   - local: examples/multiagents
-    title: Orchestrate a multi-agent system
+    title: 编排 multi-agent 系统
+  - local: examples/web_browser
+    title: 基于视觉模型构建能够浏览网页的 agent
 - title: Reference
   sections:
   - local: reference/agents
     title: Agent-related objects
+  - local: reference/models
+    title: Model-related objects
   - local: reference/tools
     title: Tool-related objects
diff --git a/docs/source/zh/conceptual_guides/intro_agents.mdx b/docs/source/zh/conceptual_guides/intro_agents.mdx
index 416aabcb5..6b09349e4 100644
--- a/docs/source/zh/conceptual_guides/intro_agents.mdx
+++ b/docs/source/zh/conceptual_guides/intro_agents.mdx
@@ -1,19 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
-
 # Agent 简介
 
 > [!TIP]
diff --git a/docs/source/zh/conceptual_guides/react.mdx b/docs/source/zh/conceptual_guides/react.mdx
index cdb970728..44760fb0c 100644
--- a/docs/source/zh/conceptual_guides/react.mdx
+++ b/docs/source/zh/conceptual_guides/react.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # 多步骤 agent 是如何工作的？
 
 ReAct 框架（[Yao et al., 2022](https://huggingface.co/papers/2210.03629)）是目前构建 agent 的主要方法。
diff --git a/docs/source/zh/examples/multiagents.mdx b/docs/source/zh/examples/multiagents.mdx
index 3b177d133..567e7573f 100644
--- a/docs/source/zh/examples/multiagents.mdx
+++ b/docs/source/zh/examples/multiagents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # 编排 multi-agent 系统 🤖🤝🤖
 
 [[open-in-colab]]
@@ -53,7 +38,7 @@ login()
 ```
 
 ⚡️ HF的Inference API 可以快速轻松地运行任何开源模型，因此我们的agent将使用HF的Inference API
-中的`HfApiModel`类来调用
+中的`InferenceClientModel`类来调用
 [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)模型。
 
 _Note:_ 基于多参数和部署模型的 Inference API 可能在没有预先通知的情况下更新或替换模型。了解更多信息，请参阅[这里](https://huggingface.co/docs/api-inference/supported-models)。
@@ -127,13 +112,13 @@ print(visit_webpage("https://en.wikipedia.org/wiki/Hugging_Face")[:500])
 from smolagents import (
     CodeAgent,
     ToolCallingAgent,
-    HfApiModel,
+    InferenceClientModel,
     ManagedAgent,
     DuckDuckGoSearchTool,
     LiteLLMModel,
 )
 
-model = HfApiModel(model_id)
+model = InferenceClientModel(model_id=model_id)
 
 web_agent = ToolCallingAgent(
     tools=[DuckDuckGoSearchTool(), visit_webpage],
diff --git a/docs/source/zh/examples/rag.mdx b/docs/source/zh/examples/rag.mdx
index 23efa9e0e..bed9b7fb6 100644
--- a/docs/source/zh/examples/rag.mdx
+++ b/docs/source/zh/examples/rag.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Agentic RAG
 
 [[open-in-colab]]
@@ -38,7 +23,7 @@ Retrieval-Augmented-Generation (RAG) 是“使用大语言模型（LLM）来回
 !pip install smolagents pandas langchain langchain-community sentence-transformers rank_bm25 --upgrade -q
 ```
 
-你需要一个有效的 token 作为环境变量 `HF_TOKEN` 来调用 HF Inference API。我们使用 python-dotenv 来加载它。
+你需要一个有效的 token 作为环境变量 `HF_TOKEN` 来调用 Inference Providers。我们使用 python-dotenv 来加载它。
 ```py
 from dotenv import load_dotenv
 load_dotenv()
@@ -126,10 +111,10 @@ BM25 检索方法是一个经典的检索方法，因为它的设置速度非常
 _Note:_ 此 Inference API 托管基于各种标准的模型，部署的模型可能会在没有事先通知的情况下进行更新或替换。了解更多信息，请点击[这里](https://huggingface.co/docs/api-inference/supported-models)。
 
 ```py
-from smolagents import HfApiModel, CodeAgent
+from smolagents import InferenceClientModel, CodeAgent
 
 agent = CodeAgent(
-    tools=[retriever_tool], model=HfApiModel("meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbose=True
+    tools=[retriever_tool], model=InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbose=True
 )
 ```
 
diff --git a/docs/source/zh/examples/text_to_sql.mdx b/docs/source/zh/examples/text_to_sql.mdx
index 419c45159..349d31f6f 100644
--- a/docs/source/zh/examples/text_to_sql.mdx
+++ b/docs/source/zh/examples/text_to_sql.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Text-to-SQL
 
 [[open-in-colab]]
@@ -121,14 +106,14 @@ def sql_engine(query: str) -> str:
 
 我们现在使用这个工具来创建一个 agent。我们使用 `CodeAgent`，这是 smolagent 的主要 agent 类：一个在代码中编写操作并根据 ReAct 框架迭代先前输出的 agent。
 
-这个模型是驱动 agent 系统的 LLM。`HfApiModel` 允许你使用 HF  Inference API 调用 LLM，无论是通过 Serverless 还是 Dedicated endpoint，但你也可以使用任何专有 API。
+这个模型是驱动 agent 系统的 LLM。`InferenceClientModel` 允许你使用 HF Inference API 调用 LLM，无论是通过 Serverless 还是 Dedicated endpoint，但你也可以使用任何专有 API。
 
 ```py
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"),
+    model=InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"),
 )
 agent.run("Can you give me the name of the client who got the most expensive receipt?")
 ```
@@ -184,7 +169,7 @@ sql_engine.description = updated_description
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
 )
 
 agent.run("Which waiter got more total money from tips?")
diff --git a/docs/source/zh/examples/web_browser.mdx b/docs/source/zh/examples/web_browser.mdx
new file mode 100644
index 000000000..cf65225ed
--- /dev/null
+++ b/docs/source/zh/examples/web_browser.mdx
@@ -0,0 +1,214 @@
+# 使用Agent实现网页浏览器自动化 🤖🌐
+
+[[open-in-colab]]
+
+在本notebook中，我们将创建一个**基于Agent的网页浏览器自动化系统**！该系统可以自动导航网站、与网页元素交互并提取信息。
+
+该Agent将能够：
+
+- [x] 导航到网页
+- [x] 点击元素
+- [x] 在页面内搜索
+- [x] 处理弹出窗口和模态框
+- [x] 提取信息
+
+让我们一步步搭建这个系统！
+
+首先运行以下命令安装所需依赖：
+
+```bash
+pip install smolagents selenium helium pillow -q
+```
+
+让我们导入所需的库并设置环境变量：
+
+```python
+from io import BytesIO
+from time import sleep
+
+import helium
+from dotenv import load_dotenv
+from PIL import Image
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+
+from smolagents import CodeAgent, tool
+from smolagents.agents import ActionStep
+
+# Load environment variables
+load_dotenv()
+```
+
+现在我们来创建核心的浏览器交互工具，使我们的Agent能够导航并与网页交互：
+
+```python
+@tool
+def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
+    """
+    Searches for text on the current page via Ctrl + F and jumps to the nth occurrence.
+    Args:
+        text: The text to search for
+        nth_result: Which occurrence to jump to (default: 1)
+    """
+    elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
+    if nth_result > len(elements):
+        raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")
+    result = f"Found {len(elements)} matches for '{text}'."
+    elem = elements[nth_result - 1]
+    driver.execute_script("arguments[0].scrollIntoView(true);", elem)
+    result += f"Focused on element {nth_result} of {len(elements)}"
+    return result
+
+@tool
+def go_back() -> None:
+    """Goes back to previous page."""
+    driver.back()
+
+@tool
+def close_popups() -> str:
+    """
+    Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows!
+    This does not work on cookie consent banners.
+    """
+    webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
+```
+
+让我们配置使用Chrome浏览器并设置截图功能：
+
+```python
+# Configure Chrome options
+chrome_options = webdriver.ChromeOptions()
+chrome_options.add_argument("--force-device-scale-factor=1")
+chrome_options.add_argument("--window-size=1000,1350")
+chrome_options.add_argument("--disable-pdf-viewer")
+chrome_options.add_argument("--window-position=0,0")
+
+# Initialize the browser
+driver = helium.start_chrome(headless=False, options=chrome_options)
+
+# Set up screenshot callback
+def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
+    sleep(1.0)  # Let JavaScript animations happen before taking the screenshot
+    driver = helium.get_driver()
+    current_step = memory_step.step_number
+    if driver is not None:
+        for previous_memory_step in agent.memory.steps:  # Remove previous screenshots for lean processing
+            if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2:
+                previous_memory_step.observations_images = None
+        png_bytes = driver.get_screenshot_as_png()
+        image = Image.open(BytesIO(png_bytes))
+        print(f"Captured a browser screenshot: {image.size} pixels")
+        memory_step.observations_images = [image.copy()]  # Create a copy to ensure it persists
+
+    # Update observations with current URL
+    url_info = f"Current url: {driver.current_url}"
+    memory_step.observations = (
+        url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
+    )
+```
+
+现在我们来创建网页自动化Agent：
+
+```python
+from smolagents import InferenceClientModel
+
+# Initialize the model
+model_id = "meta-llama/Llama-3.3-70B-Instruct"  # You can change this to your preferred model
+model = InferenceClientModel(model_id=model_id)
+
+# Create the agent
+agent = CodeAgent(
+    tools=[go_back, close_popups, search_item_ctrl_f],
+    model=model,
+    additional_authorized_imports=["helium"],
+    step_callbacks=[save_screenshot],
+    max_steps=20,
+    verbosity_level=2,
+)
+
+# Import helium for the agent
+agent.python_executor("from helium import *", agent.state)
+```
+
+Agent需要获得关于如何使用Helium进行网页自动化的指导。以下是我们将提供的操作说明：
+
+```python
+helium_instructions = """
+You can use helium to access websites. Don't bother about the helium driver, it's already managed.
+We've already ran "from helium import *"
+Then you can go to pages!
+Code:
+```py
+go_to('github.com/trending')
+```<end_code>
+
+You can directly click clickable elements by inputting the text that appears on them.
+Code:
+```py
+click("Top products")
+```<end_code>
+
+If it's a link:
+Code:
+```py
+click(Link("Top products"))
+```<end_code>
+
+If you try to interact with an element and it's not found, you'll get a LookupError.
+In general stop your action after each button click to see what happens on your screenshot.
+Never try to login in a page.
+
+To scroll up or down, use scroll_down or scroll_up with as an argument the number of pixels to scroll from.
+Code:
+```py
+scroll_down(num_pixels=1200) # This will scroll one viewport down
+```<end_code>
+
+When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails).
+Just use your built-in tool `close_popups` to close them:
+Code:
+```py
+close_popups()
+```<end_code>
+
+You can use .exists() to check for the existence of an element. For example:
+Code:
+```py
+if Text('Accept cookies?').exists():
+    click('I accept')
+```<end_code>
+"""
+```
+
+现在我们可以运行Agent执行任务了！让我们尝试在维基百科上查找信息：
+
+```python
+search_request = """
+Please navigate to https://en.wikipedia.org/wiki/Chicago and give me a sentence containing the word "1992" that mentions a construction accident.
+"""
+
+agent_output = agent.run(search_request + helium_instructions)
+print("Final output:")
+print(agent_output)
+```
+
+您可以通过修改请求参数执行不同任务。例如，以下请求可帮助我判断是否需要更加努力工作：
+
+```python
+github_request = """
+I'm trying to find how hard I have to work to get a repo in github.com/trending.
+Can you navigate to the profile for the top author of the top trending repo, and give me their total number of commits over the last year?
+"""
+
+agent_output = agent.run(github_request + helium_instructions)
+print("Final output:")
+print(agent_output)
+```
+
+该系统在以下任务中尤为有效：
+
+- 从网站提取数据
+- 网页研究自动化
+- 用户界面测试与验证
+- 内容监控
\ No newline at end of file
diff --git a/docs/source/zh/guided_tour.mdx b/docs/source/zh/guided_tour.mdx
index 54ae10419..e851b79b8 100644
--- a/docs/source/zh/guided_tour.mdx
+++ b/docs/source/zh/guided_tour.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Agents - 导览
 
 [[open-in-colab]]
@@ -31,26 +16,28 @@ rendered properly in your Markdown viewer.
 
 - `model`，一个为您的 agent 提供动力的文本生成模型 - 因为 agent 与简单的 LLM 不同，它是一个使用 LLM 作为引擎的系统。您可以使用以下任一选项：
     - [`TransformersModel`] 使用预初始化的 `transformers` 管道在本地机器上运行推理
-    - [`HfApiModel`] 在底层使用 `huggingface_hub.InferenceClient`
+    - [`InferenceClientModel`] 在底层使用 `huggingface_hub.InferenceClient`
     - [`LiteLLMModel`] 让您通过 [LiteLLM](https://docs.litellm.ai/) 调用 100+ 不同的模型！
+    - [`AzureOpenAIServerModel`] 允许您使用部署在 [Azure](https://azure.microsoft.com/en-us/products/ai-services/openai-service) 中的 OpenAI 模型。
+    - [`MLXModel`] 可创建 [mlx-lm](https://pypi.org/project/mlx-lm/) 流水线，以便在本地机器上运行推理。
 
 - `tools`，agent 可以用来解决任务的 `Tools` 列表。它可以是一个空列表。您还可以通过定义可选参数 `add_base_tools=True` 在您的 `tools` 列表之上添加默认工具箱。
 
-一旦有了这两个参数 `tools` 和 `model`，您就可以创建一个 agent 并运行它。您可以使用任何您喜欢的 LLM，无论是通过 [Hugging Face API](https://huggingface.co/docs/api-inference/en/index)、[transformers](https://github.com/huggingface/transformers/)、[ollama](https://ollama.com/)，还是 [LiteLLM](https://www.litellm.ai/)。
+一旦有了这两个参数 `tools` 和 `model`，您就可以创建一个 agent 并运行它。您可以使用任何您喜欢的 LLM，无论是通过 [Hugging Face API](https://huggingface.co/docs/api-inference/en/index)、[transformers](https://github.com/huggingface/transformers/)、[ollama](https://ollama.com/)、[LiteLLM](https://www.litellm.ai/)、[Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)，还是 [mlx-lm](https://pypi.org/project/mlx-lm/)。
 
 <hfoptions id="选择一个LLM">
 <hfoption id="Hugging Face API">
 
 Hugging Face API 可以免费使用而无需 token，但会有速率限制。
 
-要访问受限模型或使用 PRO 账户提高速率限制，您需要设置环境变量 `HF_TOKEN` 或在初始化 `HfApiModel` 时传递 `token` 变量。
+要访问受限模型或使用 PRO 账户提高速率限制，您需要设置环境变量 `HF_TOKEN` 或在初始化 `InferenceClientModel` 时传递 `token` 变量。
 
 ```python
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 model_id = "meta-llama/Llama-3.3-70B-Instruct"
 
-model = HfApiModel(model_id=model_id, token="<YOUR_HUGGINGFACEHUB_API_TOKEN>")
+model = InferenceClientModel(model_id=model_id, token="<YOUR_HUGGINGFACEHUB_API_TOKEN>")
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 
 agent.run(
@@ -109,6 +96,62 @@ agent.run(
     "Could you give me the 118th number in the Fibonacci sequence?",
 )
 ```
+</hfoption>
+<hfoption id="Azure OpenAI">
+
+要连接到 Azure OpenAI，您可以直接使用 `AzureOpenAIServerModel`，或使用 `LiteLLMModel` 并进行相应配置。
+
+初始化 `AzureOpenAIServerModel` 实例时，需要传递模型部署名称，可选择以下任一种方式：1.传递 `azure_endpoint`、`api_key` 和 `api_version` 参数；2.设置环境变量 `AZURE_OPENAI_ENDPOINT`、`AZURE_OPENAI_API_KEY` 和 `OPENAI_API_VERSION`。
+
+```python
+# !pip install smolagents[openai]
+from smolagents import CodeAgent, AzureOpenAIServerModel
+
+model = AzureOpenAIServerModel(model_id="gpt-4o-mini")
+agent = CodeAgent(tools=[], model=model, add_base_tools=True)
+
+agent.run(
+    "Could you give me the 118th number in the Fibonacci sequence?",
+)
+```
+
+也可按如下方式配置 `LiteLLMModel` 连接 Azure OpenAI：
+
+- 将模型部署名称作为 `model_id` 参数传递，并确保其前缀为 `azure/`
+- 确保设置环境变量 `AZURE_API_VERSION`
+- 任选其一：1.传递 `api_base` 和 `api_key` 参数；2.设置环境变量 `AZURE_API_KEY` 和 `AZURE_API_BASE`
+
+```python
+import os
+from smolagents import CodeAgent, LiteLLMModel
+
+AZURE_OPENAI_CHAT_DEPLOYMENT_NAME="gpt-35-turbo-16k-deployment" # example of deployment name
+
+os.environ["AZURE_API_KEY"] = "" # api_key
+os.environ["AZURE_API_BASE"] = "" # "https://example-endpoint.openai.azure.com"
+os.environ["AZURE_API_VERSION"] = "" # "2024-10-01-preview"
+
+model = LiteLLMModel(model_id="azure/" + AZURE_OPENAI_CHAT_DEPLOYMENT_NAME)
+agent = CodeAgent(tools=[], model=model, add_base_tools=True)
+
+agent.run(
+    "Could you give me the 118th number in the Fibonacci sequence?",
+)
+```
+
+</hfoption>
+<hfoption id="mlx-lm">
+
+```python
+# !pip install smolagents[mlx-lm]
+from smolagents import CodeAgent, MLXModel
+
+mlx_model = MLXModel("mlx-community/Qwen2.5-Coder-32B-Instruct-4bit")
+agent = CodeAgent(model=mlx_model, tools=[], add_base_tools=True)
+
+agent.run("Could you give me the 118th number in the Fibonacci sequence?")
+```
+
 </hfoption>
 </hfoptions>
 
@@ -125,6 +168,7 @@ Python 解释器默认也不允许在安全列表之外导入，所以所有最
 ```py
 from smolagents import CodeAgent
 
+model = InferenceClientModel()
 agent = CodeAgent(tools=[], model=model, additional_authorized_imports=['requests', 'bs4'])
 agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
 ```
@@ -134,7 +178,7 @@ agent.run("Could you get me the title of the page at url 'https://huggingface.co
 
 如果生成的代码尝试执行非法操作或出现常规 Python 错误，执行将停止。
 
-您也可以使用 [E2B 代码执行器](https://e2b.dev/docs#what-is-e2-b) 而不是本地 Python 解释器，首先 [设置 `E2B_API_KEY` 环境变量](https://e2b.dev/dashboard?tab=keys)，然后在初始化 agent 时传递 `use_e2b_executor=True`。
+您也可以使用 [E2B 代码执行器](https://e2b.dev/docs#what-is-e2-b) 或 Docker 而不是本地 Python 解释器。对于 E2B，首先 [设置 `E2B_API_KEY` 环境变量](https://e2b.dev/dashboard?tab=keys)，然后在初始化 agent 时传递 `executor_type="e2b"`。对于 Docker，在初始化时传递 `executor_type="docker"`。
 
 > [!TIP]
 > 在 [该教程中](tutorials/secure_code_execution) 了解更多关于代码执行的内容。
@@ -168,7 +212,7 @@ agent.run("Could you get me the title of the page at url 'https://huggingface.co
 
 ### 默认工具箱
 
-`smolagents` 附带了一个用于增强 agent 的默认工具箱，您可以在初始化时通过参数 `add_base_tools = True` 将其添加到您的 agent 中：
+`smolagents` 附带了一个用于增强 agent 的默认工具箱，您可以在初始化时通过参数 `add_base_tools=True` 将其添加到您的 agent 中：
 
 - **DuckDuckGo 网页搜索**：使用 DuckDuckGo 浏览器执行网页搜索。
 - **Python 代码解释器**：在安全环境中运行 LLM 生成的 Python 代码。只有在使用 `add_base_tools=True` 初始化 [`ToolCallingAgent`] 时才会添加此工具，因为基于代码的 agent 已经可以原生执行 Python 代码
@@ -260,8 +304,8 @@ class ModelDownloadTool(Tool):
 
 然后您可以直接初始化您的 agent：
 ```py
-from smolagents import CodeAgent, HfApiModel
-agent = CodeAgent(tools=[model_download_tool], model=HfApiModel())
+from smolagents import CodeAgent, InferenceClientModel
+agent = CodeAgent(tools=[model_download_tool], model=InferenceClientModel())
 agent.run(
     "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
 )
@@ -274,7 +318,7 @@ agent.run(
 │ Can you give me the name of the model that has the most downloads in the 'text-to-video' │
 │ task on the Hugging Face Hub?                                                            │
 │                                                                                          │
-╰─ HfApiModel - Qwen/Qwen2.5-Coder-32B-Instruct ───────────────────────────────────────────╯
+╰─ InferenceClientModel - Qwen/Qwen2.5-Coder-32B-Instruct ─────────────────────────────────╯
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 0 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 ╭─ Executing this code: ───────────────────────────────────────────────────────────────────╮
 │   1 model_name = model_download_tool(task="text-to-video")                               │
@@ -311,9 +355,9 @@ Out[20]: 'ByteDance/AnimateDiff-Lightning'
 以下是一个使用我们的 [`DuckDuckGoSearchTool`] 制作一个管理特定网页搜索 agent 的 agent 的示例：
 
 ```py
-from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, ManagedAgent
+from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, ManagedAgent
 
-model = HfApiModel()
+model = InferenceClientModel()
 
 web_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
 
@@ -342,14 +386,14 @@ manager_agent.run("Who is the CEO of Hugging Face?")
 from smolagents import (
     load_tool,
     CodeAgent,
-    HfApiModel,
+    InferenceClientModel,
     GradioUI
 )
 
 # 从 Hub 导入工具
 image_generation_tool = load_tool("m-ric/text-to-image")
 
-model = HfApiModel(model_id)
+model = InferenceClientModel(model_id=model_id)
 
 # 使用图像生成工具初始化 agent
 agent = CodeAgent(tools=[image_generation_tool], model=model)
@@ -364,6 +408,18 @@ GradioUI(agent).launch()
 
 ## 下一步
 
+最后，当您按需配置好 agent 后，即可将其分享至 Hub！
+
+```py
+agent.push_to_hub("m-ric/my_agent")
+```
+
+类似地，若要加载已推送至 Hub 的 agent，在信任其工具代码的前提下，可使用：
+
+```py
+agent.from_hub("m-ric/my_agent", trust_remote_code=True)
+```
+
 要更深入地使用，您将需要查看我们的教程：
 - [我们的代码 agent 如何工作的解释](./tutorials/secure_code_execution)
 - [本指南关于如何构建好的 agent](./tutorials/building_good_agents)。
diff --git a/docs/source/zh/index.mdx b/docs/source/zh/index.mdx
index d79e8090c..08260bb91 100644
--- a/docs/source/zh/index.mdx
+++ b/docs/source/zh/index.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
--->
-
 # `smolagents`
 
 这是构建强大 agent 的最简单框架！顺便问一下，什么是 "agent"？我们在[此页面](conceptual_guides/intro_agents)提供了我们的定义，您还可以找到关于何时使用或不使用它们的建议（剧透：通常不使用 agent 会更好）。
diff --git a/docs/source/zh/reference/agents.mdx b/docs/source/zh/reference/agents.mdx
index bd7f3a779..c4fae3c5c 100644
--- a/docs/source/zh/reference/agents.mdx
+++ b/docs/source/zh/reference/agents.mdx
@@ -1,19 +1,3 @@
-
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # Agents（智能体）
 
 <Tip warning={true}>
diff --git a/docs/source/zh/reference/models.mdx b/docs/source/zh/reference/models.mdx
index 79c9e72a4..036334140 100644
--- a/docs/source/zh/reference/models.mdx
+++ b/docs/source/zh/reference/models.mdx
@@ -1,19 +1,3 @@
-
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # 模型
 
 <Tip warning={true}>
@@ -71,24 +55,24 @@ print(model([{"role": "user", "content": [{"type": "text", "text": "Ok!"}]}], st
 
 [[autodoc]] TransformersModel
 
-### HfApiModel
+### InferenceClientModel
 
-`HfApiModel` 封装了 huggingface_hub 的 [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference)，用于执行 LLM。它支持 HF 的 [Inference API](https://huggingface.co/docs/api-inference/index) 以及 Hub 上所有可用的[Inference Providers](https://huggingface.co/blog/inference-providers)。
+`InferenceClientModel` 封装了 huggingface_hub 的 [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference)，用于执行 LLM。它支持 HF 的 [Inference API](https://huggingface.co/docs/api-inference/index) 以及 Hub 上所有可用的[Inference Providers](https://huggingface.co/blog/inference-providers)。
 
 ```python
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel
 
 messages = [
   {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]}
 ]
 
-model = HfApiModel()
+model = InferenceClientModel()
 print(model(messages))
 ```
 ```text
 >>> Of course! If you change your mind, feel free to reach out. Take care!
 ```
-[[autodoc]] HfApiModel
+[[autodoc]] InferenceClientModel
 
 ### LiteLLMModel
 
@@ -101,7 +85,7 @@ messages = [
   {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]}
 ]
 
-model = LiteLLMModel("anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10)
+model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10)
 print(model(messages))
 ```
 
diff --git a/docs/source/zh/reference/tools.mdx b/docs/source/zh/reference/tools.mdx
index 86f19dca4..9306eb322 100644
--- a/docs/source/zh/reference/tools.mdx
+++ b/docs/source/zh/reference/tools.mdx
@@ -1,19 +1,3 @@
-
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # 工具
 
 <Tip warning={true}>
diff --git a/docs/source/zh/tutorials/building_good_agents.mdx b/docs/source/zh/tutorials/building_good_agents.mdx
index fbf489fae..a70d251ce 100644
--- a/docs/source/zh/tutorials/building_good_agents.mdx
+++ b/docs/source/zh/tutorials/building_good_agents.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # 构建好用的 agent
 
 [[open-in-colab]]
@@ -120,11 +105,11 @@ def get_weather_api(location: str, date_time: str) -> str:
 除了简单的任务描述字符串外，你还可以使用 `additional_args` 参数传递任何类型的对象：
 
 ```py
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 model_id = "meta-llama/Llama-3.3-70B-Instruct"
 
-agent = CodeAgent(tools=[], model=HfApiModel(model_id=model_id), add_base_tools=True)
+agent = CodeAgent(tools=[], model=InferenceClientModel(model_id=model_id), add_base_tools=True)
 
 agent.run(
     "Why does Mike not know many people in New York?",
@@ -209,13 +194,152 @@ In the end you have to return a final answer using the `final_answer` tool.
 
 Here are a few examples using notional tools:
 ---
-{examples}
+Task: "Generate an image of the oldest person in this document."
 
-Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools:
+Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
+Code:
+```py
+answer = document_qa(document=document, question="Who is the oldest person mentioned?")
+print(answer)
+```<end_code>
+Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
 
-{{tool_descriptions}}
+Thought: I will now generate an image showcasing the oldest person.
+Code:
+```py
+image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.")
+final_answer(image)
+```<end_code>
+
+---
+Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
 
-{{managed_agents_descriptions}}
+Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool
+Code:
+```py
+result = 5 + 3 + 1294.678
+final_answer(result)
+```<end_code>
+
+---
+Task:
+"Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French.
+You have been provided with these additional arguments, that you can access using the keys as variables in your python code:
+{'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}"
+
+Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.
+Code:
+```py
+translated_question = translator(question=question, src_lang="French", tgt_lang="English")
+print(f"The translated question is {translated_question}.")
+answer = image_qa(image=image, question=translated_question)
+final_answer(f"The answer is {answer}")
+```<end_code>
+
+---
+Task:
+In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.
+What does he say was the consequence of Einstein learning too much math on his creativity, in one word?
+
+Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.
+Code:
+```py
+pages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")
+print(pages)
+```<end_code>
+Observation:
+No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".
+
+Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query.
+Code:
+```py
+pages = search(query="1979 interview Stanislaus Ulam")
+print(pages)
+```<end_code>
+Observation:
+Found 6 pages:
+[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)
+
+[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)
+
+(truncated)
+
+Thought: I will read the first 2 pages to know more.
+Code:
+```py
+for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:
+    whole_page = visit_webpage(url)
+    print(whole_page)
+    print("\n" + "="*80 + "\n")  # Print separator between pages
+```<end_code>
+Observation:
+Manhattan Project Locations:
+Los Alamos, NM
+Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at
+(truncated)
+
+Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word.
+Code:
+```py
+final_answer("diminished")
+```<end_code>
+
+---
+Task: "Which city has the highest population: Guangzhou or Shanghai?"
+
+Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.
+Code:
+```py
+for city in ["Guangzhou", "Shanghai"]:
+    print(f"Population {city}:", search(f"{city} population"))
+```<end_code>
+Observation:
+Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
+Population Shanghai: '26 million (2019)'
+
+Thought: Now I know that Shanghai has the highest population.
+Code:
+```py
+final_answer("Shanghai")
+```<end_code>
+
+---
+Task: "What is the current age of the pope, raised to the power 0.36?"
+
+Thought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search.
+Code:
+```py
+pope_age_wiki = wiki(query="current pope age")
+print("Pope age as per wikipedia:", pope_age_wiki)
+pope_age_search = web_search(query="current pope age")
+print("Pope age as per google search:", pope_age_search)
+```<end_code>
+Observation:
+Pope age: "The pope Francis is currently 88 years old."
+
+Thought: I know that the pope is 88 years old. Let's compute the result using python code.
+Code:
+```py
+pope_current_age = 88 ** 0.36
+final_answer(pope_current_age)
+```<end_code>
+
+Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools:
+{%- for tool in tools.values() %}
+- {{ tool.name }}: {{ tool.description }}
+    Takes inputs: {{tool.inputs}}
+    Returns an output of type: {{tool.output_type}}
+{%- endfor %}
+
+{%- if managed_agents and managed_agents.values() | list %}
+You can also give tasks to team members.
+Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
+Given that this team member is a real human, you should be very verbose in your task.
+Here is a list of the team members that you can call:
+{%- for agent in managed_agents.values() %}
+- {{ agent.name }}: {{ agent.description }}
+{%- endfor %}
+{%- endif %}
 
 Here are the rules you should always follow to solve your task:
 1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_code>' sequence, else you will fail.
@@ -224,7 +348,7 @@ Here are the rules you should always follow to solve your task:
 4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.
 5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.
 6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'.
-7. Never create any notional variables in our code, as having these in your logs might derail you from the true variables.
+7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.
 8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
 9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
 10. Don't give up! You're in charge of solving the task, not providing directions to solve it.
@@ -232,11 +356,29 @@ Here are the rules you should always follow to solve your task:
 Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
 ```
 
-如你所见，有一些占位符，如 `"{{tool_descriptions}}"`：这些将在 agent 初始化时用于插入某些自动生成的工具或管理 agent 的描述。
+如你所见，有一些占位符，如 `"{{ tool.description }}"`：这些将在 agent 初始化时用于插入某些自动生成的工具或管理 agent 的描述。
 
 因此，虽然你可以通过将自定义提示作为参数传递给 `system_prompt` 参数来覆盖此系统提示模板，但你的新系统提示必须包含以下占位符：
-- `"{{tool_descriptions}}"` 用于插入工具描述。
-- `"{{managed_agents_description}}"` 用于插入 managed agent 的描述（如果有）。
+- 以下模板块，用于插入工具描述：
+  ```
+  {%- for tool in tools.values() %}
+  - {{ tool.name }}: {{ tool.description }}
+      Takes inputs: {{tool.inputs}}
+      Returns an output of type: {{tool.output_type}}
+  {%- endfor %}
+  ```
+- 以下模板块，用于插入 managed agent 的描述（如果有）：
+  ```
+  {%- if managed_agents and managed_agents.values() | list %}
+  You can also give tasks to team members.
+  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
+  Given that this team member is a real human, you should be very verbose in your task.
+  Here is a list of the team members that you can call:
+  {%- for agent in managed_agents.values() %}
+  - {{ agent.name }}: {{ agent.description }}
+  {%- endfor %}
+  {%- endif %}
+  ```
 - 仅限 `CodeAgent`：`"{{authorized_imports}}"` 用于插入授权导入列表。
 
 然后你可以根据如下，更改系统提示：
@@ -253,7 +395,7 @@ agent.prompt_templates["system_prompt"] = agent.prompt_templates["system_prompt"
 我们提供了一个用于补充规划步骤的模型，agent 可以在正常操作步骤之间定期运行。在此步骤中，没有工具调用，LLM 只是被要求更新它知道的事实列表，并根据这些事实反推它应该采取的下一步。
 
 ```py
-from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool
+from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -265,7 +407,7 @@ search_tool = DuckDuckGoSearchTool()
 
 agent = CodeAgent(
     tools=[search_tool],
-    model=HfApiModel("Qwen/Qwen2.5-72B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen2.5-72B-Instruct"),
     planning_interval=3 # 这是你激活规划的地方！
 )
 
diff --git a/docs/source/zh/tutorials/inspect_runs.mdx b/docs/source/zh/tutorials/inspect_runs.mdx
new file mode 100644
index 000000000..ea3eb659b
--- /dev/null
+++ b/docs/source/zh/tutorials/inspect_runs.mdx
@@ -0,0 +1,180 @@
+# 使用 OpenTelemetry 检查运行记录
+
+[[open-in-colab]]
+
+> [!TIP]
+> 如果您是初次构建Agent，建议先阅读 [Agent 入门指南](../conceptual_guides/intro_agents) 和 [smolagents 导览](../guided_tour)。
+
+## 为什么需要记录Agent运行？
+
+调试Agent运行过程具有挑战性。
+
+验证运行是否正常进行很困难，因为Agent的工作流程本身具有 [设计上的不可预测性](../conceptual_guides/intro_agents)（如果可预测，直接使用传统代码即可）。
+
+检查运行记录同样困难：多步骤的Agent往往会快速在控制台生成大量日志，而大多数错误只是"LLM 低级错误"类型的问题，通常LLM会在后续步骤中通过生成更好的代码或工具调用来自我修正。
+
+因此，在生产环境中使用监控工具记录Agent运行过程，对于后续检查和分析至关重要！
+
+我们采用 [OpenTelemetry](https://opentelemetry.io/) 标准来实现Agent运行监控。
+
+这意味着您只需添加少量监控代码，即可在正常运行Agent时自动记录所有信息到监控平台。以下是在不同OpenTelemetry后端实现此功能的示例：
+
+在监控平台上的展示效果如下：
+
+<div class="flex justify-center">
+    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/inspect_run_phoenix.gif"/>
+</div>
+
+
+## 使用 Arize AI Phoenix 配置遥测
+
+首先安装必要的软件包。这里我们选择安装 [Arize AI 的 Phoenix](https://github.com/Arize-ai/phoenix) 作为日志收集和检查方案，您也可以使用其他兼容 OpenTelemetry 的平台来完成收集与检查工作。
+
+```shell
+pip install 'smolagents[telemetry]'
+```
+
+接着在后台运行日志收集器：
+
+```shell
+python -m phoenix.server.main serve
+```
+
+最后配置 `SmolagentsInstrumentor` 来追踪Agent活动，并将追踪数据发送至 Phoenix 默认端点：
+
+```python
+from phoenix.otel import register
+from openinference.instrumentation.smolagents import SmolagentsInstrumentor
+
+register()
+SmolagentsInstrumentor().instrument()
+```
+
+完成上述配置后，即可正常运行您的Agent！
+
+```py
+from smolagents import (
+    CodeAgent,
+    ToolCallingAgent,
+    DuckDuckGoSearchTool,
+    VisitWebpageTool,
+    InferenceClientModel,
+)
+
+model = InferenceClientModel()
+
+search_agent = ToolCallingAgent(
+    tools=[DuckDuckGoSearchTool(), VisitWebpageTool()],
+    model=model,
+    name="search_agent",
+    description="This is an agent that can do web search.",
+)
+
+manager_agent = CodeAgent(
+    tools=[],
+    model=model,
+    managed_agents=[search_agent],
+)
+manager_agent.run(
+    "If the US keeps its 2024 growth rate, how many years will it take for the GDP to double?"
+)
+```
+Voilà!
+
+此时访问 `http://0.0.0.0:6006/projects/` 即可查看运行记录：
+
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/inspect_run_phoenix.png">
+
+如图所示，CodeAgent 调用了其托管的 ToolCallingAgent（注：托管Agent也可以是另一个 CodeAgent）执行美国2024年经济增长率的网络搜索。托管Agent返回报告后，管理Agent根据结果计算出经济翻倍周期！是不是很智能？
+
+## 使用 Langfuse 配置遥测
+
+本部分演示如何通过 `SmolagentsInstrumentor` 使用 **Langfuse** 监控和调试 Hugging Face **smolagents**。
+
+> **Langfuse 是什么？** [Langfuse](https://langfuse.com) 是面向LLM工程的开源平台，提供AI Agent的追踪与监控功能，帮助开发者调试、分析和优化产品。该平台通过原生集成、OpenTelemetry 和 SDKs 与各类工具框架对接。
+
+### 步骤 1: 安装依赖
+
+```python
+%pip install smolagents
+%pip install opentelemetry-sdk opentelemetry-exporter-otlp openinference-instrumentation-smolagents
+```
+
+### 步骤 2: 配置环境变量
+
+设置 Langfuse API 密钥，并配置 OpenTelemetry 端点将追踪数据发送至 Langfuse。通过注册 [Langfuse Cloud](https://cloud.langfuse.com) 或 [自托管 Langfuse](https://langfuse.com/self-hosting) 获取 API 密钥。
+
+同时需添加 [Hugging Face 令牌](https://huggingface.co/settings/tokens) (`HF_TOKEN`) 作为环境变量：
+```python
+import os
+import base64
+
+LANGFUSE_PUBLIC_KEY="pk-lf-..."
+LANGFUSE_SECRET_KEY="sk-lf-..."
+LANGFUSE_AUTH=base64.b64encode(f"{LANGFUSE_PUBLIC_KEY}:{LANGFUSE_SECRET_KEY}".encode()).decode()
+
+os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://cloud.langfuse.com/api/public/otel" # EU data region
+# os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://us.cloud.langfuse.com/api/public/otel" # US data region
+os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"Authorization=Basic {LANGFUSE_AUTH}"
+
+# your Hugging Face token
+os.environ["HF_TOKEN"] = "hf_..."
+```
+
+### 步骤 3: 初始化 `SmolagentsInstrumentor`
+
+在应用程序代码执行前初始化 `SmolagentsInstrumentor`。配置 `tracer_provider` 并添加 span processor 将追踪数据导出至 Langfuse。`OTLPSpanExporter()` 会自动使用环境变量中配置的端点和请求头。
+
+
+```python
+from opentelemetry.sdk.trace import TracerProvider
+
+from openinference.instrumentation.smolagents import SmolagentsInstrumentor
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+
+trace_provider = TracerProvider()
+trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
+
+SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)
+```
+
+### 步骤 4: 运行 smolagent
+
+```python
+from smolagents import (
+    CodeAgent,
+    ToolCallingAgent,
+    DuckDuckGoSearchTool,
+    VisitWebpageTool,
+    InferenceClientModel,
+)
+
+model = InferenceClientModel(
+    model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+)
+
+search_agent = ToolCallingAgent(
+    tools=[DuckDuckGoSearchTool(), VisitWebpageTool()],
+    model=model,
+    name="search_agent",
+    description="This is an agent that can do web search.",
+)
+
+manager_agent = CodeAgent(
+    tools=[],
+    model=model,
+    managed_agents=[search_agent],
+)
+manager_agent.run(
+    "How can Langfuse be used to monitor and improve the reasoning and decision-making of smolagents when they execute multi-step tasks, like dynamically adjusting a recipe based on user feedback or available ingredients?"
+)
+```
+
+### 步骤 5: 在 Langfuse 中查看追踪记录
+
+运行Agent后，您可以在 [Langfuse](https://cloud.langfuse.com) 平台查看 smolagents 应用生成的追踪记录。这些记录会详细展示LLM的交互步骤，帮助您调试和优化Agent。
+
+![smolagents 追踪示例](https://langfuse.com/images/cookbook/integration-smolagents/smolagent_example_trace.png)
+
+_[Langfuse 公开示例追踪](https://cloud.langfuse.com/project/cloramnkj0002jz088vzn1ja4/traces/ce5160f9bfd5a6cd63b07d2bfcec6f54?timestamp=2025-02-11T09%3A25%3A45.163Z&display=details)_
\ No newline at end of file
diff --git a/docs/source/zh/tutorials/memory.mdx b/docs/source/zh/tutorials/memory.mdx
new file mode 100644
index 000000000..de2bdc8c3
--- /dev/null
+++ b/docs/source/zh/tutorials/memory.mdx
@@ -0,0 +1,131 @@
+# 📚 管理Agent的记忆
+
+[[open-in-colab]]
+
+归根结底，Agent可以由几个简单的组件来定义：它拥有工具和提示词；最重要的是，它还具备对过往步骤的记忆，能够追溯完整的规划、执行和错误历史。
+
+### 回放Agent的记忆
+
+我们提供了多项功能来审查Agent的过往运行记录。
+
+您可以通过插桩（instrumentation）在可视化界面中查看Agent的运行过程，该界面支持对特定步骤进行缩放操作，具体方法参见[插桩指南](./inspect_runs)。
+
+您也可以使用`agent.replay()`方法实现回放：
+
+当Agent完成运行后：
+```py
+from smolagents import InferenceClientModel, CodeAgent
+
+agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=0)
+
+result = agent.run("What's the 20th Fibonacci number?")
+```
+
+若要回放最近一次运行，只需使用：
+```py
+agent.replay()
+```
+
+### 动态修改Agent的记忆
+
+许多高级应用场景需要对Agent的记忆进行动态修改。
+
+您可以通过以下方式访问Agent的记忆：
+
+```py
+from smolagents import ActionStep
+
+system_prompt_step = agent.memory.system_prompt
+print("The system prompt given to the agent was:")
+print(system_prompt_step.system_prompt)
+
+task_step = agent.memory.steps[0]
+print("\n\nThe first task step was:")
+print(task_step.task)
+
+for step in agent.memory.steps:
+    if isinstance(step, ActionStep):
+        if step.error is not None:
+            print(f"\nStep {step.step_number} got this error:\n{step.error}\n")
+        else:
+            print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n")
+```
+
+使用`agent.memory.get_full_steps()`可获取完整步骤字典数据。
+
+您还可以通过步骤回调（step callbacks）实现记忆的动态修改。
+
+步骤回调函数可通过参数直接访问`agent`对象，因此能够访问所有记忆步骤并根据需要进行修改。例如，假设您正在监控网页浏览Agent每个步骤的屏幕截图，希望保留最新截图同时删除旧步骤的图片以节省token消耗。
+
+可参考以下代码示例：
+_注：此代码片段不完整，部分导入语句和对象定义已精简，完整代码请访问[原始脚本](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)_
+
+```py
+import helium
+from PIL import Image
+from io import BytesIO
+from time import sleep
+
+def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
+    sleep(1.0)  # Let JavaScript animations happen before taking the screenshot
+    driver = helium.get_driver()
+    latest_step = memory_step.step_number
+    for previous_memory_step in agent.memory.steps:  # Remove previous screenshots from logs for lean processing
+        if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= latest_step - 2:
+            previous_memory_step.observations_images = None
+    png_bytes = driver.get_screenshot_as_png()
+    image = Image.open(BytesIO(png_bytes))
+    memory_step.observations_images = [image.copy()]
+```
+
+最后在初始化Agent时，将此函数传入`step_callbacks`参数：
+
+```py
+CodeAgent(
+    tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f],
+    model=model,
+    additional_authorized_imports=["helium"],
+    step_callbacks=[update_screenshot],
+    max_steps=20,
+    verbosity_level=2,
+)
+```
+
+请访问我们的 [vision web browser code](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) 查看完整可运行示例。
+
+### 分步运行 Agents
+
+当您需要处理耗时数天的工具调用时，这种方式特别有用：您可以逐步执行Agents。这还允许您在每一步更新记忆。
+
+```py
+from smolagents import InferenceClientModel, CodeAgent, ActionStep, TaskStep
+
+agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=1)
+print(agent.memory.system_prompt)
+
+task = "What is the 20th Fibonacci number?"
+
+# You could modify the memory as needed here by inputting the memory of another agent.
+# agent.memory.steps = previous_agent.memory.steps
+
+# Let's start a new task!
+agent.memory.steps.append(TaskStep(task=task, task_images=[]))
+
+final_answer = None
+step_number = 1
+while final_answer is None and step_number <= 10:
+    memory_step = ActionStep(
+        step_number=step_number,
+        observations_images=[],
+    )
+    # Run one step.
+    final_answer = agent.step(memory_step)
+    agent.memory.steps.append(memory_step)
+    step_number += 1
+
+    # Change the memory as you please!
+    # For instance to update the latest step:
+    # agent.memory.steps[-1] = ...
+
+print("The final answer is:", final_answer)
+```
\ No newline at end of file
diff --git a/docs/source/zh/tutorials/secure_code_execution.mdx b/docs/source/zh/tutorials/secure_code_execution.mdx
index 6017aefb9..93e80986a 100644
--- a/docs/source/zh/tutorials/secure_code_execution.mdx
+++ b/docs/source/zh/tutorials/secure_code_execution.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # 安全代码执行
 
 [[open-in-colab]]
@@ -41,7 +26,7 @@ rendered properly in your Markdown viewer.
 ### 本地 Python 解释器
 
 默认情况下，`CodeAgent` 会在你的环境中运行 LLM 生成的代码。
-这个执行不是由普通的 Python 解释器完成的：我们从零开始重新构建了一个更安全的 `LocalPythonInterpreter`。
+这个执行不是由普通的 Python 解释器完成的：我们从零开始重新构建了一个更安全的 `LocalPythonExecutor`。
 这个解释器通过以下方式设计以确保安全：
   - 将导入限制为用户显式传递的列表
   - 限制操作次数以防止无限循环和资源膨胀
@@ -64,16 +49,16 @@ rendered properly in your Markdown viewer.
 
 现在你已经准备好了！
 
-要将代码执行器设置为 E2B，只需在初始化 `CodeAgent` 时传递标志 `use_e2b_executor=True`。
+要将代码执行器设置为 E2B，只需在初始化 `CodeAgent` 时传递参数 `executor_type="e2b"`。
 请注意，你应该将所有工具的依赖项添加到 `additional_authorized_imports` 中，以便执行器安装它们。
 
 ```py
-from smolagents import CodeAgent, VisitWebpageTool, HfApiModel
+from smolagents import CodeAgent, VisitWebpageTool, InferenceClientModel
 agent = CodeAgent(
     tools = [VisitWebpageTool()],
-    model=HfApiModel(),
+    model=InferenceClientModel(),
     additional_authorized_imports=["requests", "markdownify"],
-    use_e2b_executor=True
+    executor_type="e2b"
 )
 
 agent.run("What was Abraham Lincoln's preferred pet?")
diff --git a/docs/source/zh/tutorials/tools.mdx b/docs/source/zh/tutorials/tools.mdx
index e62f6b660..9256bd0a3 100644
--- a/docs/source/zh/tutorials/tools.mdx
+++ b/docs/source/zh/tutorials/tools.mdx
@@ -1,18 +1,3 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
 # 工具
 
 [[open-in-colab]]
@@ -133,9 +118,9 @@ image_generation_tool("A sunny beach")
 然后你可以像使用任何其他工具一样使用这个工具。例如，让我们改进提示 `A rabbit wearing a space suit` 并生成它的图片。
 
 ```python
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
-model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 agent = CodeAgent(tools=[image_generation_tool], model=model)
 
 agent.run(
@@ -181,9 +166,9 @@ agent.run("How many more blocks (also denoted as layers) are in BERT base encode
 让我们将 `model_download_tool` 添加到一个仅使用默认工具箱初始化的现有 agent 中。
 
 ```python
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel
 
-model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 agent.tools[model_download_tool.name] = model_download_tool
diff --git a/e2b.Dockerfile b/e2b.Dockerfile
deleted file mode 100644
index cd6dd29c8..000000000
--- a/e2b.Dockerfile
+++ /dev/null
@@ -1,5 +0,0 @@
-# You can use most Debian-based base images
-FROM e2bdev/code-interpreter:latest 
-
-# Install dependencies and customize sandbox
-RUN pip install git+https://github.com/huggingface/smolagents.git
\ No newline at end of file
diff --git a/examples/agent_from_any_llm.py b/examples/agent_from_any_llm.py
index 86f45effb..bc421274c 100644
--- a/examples/agent_from_any_llm.py
+++ b/examples/agent_from_any_llm.py
@@ -1,18 +1,19 @@
-from typing import Optional
-
-from smolagents import HfApiModel, LiteLLMModel, TransformersModel, tool
+from smolagents import InferenceClientModel, LiteLLMModel, OpenAIServerModel, TransformersModel, tool
 from smolagents.agents import CodeAgent, ToolCallingAgent
 
 
 # Choose which inference type to use!
 
-available_inferences = ["hf_api", "transformers", "ollama", "litellm"]
-chosen_inference = "transformers"
+available_inferences = ["hf_api", "hf_api_provider", "transformers", "ollama", "litellm", "openai"]
+chosen_inference = "hf_api_provider"
 
 print(f"Chose model: '{chosen_inference}'")
 
 if chosen_inference == "hf_api":
-    model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct")
+    model = InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct")
+
+elif chosen_inference == "hf_api_provider":
+    model = InferenceClientModel(provider="together")
 
 elif chosen_inference == "transformers":
     model = TransformersModel(model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct", device_map="auto", max_new_tokens=1000)
@@ -29,9 +30,13 @@
     # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest'
     model = LiteLLMModel(model_id="gpt-4o")
 
+elif chosen_inference == "openai":
+    # For another OpenAI-compatible server: pass `api_base` and `api_key` to OpenAIServerModel
+    model = OpenAIServerModel(model_id="gpt-4o")
+
 
 @tool
-def get_weather(location: str, celsius: Optional[bool] = False) -> str:
+def get_weather(location: str, celsius: bool | None = False) -> str:
     """
     Get weather in the next days at given location.
     Secretly this tool does not care about the location, it hates the weather everywhere.
@@ -43,10 +48,10 @@ def get_weather(location: str, celsius: Optional[bool] = False) -> str:
     return "The weather is UNGODLY with torrential rains and temperatures below -10°C"
 
 
-agent = ToolCallingAgent(tools=[get_weather], model=model)
+agent = ToolCallingAgent(tools=[get_weather], model=model, verbosity_level=2)
 
 print("ToolCallingAgent:", agent.run("What's the weather like in Paris?"))
 
-agent = CodeAgent(tools=[get_weather], model=model)
+agent = CodeAgent(tools=[get_weather], model=model, verbosity_level=2)
 
 print("CodeAgent:", agent.run("What's the weather like in Paris?"))
diff --git a/examples/benchmark.ipynb b/examples/benchmark.ipynb
deleted file mode 100644
index 79f0ae0a1..000000000
--- a/examples/benchmark.ipynb
+++ /dev/null
@@ -1,1195 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
-     ]
-    }
-   ],
-   "source": [
-    "!pip install -e .. datasets sympy numpy matplotlib seaborn -q  # Install dev version of smolagents + some packages"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Constants and utilities/tools"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Benchmark date\n",
-    "# - set a concrete date:\n",
-    "DATE = \"2024-12-26\"\n",
-    "# - or use default: today\n",
-    "# DATE = None\n",
-    "\n",
-    "# Evaluation dataset\n",
-    "# - the dataset is gated, so you must first visit its page to request access: https://huggingface.co/datasets/smolagents-benchmark/benchmark-v1\n",
-    "EVAL_DATASET = \"smolagents-benchmark/benchmark-v1\"\n",
-    "\n",
-    "# Answers dataset: it must be a gated dataset; required to score the answers\n",
-    "ANSWERS_DATASET = \"smolagents-benchmark/answers\"\n",
-    "# Whether to push the answers dataset to the Hub\n",
-    "PUSH_ANSWERS_DATASET_TO_HUB = True\n",
-    "\n",
-    "# Results dataset\n",
-    "RESULTS_DATASET = \"smolagents-benchmark/results\"\n",
-    "# Whether to push the results dataset to the Hub\n",
-    "PUSH_RESULTS_DATASET_TO_HUB = True\n",
-    "\n",
-    "\n",
-    "import datetime\n",
-    "import json\n",
-    "import os\n",
-    "import re\n",
-    "import string\n",
-    "import time\n",
-    "import warnings\n",
-    "from typing import List\n",
-    "\n",
-    "import datasets\n",
-    "from dotenv import load_dotenv\n",
-    "from tqdm import tqdm\n",
-    "\n",
-    "from smolagents import (\n",
-    "    AgentError,\n",
-    "    CodeAgent,\n",
-    "    GoogleSearchTool,\n",
-    "    HfApiModel,\n",
-    "    PythonInterpreterTool,\n",
-    "    ToolCallingAgent,\n",
-    "    VisitWebpageTool,\n",
-    ")\n",
-    "from smolagents.agents import ActionStep\n",
-    "\n",
-    "\n",
-    "load_dotenv()\n",
-    "os.makedirs(\"output\", exist_ok=True)\n",
-    "\n",
-    "\n",
-    "def serialize_agent_error(obj):\n",
-    "    if isinstance(obj, AgentError):\n",
-    "        return {\"error_type\": obj.__class__.__name__, \"message\": obj.message}\n",
-    "    else:\n",
-    "        return str(obj)\n",
-    "\n",
-    "\n",
-    "def answer_questions(\n",
-    "    eval_ds,\n",
-    "    agent,\n",
-    "    model_id,\n",
-    "    action_type,\n",
-    "    is_vanilla_llm=False,\n",
-    "    date=DATE,\n",
-    "    output_dir=\"output\",\n",
-    "    push_to_hub_dataset=ANSWERS_DATASET if PUSH_ANSWERS_DATASET_TO_HUB else None,\n",
-    "):\n",
-    "    date = date or datetime.date.today().isoformat()\n",
-    "\n",
-    "    for task in eval_ds:\n",
-    "        file_name = f\"output/{model_id.replace('/', '__')}__{action_type}__{task}__{date}.jsonl\"\n",
-    "        answered_questions = []\n",
-    "        if os.path.exists(file_name):\n",
-    "            with open(file_name, \"r\") as f:\n",
-    "                for line in f:\n",
-    "                    answered_questions.append(json.loads(line)[\"question\"])\n",
-    "\n",
-    "        for _, example in tqdm(enumerate(eval_ds[task]), total=len(eval_ds[task])):\n",
-    "            try:\n",
-    "                question = example[\"question\"]\n",
-    "                if example[\"source\"] == \"SimpleQA\":\n",
-    "                    question += \" Answer with only the final number.\"\n",
-    "                if example[\"source\"] == \"MATH\":\n",
-    "                    question += \" Write code, not latex.\"\n",
-    "                if question in answered_questions:\n",
-    "                    continue\n",
-    "                start_time = time.time()\n",
-    "\n",
-    "                if is_vanilla_llm:\n",
-    "                    llm = agent\n",
-    "                    answer = str(llm([{\"role\": \"user\", \"content\": question}]).content)\n",
-    "                    token_count = {\n",
-    "                        \"input\": llm.last_input_token_count,\n",
-    "                        \"output\": llm.last_output_token_count,\n",
-    "                    }\n",
-    "                    intermediate_steps = str([])\n",
-    "                else:\n",
-    "                    answer = str(agent.run(question))\n",
-    "                    token_count = agent.monitor.get_total_token_counts()\n",
-    "                    intermediate_steps = str(agent.logs)\n",
-    "                    # Remove memory from logs to make them more compact.\n",
-    "                    for step in agent.logs:\n",
-    "                        if isinstance(step, ActionStep):\n",
-    "                            step.agent_memory = None\n",
-    "\n",
-    "                end_time = time.time()\n",
-    "                annotated_example = {\n",
-    "                    \"model_id\": model_id,\n",
-    "                    \"agent_action_type\": action_type,\n",
-    "                    \"question\": question,\n",
-    "                    \"answer\": answer,\n",
-    "                    \"true_answer\": example[\"true_answer\"],\n",
-    "                    \"source\": example[\"source\"],\n",
-    "                    \"intermediate_steps\": intermediate_steps,\n",
-    "                    \"start_time\": start_time,\n",
-    "                    \"end_time\": end_time,\n",
-    "                    \"token_counts\": token_count,\n",
-    "                }\n",
-    "\n",
-    "                with open(file_name, \"a\") as f:\n",
-    "                    json.dump(annotated_example, f, default=serialize_agent_error)\n",
-    "                    f.write(\"\\n\")  # add a newline for JSONL format\n",
-    "            except Exception as e:\n",
-    "                print(\"Failed:\", e)\n",
-    "\n",
-    "        if push_to_hub_dataset:\n",
-    "            ds = datasets.Dataset.from_pandas(pd.read_json(file_name, lines=True), split=\"test\", preserve_index=False)\n",
-    "            config = f\"{model_id.replace('/', '__')}__{action_type}__{task}\"\n",
-    "            data_dir = f\"{model_id}/{action_type}/{task}/{date}\"\n",
-    "            ds.push_to_hub(\n",
-    "                push_to_hub_dataset,\n",
-    "                config_name=config,\n",
-    "                data_dir=data_dir,\n",
-    "                split=\"test\",\n",
-    "                commit_message=f\"Upload {config}\",\n",
-    "            )\n",
-    "\n",
-    "\n",
-    "def normalize_number_str(number_str: str) -> float:\n",
-    "    # we replace these common units and commas to allow\n",
-    "    # conversion to float\n",
-    "    for char in [\"$\", \"%\", \",\"]:\n",
-    "        number_str = number_str.replace(char, \"\")\n",
-    "    try:\n",
-    "        return float(number_str)\n",
-    "    except ValueError:\n",
-    "        return float(\"inf\")\n",
-    "\n",
-    "\n",
-    "def split_string(\n",
-    "    s: str,\n",
-    "    char_list: list[str] = [\",\", \";\"],\n",
-    ") -> list[str]:\n",
-    "    pattern = f\"[{''.join(char_list)}]\"\n",
-    "    return re.split(pattern, s)\n",
-    "\n",
-    "\n",
-    "def is_float(element: any) -> bool:\n",
-    "    try:\n",
-    "        float(element)\n",
-    "        return True\n",
-    "    except ValueError:\n",
-    "        return False\n",
-    "\n",
-    "\n",
-    "def normalize_str(input_str, remove_punct=True) -> str:\n",
-    "    \"\"\"\n",
-    "    Normalize a string by:\n",
-    "    - Removing all white spaces\n",
-    "    - Optionally removing punctuation (if remove_punct is True)\n",
-    "    - Converting to lowercase\n",
-    "    Parameters:\n",
-    "    - input_str: str, the string to normalize\n",
-    "    - remove_punct: bool, whether to remove punctuation (default: True)\n",
-    "    Returns:\n",
-    "    - str, the normalized string\n",
-    "    \"\"\"\n",
-    "    # Remove all white spaces. Required e.g for seagull vs. sea gull\n",
-    "    no_spaces = re.sub(r\"\\s\", \"\", input_str)\n",
-    "\n",
-    "    # Remove punctuation, if specified.\n",
-    "    if remove_punct:\n",
-    "        translator = str.maketrans(\"\", \"\", string.punctuation)\n",
-    "        return no_spaces.lower().translate(translator)\n",
-    "    else:\n",
-    "        return no_spaces.lower()\n",
-    "\n",
-    "\n",
-    "def extract_numbers(text: str) -> List[str]:\n",
-    "    \"\"\"This pattern matches:\n",
-    "    - Optional negative sign\n",
-    "    - Numbers with optional comma thousand separators\n",
-    "    - Optional decimal points with decimal numbers\n",
-    "    \"\"\"\n",
-    "    pattern = r\"-?(?:\\d{1,3}(?:,\\d{3})+|\\d+)(?:\\.\\d+)?\"\n",
-    "\n",
-    "    return [el.replace(\",\", \"\") for el in re.findall(pattern, text)]\n",
-    "\n",
-    "\n",
-    "def get_question_score_gaia(\n",
-    "    model_answer: str,\n",
-    "    ground_truth: str,\n",
-    ") -> bool:\n",
-    "    \"\"\"Scoring function used to score functions from the GAIA benchmark\"\"\"\n",
-    "    if is_float(ground_truth):\n",
-    "        normalized_answer = normalize_number_str(str(model_answer))\n",
-    "        return normalized_answer == float(ground_truth)\n",
-    "\n",
-    "    elif any(char in ground_truth for char in [\",\", \";\"]):  # if gt is a list\n",
-    "        # question with the fish: normalization removes punct\n",
-    "        gt_elems = split_string(ground_truth)\n",
-    "        ma_elems = split_string(model_answer)\n",
-    "\n",
-    "        if len(gt_elems) != len(ma_elems):  # check length is the same\n",
-    "            warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-    "            return False\n",
-    "\n",
-    "        comparisons = []\n",
-    "        for ma_elem, gt_elem in zip(ma_elems, gt_elems):  # compare each element as float or str\n",
-    "            if is_float(gt_elem):\n",
-    "                normalized_ma_elem = normalize_number_str(ma_elem)\n",
-    "                comparisons.append(normalized_ma_elem == float(gt_elem))\n",
-    "            else:\n",
-    "                # we do not remove punct since comparisons can include punct\n",
-    "                comparisons.append(\n",
-    "                    normalize_str(ma_elem, remove_punct=False) == normalize_str(gt_elem, remove_punct=False)\n",
-    "                )\n",
-    "        return all(comparisons)\n",
-    "\n",
-    "    else:  # if gt is a str\n",
-    "        return normalize_str(model_answer) == normalize_str(ground_truth)\n",
-    "\n",
-    "\n",
-    "def get_correct(row):\n",
-    "    if row[\"source\"] == \"MATH\":  # Checks the last number in answer\n",
-    "        numbers_answer = extract_numbers(str(row[\"answer\"]))\n",
-    "        if len(numbers_answer) == 0:\n",
-    "            return False\n",
-    "        return float(numbers_answer[-1]) == float(row[\"true_answer\"])\n",
-    "    else:\n",
-    "        return get_question_score_gaia(str(row[\"answer\"]), str(row[\"true_answer\"]))\n",
-    "\n",
-    "\n",
-    "def score_answers(\n",
-    "    answers_subsets,\n",
-    "    answers_dataset=ANSWERS_DATASET,\n",
-    "    date=DATE,\n",
-    "    push_to_hub_dataset=RESULTS_DATASET if PUSH_RESULTS_DATASET_TO_HUB else None,\n",
-    "    set_default=True,\n",
-    "):\n",
-    "    if not answers_dataset:\n",
-    "        raise ValueError(\"Pass 'answers_dataset' to load the answers from it\")\n",
-    "    date = date or datetime.date.today().isoformat()\n",
-    "    results = []\n",
-    "    for answers_subset in answers_subsets:\n",
-    "        *model_id, action_type, task = answers_subset.split(\"__\")\n",
-    "        model_id = \"/\".join(model_id)\n",
-    "        ds = datasets.load_dataset(answers_dataset, answers_subset, split=\"test\")\n",
-    "        df = ds.to_pandas()\n",
-    "        df[\"correct\"] = df.apply(get_correct, axis=1)\n",
-    "        acc = df[\"correct\"].mean().item()\n",
-    "        result = df.loc[0, [\"model_id\", \"agent_action_type\", \"source\"]].to_dict()\n",
-    "        result[\"acc\"] = acc\n",
-    "        results.append(result)\n",
-    "    df = pd.DataFrame(results)\n",
-    "\n",
-    "    if push_to_hub_dataset:\n",
-    "        ds = datasets.Dataset.from_pandas(df)\n",
-    "        config = date\n",
-    "        set_default = set_default\n",
-    "        ds.push_to_hub(\n",
-    "            push_to_hub_dataset, config_name=config, set_default=set_default, commit_message=f\"Upload {config} results\"\n",
-    "        )\n",
-    "    return df"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Evaluation dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['gaia', 'math', 'simpleqa']\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>question</th>\n",
-       "      <th>source</th>\n",
-       "      <th>true_answer</th>\n",
-       "      <th>true_reasoning</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>What year was the municipality of Ramiriquí, B...</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>1541</td>\n",
-       "      <td>['https://en.wikipedia.org/wiki/Ramiriqu%C3%AD...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>In what year did Hjalmar Hvam invent a mechani...</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>1937</td>\n",
-       "      <td>['https://www.kgw.com/article/features/portlan...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>In which year did Fayaz A. Malik (an Indian ph...</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>2009</td>\n",
-       "      <td>['https://en.wikipedia.org/wiki/Fayaz_A._Malik...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>In which year was John B. Goodenough elected a...</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>2010</td>\n",
-       "      <td>['https://en.wikipedia.org/wiki/John_B._Gooden...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>In which year did Atul Gawande earn an M.A. in...</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>1989</td>\n",
-       "      <td>['https://en.wikipedia.org/wiki/Atul_Gawande',...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                            question    source true_answer  \\\n",
-       "0  What year was the municipality of Ramiriquí, B...  SimpleQA        1541   \n",
-       "1  In what year did Hjalmar Hvam invent a mechani...  SimpleQA        1937   \n",
-       "2  In which year did Fayaz A. Malik (an Indian ph...  SimpleQA        2009   \n",
-       "3  In which year was John B. Goodenough elected a...  SimpleQA        2010   \n",
-       "4  In which year did Atul Gawande earn an M.A. in...  SimpleQA        1989   \n",
-       "\n",
-       "                                      true_reasoning  \n",
-       "0  ['https://en.wikipedia.org/wiki/Ramiriqu%C3%AD...  \n",
-       "1  ['https://www.kgw.com/article/features/portlan...  \n",
-       "2  ['https://en.wikipedia.org/wiki/Fayaz_A._Malik...  \n",
-       "3  ['https://en.wikipedia.org/wiki/John_B._Gooden...  \n",
-       "4  ['https://en.wikipedia.org/wiki/Atul_Gawande',...  "
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "# Choose the tasks to evaluate on:\n",
-    "# tasks = [\"gaia\"]\n",
-    "# or evaluate on all tasks: [\"gaia\", \"math\", \"simpleqa\"]\n",
-    "tasks = datasets.get_dataset_config_names(EVAL_DATASET)\n",
-    "print(tasks)\n",
-    "\n",
-    "\n",
-    "eval_ds = {task: datasets.load_dataset(EVAL_DATASET, task, split=\"test\") for task in tasks}\n",
-    "pd.DataFrame(eval_ds[\"simpleqa\"]).head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Benchmark agents\n",
-    "\n",
-    "### Open models"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "open_model_ids = [\n",
-    "    \"meta-llama/Llama-3.3-70B-Instruct\",\n",
-    "    # \"Qwen/QwQ-32B-Preview\",\n",
-    "    \"Qwen/Qwen2.5-72B-Instruct\",\n",
-    "    \"Qwen/Qwen2.5-Coder-32B-Instruct\",\n",
-    "    \"meta-llama/Llama-3.2-3B-Instruct\",\n",
-    "    \"meta-llama/Llama-3.1-8B-Instruct\",\n",
-    "    \"mistralai/Mistral-Nemo-Instruct-2407\",\n",
-    "    # \"HuggingFaceTB/SmolLM2-1.7B-Instruct\",\n",
-    "    # \"meta-llama/Llama-3.1-70B-Instruct\",\n",
-    "]\n",
-    "\n",
-    "\n",
-    "for model_id in open_model_ids:\n",
-    "    print(f\"Evaluating '{model_id}'...\")\n",
-    "    # action_type = \"tool-calling\"\n",
-    "    # agent = ToolCallingAgent(\n",
-    "    #     tools=[GoogleSearchTool(), VisitWebpageTool(), PythonInterpreterTool()],\n",
-    "    #     model=HfApiModel(model_id),\n",
-    "    #     max_steps=10,\n",
-    "    # )\n",
-    "    # answer_questions(eval_ds, agent, model_id, action_type)\n",
-    "\n",
-    "    action_type = \"code\"\n",
-    "    agent = CodeAgent(\n",
-    "        tools=[GoogleSearchTool(), VisitWebpageTool()],\n",
-    "        model=HfApiModel(model_id),\n",
-    "        additional_authorized_imports=[\"numpy\", \"sympy\"],\n",
-    "        max_steps=10,\n",
-    "    )\n",
-    "    answer_questions(eval_ds, agent, model_id, action_type)\n",
-    "\n",
-    "    # Also evaluate vanilla model\n",
-    "    action_type = \"vanilla\"\n",
-    "    llm = HfApiModel(model_id)\n",
-    "    answer_questions(eval_ds, llm, model_id, action_type, is_vanilla_llm=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Closed models"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from smolagents import LiteLLMModel\n",
-    "\n",
-    "\n",
-    "litellm_model_ids = [\"gpt-4o\", \"anthropic/claude-3-5-sonnet-latest\"]\n",
-    "\n",
-    "\n",
-    "for model_id in litellm_model_ids:\n",
-    "    print(f\"Evaluating '{model_id}'...\")\n",
-    "    action_type = \"tool-calling\"\n",
-    "    agent = ToolCallingAgent(\n",
-    "        tools=[\n",
-    "            GoogleSearchTool(),\n",
-    "            VisitWebpageTool(),\n",
-    "            PythonInterpreterTool([\"numpy\", \"sympy\"]),\n",
-    "        ],\n",
-    "        model=LiteLLMModel(model_id),\n",
-    "        max_steps=10,\n",
-    "    )\n",
-    "    answer_questions(eval_ds, agent, model_id, action_type)\n",
-    "\n",
-    "    action_type = \"code\"\n",
-    "    agent = CodeAgent(\n",
-    "        tools=[GoogleSearchTool(), VisitWebpageTool()],\n",
-    "        model=LiteLLMModel(model_id),\n",
-    "        additional_authorized_imports=[\"numpy\", \"sympy\"],\n",
-    "        max_steps=10,\n",
-    "    )\n",
-    "    answer_questions(eval_ds, agent, model_id, action_type)\n",
-    "\n",
-    "    # Also evaluate vanilla model\n",
-    "    action_type = \"vanilla\"\n",
-    "    llm = LiteLLMModel(model_id)\n",
-    "    answer_questions(eval_ds, llm, model_id, action_type, is_vanilla_llm=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# import glob\n",
-    "# import json\n",
-    "\n",
-    "# jsonl_files = glob.glob(f\"output/*.jsonl\")\n",
-    "\n",
-    "# for file_path in jsonl_files:\n",
-    "#     if \"-Nemo-\" in file_path and \"-vanilla-\" in file_path:\n",
-    "#         print(file_path)\n",
-    "#         # Read all lines and filter out SimpleQA sources\n",
-    "#         filtered_lines = []\n",
-    "#         removed = 0\n",
-    "#         with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
-    "#             for line in f:\n",
-    "#                 try:\n",
-    "#                     data = json.loads(line.strip())\n",
-    "#                     data[\"answer\"] = data[\"answer\"][\"content\"]\n",
-    "#                     # if not any([question in data[\"question\"] for question in eval_ds[\"question\"]]):\n",
-    "#                     #     removed +=1\n",
-    "#                     # else:\n",
-    "#                     filtered_lines.append(json.dumps(data) + \"\\n\")\n",
-    "#                 except json.JSONDecodeError:\n",
-    "#                     print(\"Invalid line:\", line)\n",
-    "#                     continue  # Skip invalid JSON lines\n",
-    "#         print(f\"Removed {removed} lines.\")\n",
-    "#         # Write filtered content back to the same file\n",
-    "#         with open(\n",
-    "#             str(file_path).replace(\"-vanilla-\", \"-vanilla2-\"), \"w\", encoding=\"utf-8\"\n",
-    "#         ) as f:\n",
-    "#             f.writelines(filtered_lines)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Score answers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of answers_subsets 54\n",
-      "Example of answers_subset Qwen__Qwen2.5-72B-Instruct__code__gaia\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
-      "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>model_id</th>\n",
-       "      <th>agent_action_type</th>\n",
-       "      <th>source</th>\n",
-       "      <th>acc</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Qwen/Qwen2.5-72B-Instruct</td>\n",
-       "      <td>code</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>28.12</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Qwen/Qwen2.5-72B-Instruct</td>\n",
-       "      <td>code</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>76.00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Qwen/Qwen2.5-72B-Instruct</td>\n",
-       "      <td>code</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>88.00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>Qwen/Qwen2.5-72B-Instruct</td>\n",
-       "      <td>vanilla</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>6.25</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>Qwen/Qwen2.5-72B-Instruct</td>\n",
-       "      <td>vanilla</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>30.00</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                    model_id agent_action_type    source    acc\n",
-       "0  Qwen/Qwen2.5-72B-Instruct              code      GAIA  28.12\n",
-       "1  Qwen/Qwen2.5-72B-Instruct              code      MATH  76.00\n",
-       "2  Qwen/Qwen2.5-72B-Instruct              code  SimpleQA  88.00\n",
-       "3  Qwen/Qwen2.5-72B-Instruct           vanilla      GAIA   6.25\n",
-       "4  Qwen/Qwen2.5-72B-Instruct           vanilla      MATH  30.00"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import datasets\n",
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "# Choose the answers subsets to score:\n",
-    "# answers_subsets = [\"meta-llama__Llama-3.1-8B-Instruct__code__gaia\"]\n",
-    "# or get all the answers subsets present in the ANSWERS_DATASET\n",
-    "answers_subsets = datasets.get_dataset_config_names(ANSWERS_DATASET)\n",
-    "print(\"Number of answers_subsets\", len(answers_subsets))\n",
-    "print(\"Example of answers_subset\", answers_subsets[0])\n",
-    "\n",
-    "\n",
-    "result_df = score_answers(answers_subsets)\n",
-    "result_df[\"acc\"] = (result_df[\"acc\"] * 100).round(2)\n",
-    "result_df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pivot_df = result_df.pivot_table(\n",
-    "    index=[\"model_id\", \"source\"],\n",
-    "    columns=[\"action_type\"],\n",
-    "    values=\"correct\",\n",
-    "    fill_value=float(\"nan\"),\n",
-    ").reset_index()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Display results"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th>action_type</th>\n",
-       "      <th>model_id</th>\n",
-       "      <th>source</th>\n",
-       "      <th>code</th>\n",
-       "      <th>vanilla</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Qwen/Qwen2.5-72B-Instruct</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>28.1</td>\n",
-       "      <td>6.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Qwen/Qwen2.5-72B-Instruct</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>76.0</td>\n",
-       "      <td>30.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Qwen/Qwen2.5-72B-Instruct</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>88.0</td>\n",
-       "      <td>10.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>Qwen/Qwen2.5-Coder-32B-Instruct</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>25.0</td>\n",
-       "      <td>3.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>Qwen/Qwen2.5-Coder-32B-Instruct</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>86.0</td>\n",
-       "      <td>60.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>Qwen/Qwen2.5-Coder-32B-Instruct</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>86.0</td>\n",
-       "      <td>8.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>anthropic/claude-3-5-sonnet-latest</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>3.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>anthropic/claude-3-5-sonnet-latest</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>50.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>anthropic/claude-3-5-sonnet-latest</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>34.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>gpt-4o</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>25.6</td>\n",
-       "      <td>3.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>gpt-4o</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>58.0</td>\n",
-       "      <td>40.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>gpt-4o</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>86.0</td>\n",
-       "      <td>6.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>meta-llama/Llama-3.1-8B-Instruct</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>3.1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>meta-llama/Llama-3.1-8B-Instruct</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>14.0</td>\n",
-       "      <td>18.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>meta-llama/Llama-3.1-8B-Instruct</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>6.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>meta-llama/Llama-3.2-3B-Instruct</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>3.1</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>meta-llama/Llama-3.2-3B-Instruct</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>40.0</td>\n",
-       "      <td>12.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>meta-llama/Llama-3.2-3B-Instruct</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>20.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>meta-llama/Llama-3.3-70B-Instruct</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>31.2</td>\n",
-       "      <td>3.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>19</th>\n",
-       "      <td>meta-llama/Llama-3.3-70B-Instruct</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>72.0</td>\n",
-       "      <td>40.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>20</th>\n",
-       "      <td>meta-llama/Llama-3.3-70B-Instruct</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>78.0</td>\n",
-       "      <td>12.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>21</th>\n",
-       "      <td>mistralai/Mistral-Nemo-Instruct-2407</td>\n",
-       "      <td>GAIA</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>3.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>22</th>\n",
-       "      <td>mistralai/Mistral-Nemo-Instruct-2407</td>\n",
-       "      <td>MATH</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>22.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>23</th>\n",
-       "      <td>mistralai/Mistral-Nemo-Instruct-2407</td>\n",
-       "      <td>SimpleQA</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>6.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "action_type                              model_id    source  code  vanilla\n",
-       "0                       Qwen/Qwen2.5-72B-Instruct      GAIA  28.1      6.2\n",
-       "1                       Qwen/Qwen2.5-72B-Instruct      MATH  76.0     30.0\n",
-       "2                       Qwen/Qwen2.5-72B-Instruct  SimpleQA  88.0     10.0\n",
-       "3                 Qwen/Qwen2.5-Coder-32B-Instruct      GAIA  25.0      3.1\n",
-       "4                 Qwen/Qwen2.5-Coder-32B-Instruct      MATH  86.0     60.0\n",
-       "5                 Qwen/Qwen2.5-Coder-32B-Instruct  SimpleQA  86.0      8.0\n",
-       "6              anthropic/claude-3-5-sonnet-latest      GAIA   NaN      3.1\n",
-       "7              anthropic/claude-3-5-sonnet-latest      MATH   NaN     50.0\n",
-       "8              anthropic/claude-3-5-sonnet-latest  SimpleQA   NaN     34.0\n",
-       "9                                          gpt-4o      GAIA  25.6      3.1\n",
-       "10                                         gpt-4o      MATH  58.0     40.0\n",
-       "11                                         gpt-4o  SimpleQA  86.0      6.0\n",
-       "12               meta-llama/Llama-3.1-8B-Instruct      GAIA   3.1      0.0\n",
-       "13               meta-llama/Llama-3.1-8B-Instruct      MATH  14.0     18.0\n",
-       "14               meta-llama/Llama-3.1-8B-Instruct  SimpleQA   2.0      6.0\n",
-       "15               meta-llama/Llama-3.2-3B-Instruct      GAIA   3.1      0.0\n",
-       "16               meta-llama/Llama-3.2-3B-Instruct      MATH  40.0     12.0\n",
-       "17               meta-llama/Llama-3.2-3B-Instruct  SimpleQA  20.0      0.0\n",
-       "18              meta-llama/Llama-3.3-70B-Instruct      GAIA  31.2      3.1\n",
-       "19              meta-llama/Llama-3.3-70B-Instruct      MATH  72.0     40.0\n",
-       "20              meta-llama/Llama-3.3-70B-Instruct  SimpleQA  78.0     12.0\n",
-       "21           mistralai/Mistral-Nemo-Instruct-2407      GAIA   0.0      3.1\n",
-       "22           mistralai/Mistral-Nemo-Instruct-2407      MATH  30.0     22.0\n",
-       "23           mistralai/Mistral-Nemo-Instruct-2407  SimpleQA  30.0      6.0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "display(pivot_df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAABdYAAAJOCAYAAAC6HlVrAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA3O5JREFUeJzs3QmcHHWZ//Gnu+dMZjJDJndCAgmSEMkBCZfLTUTFRPiDCIio664grrAXKocou8KiiCwrouCqgMoCXkQSWIQgBJFjIZAQjEFNICHHJJOEOZK5u/v/emqmZqrPqequrq7q/rxfrzFOTU/3r7qLrppv/37PE4rH43EBAAAAAAAAAAC2hO3dDAAAAAAAAAAAKIJ1AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAABAIl1xyicyePVsuvPDCjLf553/+Z+M2V199dd6P99JLLxn3pf+6+Tvmfli/jjzySDn11FPl3/7t36StrU3c0NzcLBdffLHMmzdPTjjhBOnq6nLlfkvVW2+9JTfccIMsWbJE5s+fb7we//Iv/yIbN26UUvHrX//aON62bdtW7KEAAAAAgVdR7AEAAADYFQ6HZe3atUZoPGnSpISfdXZ2ytNPPy1BMHfuXPna17429H1fX5/88Y9/lNtuu03+9Kc/yQMPPCChUCivx7jvvvuM5+pb3/qWTJw4UWpra10YeWl64okn5Etf+pK85z3vkcsvv1ymTZtmHGP6HH7sYx+T73//+/I3f/M3EnT6YcFDDz0kEyZMKPZQAAAAgMAjWAcAAIGhgfRf//pXefzxx+XTn/50ws80VNfweMyYMeJ3dXV1snDhwoRtxxxzjBw4cEC+853vyLp161J+7lRra6sRoJ511ll5jra0bd26Vb785S/LSSedJLfffrtEIpGhn5155ply0UUXGT//3e9+J1VVVRJkY8eONb4AAAAA5I9SMAAAIDBGjRolp5xyihGsJ3vsscfkAx/4gFRUJM4b6OnpkTvvvFM++MEPGmVRNCz9wQ9+ILFYLOF2Dz74oPH7WgbkE5/4hOzYsSPlMXSblgc59thjZcGCBfKpT31KNmzY4Nr+aUkY83FMq1atknPPPdcYu86avvHGG43Z+aY77rhD3v/+98t3v/tdY1wnnniiLFq0yCj7ofejpT/0Nmr37t1yzTXXGM+h7udHP/pReeqppxLGoLfX+9LH1Nvo/9f70sd/5ZVX5LzzzjP+vz5XGjZv3rzZeB70+dBxPProown39/LLL8vf/d3fGR8c6P6dfvrpxnjM51/Lkuhj/u///q9ceeWVctRRRxn78ZWvfCVhP+PxuNx7773yoQ99yBiXPtaPfvQjY7tJx6evnY5F70MD8X379mV9zn/6059Kb2+v8XjWUF3pBzV6H7rP1hI9eqzp86Nj1dfkq1/9asLPdf/0eHvyySdl6dKlxvN19tlny2uvvWasIjj//PONfdCfvfDCCwm/p8+Pfkikv6/7oTPmk0sLaXmaL3zhC3L88cfLe9/7XuNDAT0uuru7R3wdraVg9Ln513/9V2MfzDEuX7484bHefvtt43XR2+iHPVrKaM2aNUM/t/v6AQAAAKWGYB0AAASKzsA2y8GY9u/fL88++6wRVFpp6Pq5z31OfvjDHxph5l133WUEljoz2VqK5Wc/+5nxvQbO3/ve94xA8/rrr0+4Lw0htb67lmzRn3372982wmGtY75p0ybX6nyrgw8+2Ph3xYoV8g//8A8yc+ZM48MBDVMfeeQR+fznP58QKGuAvnr1avnP//xPIzjXsFj3Zfz48UbpD933PXv2GEG6hs9ai15D3KlTpxr3r/dppc/TsmXLjNnzGqCr/v5+I4TV50BLo2jofNVVVxnPr5YY0d/RGfIaRJu
vjQbAurKgsbHRGJv+3uLFi42QV4NYK33+dTz6/GsQ/8tf/tK4vemWW24xvjR41sfSfbn11luND0nMAF8fq6amxnh9r732Wvm///s/+eQnP5kQOCf7/e9/b6yE0HI56Wh9en2+9LlUOj79cEVDZn1+9Pn77W9/awTO1sfR5+Ab3/iG8fz813/9l7S3txvBs/6uvh76euprqPdt/T09zvQ5/PjHP278nu6PPh9aIsj8cESPOa2Zr/f/3//93/LhD3/YeM1/8pOfjPg6Wn3xi180jl2t7a/3o8+DPvaLL75o/FxXh2gwr+G5BuX6fGuJIv0gRZ9bJ68fAAAAUHLiAAAAAfCJT3zC+Orq6oovXLgwfs899wz97Ne//nX8lFNOicdisfhpp50W//KXv2xsf+aZZ+KHH354fOXKlQn3deeddxrb//znPxu/c8IJJ8T/6Z/+KeE2X/3qV43bvPjii8b3t912W3zevHnxbdu2Dd2mp6cnfsYZZ8SvuOIK43u9rfV3Mu3HxRdfHO/r6xv62rNnT/yxxx6LH3vssfELLrjAGJN+nXzyyfG/+7u/S/j9559/3niMp59+2vj+O9/5jvH9yy+/nHA7fQ70uTDdcsst8fe+970J41ef+tSn4n/zN38Tj0ajxvd6X7rN6le/+pWx/X/+53+Gtj366KPGtttvv31o2/r1641tTz75pPH9ww8/HP/7v//7oftW+v8XLVoUv/76643v33nnHeN3rrrqqoTHvOSSS+JLly41/n9bW1t87ty58ZtuuinhNl//+teHnh993vT2/f39Qz/fvHlz/Igjjoj/7Gc/y/h6LFiwIOW1z6S1tTV+5JFHDo3dpM+97oP5OOZrsnr16qHb3H333ca2X/ziF0PbHn/8cWPbhg0bEn5PnzeTHu/6+phj/P3vf28cPx0dHQlj0H3/zGc+M/R9ttdRn3Ol+/L9738/4bX5xje+EV+zZo3x/T/+4z/GjzvuuITH0uP1Ax/4QPy8886z/foBAAAApYgZ6wAAIFB0Bq/OWraWg9HyI1oiJLnhp86q1dIwOkvd6iMf+cjQz7WUyd69e+W0005LuI3en5WW7DjiiCOMmc06e1u/tJnqySefLM8//7yjfdDZ1VrCw/x63/veZ8xk1lIpOhNe90PHpbOedV/Nx9MvLamiNdr/8Ic/JNynji0b3Vct06GzipOfi5aWFuPxRrov/X1TU1OT8a/O7jfpzHSls7PVOeecY8yE1uasOntdZ3br7OloNGpss0quKa/Nac1SIrpCQfddy/hY6SxqXY2gs7e1Lr3O0tdZ4OZzpTP/Z82alfJcWWn5Fx2PHToOLRuTvDJCZ+Hr85o8i/voo48e+v/jxo0b8flSerxa71+Pdz3G9JhRWupHV1hUV1cbM8q1lI/ODNeZ7jo2J8fEcccdZ6xc0Jn0v/jFL4xVDTpj3Ry37o/+d6HHm3V8OkP+jTfeMHoC2Hn9AAAAgFJE81IAABA4GnprWRQNnjVg1ND7n/7pn1Jup3WvDzrooJTa2WZZj46OjqHa2Hq7dLexNgPdsmWLEYSno+GuXXofWn5DaYiu+zB58uSEAFMfT+ntzNtaaUkQq9GjR2d9TN1Ps8SMlRn4WsNdrWWfjnV8Ji0Jk4mWOPn6178uv/nNb4yge9q0aUY4r+GstZRNuvvRDy3M25jPRabGmzp2LcujIb5+JdPnN5MpU6akradv0g8A9LnT58k8VsznzEq36fGUz/Nl3k9ynwD9EMN8DnQ/b7vtNrn//vuN4FqPG62hnm4fM72OJi3Po+VitCyPfuihz7l+yPPv//7vxgcF5n6nG6O+NlqCyc7rBwAAAJQignUAABA4OoNXg2Sdta7hoQa2ZuNPq4aGBnn33XeNGcnWcN0MpTVMNwN1nbVuZQaZpvr6eqMp45e+9KW0Y6qqqrI9fh27NovMZsyYMca/+nj6uOn2zQm9vc5MT2ZuS/5gwQ033XSTEdhqzXMNbM2gV+uWO2E+Fzo
rW+vNmzQQ37p1q/Ha6wcUWmNdZ1M7CbN1Bvh9991nPA/JH6YorV2vddS1Lrz5nOvMbus4lP5+ug8unEo+7szHM1cIaE15beKqH7boDH49LpXWnHdKf1frrOuXrljQ2e9aI13vWx9H91cfO9sxk/wBDwAAAFAuKAUDAAACR0PsJUuWGKGtzrZNF6YqDaR1prS1bIwym3UuWrRIDjnkEGPWb/Jtnn766ZT70uaihx56qBGKm186G1sbNSbPis+XBrcapmrjSOvjaSkaLRezYcMGR/enJWRee+012b59e8pzoYHyjBkzxG1r1qwxyo3oa2WG6lpCRANynXltl87IrqysTHlNfvzjHxsldPS+tfGmhsPW5+o973mPUerkpZdeynjf2ghU71s/BEguCaMzwrV0jQbI+mGOlnHRY2/lypUJt9OGsBryW0u/5Epn+WtDVev32pjX/DBCn9PDDjtMzjvvvKFQfdeuXfLnP//Z0XOqx4GWzjGPez3ePvvZzxofgJgz+PWY0efcOjNdnyMtvaTPr5MPkwAAAIBSw4x1AAAQSGeddZZcdtllRskJrbWdjoahGuzqzzV8nDNnjlE3WsuF/L//9/+MgFJdddVV8q//+q/G7bQeu9bSfuCBBxLuS2dDa4iu/37mM58xwtbHHntMfv7zn8s111zj+v5pUP/P//zP8tWvftX4/1rrWkue6Ixi3ZdMJWky+du//VsjRNfxaxkdre+9fPlyefHFF+U//uM/jOfRbRqI6wcf+lxqrXOts671wHV2uZPSOVoC5pOf/KQxU1vDXP2QQ2uq6/3qjH4duwbsl156qfE6at14DYA1eNfbff7zn89437ra4YYbbpDrrrvOCNkvvPBC44MWnQl/zz33yDvvvCM/+tGPjFIr+qWPceeddxphvL4m+sHHf/3XfxnHkh5TbtDjSUsb6Qcr+tga8F9++eVDz6keAzqjXOuaa3miu+++26iv7uQ51VIvWgf9xhtvNILz6dOnGx966Ax9/e9K6XGiob4+97rfus9a312fE61tDwAAAJQzgnUAABBIOrNWS4RoCKqhbToa4GroqLOONZTVmdIapGoIq0GzSZtFajirgaWG54cffrhRZ1pvZ9KZ4g8++KAxW1yD2J6eHmO2u850zqUMhx3nn3++UTZGQ8yHHnrImJmts6JvvfVWx2VHdFa6BtE6fg1TtXa4ftCg+3zGGWcUZPxXX3218ThaCkaDX33uNSDWppu/+93vbDcNVVquRINmfQ30+dD7uv76640g3CzpoiG0lmzRZpwaAuuHDxqOJzfWTKaBuM7Y15IwOlYtC6TPlz7XOuPdenxdccUVRo1xDZj1NdEPKPTDGA3CR6ppbpceX/phhx6vOgZ93cwVBRp6a3mjn/zkJ0bAr8f/2WefPXSs64cvZumckehzpfXa9YMBvU+9Lw3TNURXOuP/f/7nf4zbaNivj6HBvj62NmwFAAAAylkoTlchAAAAoOg0xNew+8033yz2UAAAAACMgBrrAAAAAAAAAAA4QLAOAAAAAAAAAIADlIIBAAAAAAAAAMABZqwDAAAAAAAAAOAAwToAAAAAAAAAAA4QrAMAAAAAAAAAUG7B+ic+8QnjCwAAAAAAAACAQquQErBz585iDwEAAAAAAAAAUCZKYsY6AAAAAAAAAABeIVgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAPC59u4+2d3ebetLb5uLzs5Ouf322+WDH/ygzJ8/X4477ji58sor5S9/+UvKba+++mqZPXu2bN26NeVnl1xyidxxxx2OfidoKoo9AAAAAAAAAAAodcvueM72bVdccWLC9xqU/+T5t6WnPyZ79vdIXzQ
u4+uqpKoikvK7etuKcEiuOOM9Mqam0vZjHjhwQD7+8Y8b4boG4HPmzJF3331X7r//frnwwgtl+fLlcvDBBxu37enpkSeffFKmT59ubNfwfSS5/I6fEawDAAAAAAAAgI9190aNUD0SFmkcVSUfnjdZJtRXp9zu1a3vyrN/3iNSGTZ+x0mwfuedd8revXvlsccekzFjxhjbpk6dKjfffLPs3LlT7r33Xrn++uuN7atXr5bKykojiP/pT38qV1xxhYRCoaz3n8vv+BmlYAAAAAAAAADA53Smek9/XD51wiFy5NQGmTCmJuHrrT0H5E87O+Tkw8c5CtRVLBaThx9+WP72b/92KFS3uuWWW+SLX/zi0PcrV66UxYsXy2mnnSbbt2+Xl19+ecTHyOV3/IxgHQAAAAAAAAB8Tsu/6Ez1SQ01KT97afNeeX7TXnnfrCY5evpBju9ba57v27fPCL7TmTBhgtTU1AyVjFm9erURkB9yyCEya9YsI5TPJpff8TuCdQAAAAAAAADwOa2pnq78izVUP25mU073rbXUVUNDw9C2559/Xo466qihrw9/+MPG9lWrVklfX58Rkqv3v//98tvf/la6uroy3n8uv+N3BOsAAAAAAAAA4HPpGpW6Eaors/xLe3v70DYN07XJqH59/vOfHwrBH330UTn66KNl7NixxvdnnnmmMSP9iSeeyHj/ufyO39G8FAAAAAAAAAACxq1QXc2YMUMaGxvltddek/nz5xvbamtrje2qqalpaGa7zmTv7++XuXPnJtyHBvBnn312yn3n8jtBQLAOAAAAAAAAACUUqvf2Rx3dX0VFhZx33nly3333Gf/W1dUl/HzXrl3GvzrDXBud3n///VJfXz/0c62Xfu+990pzc7NMmjQp4Xdz+Z0goBQMAAAAAAAAAJRIqL67o0da9vc6vt8rrrhCxo8fLxdeeKE8/vjj8s4778jrr78u119/vXznO9+RRYsWycqVK+Wkk04y/v/hhx8+9PXpT39awuGw/OY3v0m531x+JwgI1gEAAAAAAAAgAJ55c7c8uWGXHDG5Xg4dN1p2t3cnfL2xvU1+seYdqYyEHN+3ln756U9/apRm+d73vidLly6Vv/u7v5MdO3bIHXfcIVdddZW88sor8tGPfjTldydOnChnnHGGMQs9eaa7098JilA8Ho9LwOkLoJ566qliDwUAAAAAAAAAXNXe3Sc/WL1ZdrZ1yZjaShlTU5m2/IvOVNdQfUpDrXzqbw5Jezu4gxrrAAB47IKVF7hyPw8tfciV+wEAAAAA+JsG5JeeMlO6e+3VTq+pihCqFxjBOgAAAAAAAAD4nAblhOX+QY11AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAAAcqHByYwAAAAAAAABAEXS3ifR12bttZa1ITYPtu549e7bx79NPPy1TpkxJ+NkDDzwgN9xwg3zhC1+QK664IuFnp59+usTjcfnd734noVBoaNv27dszPtabb74pV199tfH/v/GNbyT8bNu2bXLGGWfIU089JdOmTRM/I1gHAAAAAAAAgEK7+xT7t71sdWqo/tLdIrFo4vZ4TGT/bpFYn8joCSIV1QPbIxUix17qKFyvrKw0AvJPfOITCdtXrVo1FJpbvfbaa9Ld3W18vfTSS3L88ccb23/5y19KNDowzptuusn497rrrpNSQ7AOAAAQYBesvMCV+3lo6UOu3A8AAACAAtCZ6hqqH7FMZPS4gW39vSJ/+s1AqD73HJH6SQPbD+wR+dOKgd9xEKwvXrw4JVjfv3+/EaDPnTs35faPPvqo8Tt9fX2yfPnyoWB97NixQ7epqakx/h0/fryUGmqsAwAAAAAAAEAQaKiuAXrtQSJvPSMS7R2YmT5l4cB2/TKDd4e0BMv//d//GWG66ZlnnjHC89GjRyfcNhaLyeOPP2787LT
TTpPf/va30tnZKeWEYB0AAAAAAAAAgqK/R+T1h0QOtIgsuEhkTGJN9KHSMQ4dfvjhMnHiRHn22WeHtj355JOyZMmSlNu+9NJL0tLSYoTq+qXlYJ544gkpJwTrAAAAAAAAABAEWv5lpFD9nZdFultznrWu5WBUb2+v/OEPfzC2JVu5cqXR8PTggw82yrwsXLhQHn74YUePtWLFCjnqqKMSvpYuXSpBQY11AAAAAAAAAPA7bVSqNdW1/EumUP3tP4hsfVGkpjGnh9AQ/corr5T+/n554YUXjFnsTU1NCbfRwP3JJ59MqMV+5plnyje/+U3ZsWOHTJmSZlxpnH766XLVVVclbNu1a5dccsklEgQE6wAAAAAAAADgd/t3DzQq1ZrqmUL1t54VmX68yK43cnqIRYsWGf+uWbNGVq1aJe9///tTbvP73/9e2tra5Pvf/77cddddxrZ4PG58/eY3v5HLL7/c1mNp3fYZM2YkbItEIhIUlIIBAAAAAAAAAL/TUH3uOdlD9UNPFjn4mJwfoqKiQk455RSjHMzTTz+dtr76Y489JjNnzjRC9OXLlxtf+v+POeYY4/+XC4J1AAAAAAAAAPC70RNE6idlD9UP+Zu8H0bLwfziF78wSsBoDXWrrq4uI3T/6Ec/apSJsX5dfPHF8vbbb8trr70m5YBSMAAAAAAAAADgdxXVIgf2pDYq1ZrqWv6laZZIR3PqbRw68cQTjRrr6Wara6je19cn55xzTsrP9PbayFSbmGoj0lIXimvxm4AzO9M+9dRTxR4KAAAjumDlBa7cz0NLH3LlfhBsHE8AAABAGehuE/m/H4hE+xO3dbcONCqtaUi8faRioBZ78na4hhnrAAAAAAAAAOBnGpBrUN7XZe/2lbWE6gVGsA4ANjErFAAAAAAAFI0G5YTlvkHzUgAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAACf6+jtkJbOFltfelun+vr65I477pAzzjhDjjzySDn11FPl5ptvlv379xs/P/300+XXv/616/t19dVXG19O/OIXv5Bzzz1XFi5cKCeeeKLx+++8807a2+r22bNnyxe/+EVxU4Wr9wYAAAAAAAAASHHBygts3/ahpQ8lfK9B+QMbH5DOvk7Z07VHKsOV0lTbJOFQ4rzpWDwme7v2Sl1VnVw6/1Kpr6q3/Zi33nqrPP/883LjjTfKwQcfbATSN910k2zZskXuuusu+eUvfymjRo2SYvvKV74iTz/9tFx11VVyzDHHyJ49e+SHP/yhnH/++XLfffcZIbrVY489JtOnT5dVq1bJgQMHZPTo0a6Mg2AdAAAAAAAAAHysu7/bCNW7+rvkPQe9R5ZMXyJVkaqE2/RGe2XV1lVGCB8JRYzfcRKsP/zww/If//EfcsIJJxjfT5s2TW644Qa5+OKLZffu3TJhwgQpttWrV8tvfvMbY+b8e97znqFx6kz7f/iHf5Brr71WfvWrXyX8zsqVK+UTn/iE3HnnnfLb3/7WmOnuBoJ1AAAAAAAQ+NmdTmZ+AkAQ6Ux1DdUvmnNR2lB95eaV0hfrk7NnnS3PbHvG8f2HQiF58cUXjZIv4fDATPijjjpKHn30UTnooIOM7V/4wheMYPqSSy4xSsU899xzsmbNGjn88MPl29/+tjFzXIPs8ePHGzPfjz32WHnppZeMMix///d/b4TbkUjE+P3LL7887TiefPJJ+c///E/Zvn27EZ5/6UtfMu5H/fznP5clS5YMherWsWuwrmP705/+JEcccYSx/a9//av8+c9/luOOO05ef/1148MDt4J1aqwDAAAAAAAAgM9p+ZdMM9U1VN/XvU+WzVwm40eNz+n+P/nJT8pPf/pTI0D/2te+Zszu7u7ulsMOO0wqKytTbn/nnXfKxz72MWP2eEdHh3z0ox+VcePGGSV
jNPjWYN20d+9eWb58ufz4xz+Wf//3fzcCeA3Jk23cuFG+/OUvG6H7I488Ih/5yEfks5/9rFGORq1bt07mz5+fdvxz586V2tpaI0A3acg/depUmTNnjlE7/uWXXzYCezcQrAMAAAAAAACAz2lN9ZFC9YmjJ+Z8/zrj+1vf+pZMmjTJCL2vvPJKOemkk1JKq5hOO+00+dCHPmQE7zqLvK6uzvidWbNmGYH75s2bh27b399vlJl573vfa9z2U5/6lDz44IMp9/mjH/3I+N1ly5bJjBkzjLD/5JNPlgceeMD4eWtra8Ya6Tprvb6+Xt59992E+ur6QYE65ZRTpKqqygj43UCwDgAAAAAAAAA+l9yo1M1Q3aQzxDXw1iam2sxUZ55fd9118sYbb6Tcdtq0aUP/v6amRqZMmWKE2+b3fX19Qz/Xpqc6a9x05JFHJgTvpk2bNsnPfvYzowSN+aWNSt9++23j542NjbJr1660Y4/H47J//34jXFc6c11numuQrzSQf9/73mfUaHcDNdYBAAAAAAAAIEDcDtW1BIvO5L766quN77Wmus4a/8AHPiBnnnmmUXs9WUVFYrRs1mVPJ/m2sVhsKIS3ikajRumXc845J2G7BvVKy8CkC/nVm2++KZ2dncaseKW14dVnPvOZhMfVAF7rwi9atEjywYx1AAAAAAAAACihUL2jt8PRfWqgfc8998iGDRsStmvpFA21x44dm9eY29vbZdu2bUPfr1+/XmbPnp1yu0MPPdS4nZaBMb8eeughefbZZ42fX3DBBbJ69eqhOupaGkaD/xUrVsj3vvc9o4nqggULjAD9f//3f+Xss882PjAwv7R5qZascaMcDME6AAAAAAAAAJRIqL6uZZ2097Y7ul+d5X3qqafK5z//eSOk1nB77dq1RhPT3t5eI7zO1/XXXy9//vOfjaao2iT14osvTrnNpz/9aaMu+k9+8hPZunWr3HvvvcbXIYccMlQnXWuwf+5znzNCcm2aquH5VVddZdzvtddea8yEf+WVV4ySMZdccokRtptfRxxxhFHuRkP3np6evPaHUjAAAAAAAAAA4HOxeEx+s+k30hfrk/dPf79Rc72lsyUlVH9hxwsypmqM4/u//fbb5a677pLvfve7smPHDqMu+oknnmjUPNdZ3vk6+eST5eMf/7hxv//yL/9ilJpJtnDhQrnlllvkjjvuMP6dPn26fPvb35Zjjjlm6Db//u//btRo1/D9hhtuMMamTVQ1UP/yl79sbHvmmWeMGfHz5s1LeYyLLrpI/ud//kdWrVolH/7wh3Pen1Bci8oE3BlnnGH8+9RTTxV7KABK2AUrL3Dlfh5a+pAr94Pg4liCmzieAACljPMcAAyXdvnh+h/K3q69Mq52nFRFqtLeRmeqa6h+UM1BctGci6S+aqCRZzG99NJL8slPftKogV5ozz33nEQiETnhhBMK/ljMWAcAAAAAAAAAH9OA/O/n/b1093fbun1NRY0vQnWv6Qx7rxCsAwAAAABQzu4+xZ37uWy1O/cDAEhLg/JyDMv9iualAAAAAAAAAICCOO644zwpA+M1gnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAAAgKMH6zp075bLLLpOjjz5aTj/9dLn33nuHfrZhwwY5//zzZcGCBXLeeefJG2+8UcyhAgAAAAAAAABQ/GD9n/7pn2TUqFHy61//Wq699lq5/fbb5cknn5TOzk659NJLZfHixcbPjjrqKCOA1+0AAAAAAAAAAJRlsN7W1iZr166Vyy+/XA455BBZsmSJnHTSSfLCCy/IY489JtXV1fKlL31JZs2aJdddd52MHj1aHn/88WINFwAAAAAAAACA4gbrNTU1Ultba8xI7+vrk82bN8urr74qRxxxhKxbt04WLVokoVDIuK3+q+ViNIgHAAAAAAAAAKCYKor1wDoj/atf/ap8/etfl5/85CcSjUbl3HPPNeqqP/XUU3L
YYYcl3L6pqUn+8pe/ZL3PeDye8L0G8snbCr29GI/JPrFPfhtLqe6TW/y0T6X4OgVhn9zkl30qxdcpKPvkFj/tk9+3+2ks7BP7FPTtfhoL+zS4Xc8JNral3T54f26O0S0l9zqV4rHHPgVyu5v/nQJBU7RgXW3atElOO+00+du//VsjNNeQ/YQTTpCuri6pqqpKuK1+39vbm/G+9D9qLS9jvb3Wb9f7sv6ezpTXrwMHDkh/f//Qdr2t/s7+/fuNkN+kJWgqKyulvb094Y2jvr5ewuFwwmOqhoYGicVi0tHRkfAmo9v18fRxTZFIxLgfnbFvrR9fUVEhdXV10tPTI93d3ewT+8Q++WSfVCwaSxh7OBI2xmTsp+WaQ8ejf2lE+4f339heEfHVPpXi6xSEfVJ6//GY5VgKhyUUDmU8xtJtV37Zp1J8nYKyT3q/enxYb6/Hhx5fel9D28Mh4zEzHXt+2qdSfJ3YJ/aJfWKf/LxPdYM/0/HoHlpva26XNNsr9Pbx4fPN/rY21/fJybVRtuvyUnidSvHYY5+Cv0+NjY0J+weUk1A83cdPHtBa6tq8dPXq1cZ/3Or73/++PPLII3LwwQfL4YcfLlddddXQ7b/1rW8ZQfxdd92Vcl9nnHGG8e+qVasStvPJI/vEPgV7u5/Goi589EJxw4MffrBgY+R1CsY+uXUsPbT0Id/sUym+TkHZpwtWXiB+fm/y0/PLPrFPQd/up7GwTyW2Tz841Z0Z65c+4/oYC3We88Xz7vJ2P42FfSqffdLvgXJVtBnrb7zxhsyYMWMoVFdz5841gvPFixfLnj17Em6v30+YMCHrfab7jznTf+CF3F6Mxyz0dj+Nxa3tfhqLW9v9NBa3tvtpLG7x2z6V4usUhH1yi5/2qRRfpyDsk1v8tE9B2O6nsbi13U9jcWu7n8bi1nY/jcWt7X4ai1vb/TSWnLbb3Jay3XJ/Qbhm8t3zzrHHPgV8O1Buita8VEPyLVu2JCxN0Qam06ZNkwULFshrr7029AmY/quNTXU7AAAAAAAAAABlGayffvrpRr2mr3zlK/LWW2/J7373O2O2+iWXXCIf/OAHjTpON910k/z1r381/tX6UB/60IeKNVwAAAAAAAAAAIobrGvDg3vvvVdaWlrkox/9qNx8881y+eWXywUXXGA0YLj77rtlzZo1cu6558q6devkBz/4wVDzQAAAAAAAAAAAyq7GujrssMPknnvuSfuz+fPny8MPP+z5mAAAAAAAAAAA8OWMdQAAAAAAAAAAgohgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwoMLJjQGg4O4+xZ37uWy1O/cDAAAAAAAAJGHGOgAAAAAAAAAADhCsAwAAAAAAAADgAME6AAAAAAAAAAAOEKwDAAAAAAAAAOAAwToAAAAAAAAAAA4QrAMAAAAAAAAA4ADBOgAAAAAAAAAADhCsAwAAAAAAAADgAME6AAAAAAAAAAAOEKwDAAAAAAAAAOBAhZMbAxndfYo793PZanfuBwAAAAAAAAAKhBnrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA
4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhQ4eTGQNBcsPICV+7noaUPuXI/AICAuvsUd+7nstXu3A8AAAAAoKiYsQ4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAxVObgwAQGDcfYp793XZavfuCwAAAAAABB4z1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAACAoATrvb298m//9m9yzDHHyPve9z657bbbJB6PGz/bsGGDnH/++bJgwQI577zz5I033ijmUAEAAAAAAAAAKH6wfuONN8rzzz8vP/rRj+Tb3/62/PznP5eHHnpIOjs75dJLL5XFixfLr3/9aznqqKPksssuM7YDAAAAAAAAAFBMFcV64NbWVvnVr34l99xzj8yfP9/Y9pnPfEbWrVsnFRUVUl1dLV/60pckFArJddddJ88++6w8/vjjcu655xZryAAAAAAAAAAAFG/G+po1a6Surk6OPfbYoW06S/3mm282wvVFixYZobrSf48++mhZu3ZtsYYLAAAAAAAAAEBxZ6y/8847MnXqVFm+fLncdddd0tfXZ8xGv/zyy6WlpUUOO+ywhNs3NTXJX/7yl6z3adZnN2kgn7yt0NuL8Zi+2yd9LVK22txuuT+39sktfnk9Mm3301hc26eRjplYVCQeS3/79p32H7OyVqSmYcTbu8Uvz69b2/00Frfem1K2Dd6f396XBoZWYq9TEI69Ap7nchmjW0rudSrFY499Yp98NBb2qYT3yc61kUfXTIU8z/nueefYY58Cut3N/06BoClasK710rds2SIPPvigMUtdw/SvfvWrUltbK11dXVJVVZVwe/1em51mov9Rt7W1Jdx+1KhRxn1Zf6+mpsb4OnDggPT39w9t19vq7+zfv1+i0ejQ9tGjR0tlZaW0t7cnvHHU19dLOBxOeEzV0NAgsVhMOjo6Et5kdLs+nj6uKRKJGPejHypY68drKRydzd/T0yPd3d2B2Ke6wfvX7ZFw2Lhf/R3r7bNt1236s/2Dj+3WPhlXeSGRaP/wbY3nviJi/Mx6H3o7fU2MMUYTx6hK4XUKwrFnHkvmOFXC66Tj1Ncp2i/x3gMJJ/FQRbVIpMq47/4XfiAS7ZVw1x6pCIeM5yYWqpT4qHEiobDxeBWRsETbdkq0ola6jr1CpHpM1n1SemxYxx6OhI0xGGO0HHrG2DMce6XwOgXh2Ku17JOOe+h1sr4eeiylOcaSjz19b3Jzn5TxvheLJ45Rj9UMx1i67Sror1NQjj3zvck8lszzVsLrl2W79Vhye5/Snbf0+NDjK+GcGw4Zj5np2CuF16kUjz32iX1in9gnL/bJPM/ZvTYaGqf599Pg+UbPc27vk5N
ro2zX5aXwOpXiscc+BX+fGhsbE/YPKCeheLqPnzzwgx/8wGhY+rvf/c6Yua7uvfdeeeCBB2TGjBly+OGHy1VXXTV0+29961uyadMmY3Z7sjPOOMP4d9WqVQnb+eTRw336wanuzOS79BlXx37hoxeKGx5a+pBvXo9M2/00lry22z2Won0i/d0idRONMF2aZkmocfrw7WeeKrLhNyL9PRKSkMTHTBY54iMikcqBn0f7JLRxhcT3/GXgfs74mkj9pKxjdOt4evDDD2be/wBu99NYMh1Lec9YH3xvcmuMxXhvKvrrUYDtnj6mR+e5XMZ4wcoLxM/vTX46Ztgn9ino2/00FvapxPZphPOc7XOfy9dMhTzP+eJ5d3m7n8bCPpXPPun3QLkq2oz18ePHGw1KzVBdHXroobJz506j7vqePXsSbq/fT5gwIet9pvuPOdN/4IXcXozHLPR2x/eRdquN7Un359Y+ucFPr0em7X4ai1vbRzxmNFSfPF9k7Mzh7d1tIpt+N/Az/Wo8WELzPiZSMbgSpr9XZP3PRbrbJVQ52pjZbhx71tnvZX4sOd3up7Fk3Z52a/rtCdtsHBvFeF9y+ri+ez2CfOwV8DwXhGPJ6XY/HTNubffTWNza7qexuLXdT2Nxa7ufxuLWdj+Nxa3tfhpLTtttbgv6NZPvnneOPfYp4NuBclO05qULFiwwlqy89dZbQ9s2b95sBO36s9dee23oEzD999VXXzW2A0CCpllDobqYofq2l42Z6obGg0XSheqt7wx8r9vrsn9oBwAAAAAAAPgiWJ85c6aceuqpcs0118jGjRvl97//vVEe5qKLLpIPfvCDRh2nm266Sf76178a/2p9qA996EPFGi4APzJmo09PDdW1TIytUL1aZO45IpHqIgweAAAAAAAAQVW0YF3deuutMn36dCNM//KXvywXX3yxXHLJJUYDhrvvvlvWrFkj5557rqxbt84I3c3mgQAwFKznE6ovuEikfmIRBg4AAAAAAIAgK1qNdbOb8C233JL2Z/Pnz5eHH37Y8zEBCKDkUH3UWHuhujY07Wgu3rgBAAAAAAAQSEUN1gEgbz0dIi0bE0P1qYvsherWYB4AAAAAAACwiWAdQHDFYyI714qEIomherjCfqi+7RWR7tYiDB4AAAAAAABBVdQa6wCQl74ukWh/7qH6lhcGvgAAAAAAAAAHCNYBBFg8fage67cXqm9+pghjBgAAAAAAQNARrAMIttrG1FB9+xpnoXpNo8eDBgAAAAAAQJARrAMILq2tPml+aqjeuc9+qD7jBJGaBo8HDgAAAAAAgCAjWAcQXJU1IuFI7qH6zFNFpi32eNAAAAAAAAAIOoJ1AAEWSh+qRyrtheo6Wx0AAAAAAABwiGAdQLDFoqmh+rRjnIXq0R4PBwwAAAAAAICgGyxMDABBFBdpfl2krysxVLfWTB8pVO/YJbJ/t4djBgAAAAAAQNAxYx1AcPV1i3S15h6qt+8U2bBcJB7zcNAAAAAAAAAIOoJ1AMEVj+YXqq97QKS/18MBAwAAAAAAoBRQCgZAsEUqUkP1fZtFOppthOqDtdUrqj0cMAAAAAAAAIKOGesAAiwkMnlhaqje8qb9UL1hisjoCR6OGQAAAAAAAEFHsA4guCprRarrcw/VGw8WmbNMJMRbIQAAAAAAAOwjTQIQXNZAPJdQfd7HRCqqPBwwAAAAAAAASgHBOoDgSw7Vx892FqrHYx4PGAAAAAAAAEFG81IAwda6VaRtW2KoPnam/VC9v1fkwG6PBw0AAAAAAIAgY8Y6gOCK9ors3ZRfqL5xxfDPAQAAAAAAABsI1gEEO1jPJ1Rf/3ORth0eDxoAAAAAAABBR7AOIPiSQ/XuNnuheus7qU1QAQAAAAAAgBGQJgEItqZZqaH6tpfth+q6vW5CEQYOAAAAAACAoCJYBxBckSqRxumpoXq0z2aoXi0y9xyRSHURBg8AAAAAAICgqij2AAAgr2A9n1B9wUUioVARBg4
AAAAAAIAgY8Y6gOBLDtVHjbUXqo+ZXLwxAwAAAAAAILCYsQ4g2Ho6RFo2JobqUxc5C9U1mAcAAAAAAABsIlgHEFzxmMjOtSKhSGKoHq6wH6pve0Wku7UIgwcAwF8uWHmBK/fz0NKHXLkfAAAAwM8oBQMguPq6RKL9uYfqW14Y+AIAAAAAAAAcIFgHEGDx9KF6rN9eqL75mSKMGQAAAAAAAEFHsA4g2GobU0P17Wucheo1jR4PGgAAAAAAAEFGsA4guLS2+qT5qaF65z77ofqME0RqGjweOAAAAAAAAIKMYB1AcFXWiIQjuYfqM08VmbbY40EDAAAAAAAg6AjWAQRYKH2oHqm0F6rrbHUAAAAAAADAIYJ1AMEWi6aG6tOOcRaqR3s8HDAAAAAAAACCbrAwMQAEUVyk+XWRvq7EUN1aM32kUL1jl8j+3R6OGQAAAAAAAEHHjHUAwdXXLdLVmnuo3r5TZMNykXjMw0EDAAAAAAAg6AjWAQRXPJpfqL7uAZH+Xg8HDAAAAAAAgFJAKRgAwRapSA3V920W6Wi2EaoP1lavqPZwwAAAAAAAAAg6ZqwDCLCQyOSFqaF6y5v2Q/WGKSKjJ3g4ZgAAAAAAAAQdwTqA4KqsFamuzz1UbzxYZM4ykRBvhQAAAAAAALCPNAlAcFkD8VxC9XkfE6mo8nDAAAAAAAAAKAUE6wCCLzlUHz/bWagej3k8YAAAAAAAAAQZzUsBBFvrVpG2bYmh+tiZ9kP1/l6RA7s9HjQAAAAAAACCjBnrAIIr2iuyd1N+ofrGFcM/BwAAAAAAAGwgWAcQ7GA9n1B9/c9F2nZ4PGgAAAAAAAAEHcE6gOBLDtW72+yF6q3vpDZBBQAAAAAAAEZAmgQg2JpmpYbq2162H6rr9roJRRg4AAAAAAAAyq55aUdHhzzyyCPy1ltvyec//3lZt26dzJo1S6ZPn+7uCAEgk0iVSOP01FA92mczVK8WmX2WyJuPFWHw8JVYVCQey/zzjmZ791NZK1LT4NqwAAAAAABACQXrf/7zn+VTn/qUTJ48eej/P/HEE/L444/L3XffLccee6z7IwWAdMF6PqH6gotEQqEiDBy+C9V7OkTCkYFjynpcadje1yWy6obh42b0hOHyQfrzA7uHV0iMHidy8hcJ1wEAAAAAKHE5Bes33nijXHTRRXLllVfKUUcdZWy7+eabZezYsXLLLbfIL3/5S7fHCQCZJYfqo8baC9XHTLY/ExmlS8NxDdUPPm6gXr9Jw/ada0Wi/SK1TSINU0TmLEs8rjauEOntFKmsE4n1i4QrBoJ4gnUAAAAAAEpaTjXW169fL+ecc07K9gsvvFD++te/ujEuALBHw8/kUH3qInuhujWYR3nTWeoaqmsgbobiLRtFQpGBY2biESLHfFbkoOki9ZNEaseKvP2sSHe7SHWdyOgmkYUfF4lUF3tPAAAAAACAX4N1nZmutdWTvfrqq9LU1OTGuADA3kxjY0ZxUqius4bthurbXhHpbi3C4BGYskJ2V0DUTyzCwAEAAAAAQGBKwXz2s5+Vr3zlK/K5z31O4vG4vPjii/Lwww/LfffdJ//8z//s/igBIB0tuaFlOioiuYXqW14Y+AKyhep2V0BQVggAAAAAgLKRU7CuJV8mTJggP/rRj6Smpsaoq37ooYfK17/+dTnrrLPcHyUApBUf+Cc5VNda13ZC9c3PFGHM8HVZIS3/ks8KCMoKAQAAAABQFnIK1n/4wx/K0qVL5f7773d/RADgRG1jaqi+fY1I4wz7oXpNo8eDhm/LCmlNdUVZIQAAAAAA4HaN9bvuukv6+gZn9AFAsWgIOml+aqjeuc9+qD7jhOFmlShfZlmhfFZAUFYIAAAAAICykVOwrrPVv//978vbb78tvb297o8KAOyorBEJR3IP1WeeKjJtsceDRuDKCulxRVkhAAAAAACQbymYZ599Vnbs2GE0LE3nT3/6Uy53CwAOhdKH6pFKe6G6zlan4SRGKiukx5W
WFqKsEAAAAAAAyCdY/8Y3vpHLrwGA+2LR1FB92jH2QnVTtMfDAaOkywo1r/d44AAAAAAAIDDB+rHHHmv8q6VgNm3aJLFYTA499FA57LDD3B4fAGQRF2l+faA+tjVUt9ZMHylU79glsn+3h2NG4MoK2V0BMfZQgnUAAAAAAMpETsF6e3u7XHPNNfLUU09JQ0ODRKNROXDggBxzzDFy5513Sn19vfsjBYBkfd0iXa0Ds4lzCdXbd4psWC4Sj3k7bgSrrJDdFRCUFQIAAAAAoGzk1Lz0xhtvlObmZnnsscfkpZdekldeeUVWrFghnZ2dcvPNN7s/SgBIJx4d+DfXUH3dAyL9NGDGCGWFnBxXlBUCAAAAAKAs5DRj/Xe/+53cc889MnPmzKFtWgbmq1/9qnz2s591c3wAkF2kIjX83Lc5cfZwxlB9MATVGe8oc5QVAgAAAAAABZ6xXl1dLeFw6q+GQiGjLAwAeCMkMnlhaqje8qb9UL1hisjoCR6OGb4uK6QoKwQAAAAAAAoRrJ9++unyb//2b7J169ahbdrIVEvEnHLKKbncJQA4V1krUl2fe6jeeLDInGUioZzeClEuZYX0uKKsEAAAAAAAsMgpTfriF79ozFo/88wz5bjjjjO+PvjBDxqNTK+//vpc7hIAnLMG4rmE6vM+JlJR5eGAEciyQk6OK8oKAQAAAABQFnKqsT5mzBj56U9/Km+++aZs2rTJCNkPPfTQhJrrAOCZ5PBz/GxnoTrlO+BWWSGznAwAAAAAAChpOQXrvb29cvvtt8vUqVPl4osvNrade+658r73vU/+8R//USorK90eJwCk17pVpG1bYqg+dqb9UF3Ldxyg4WTZc6Os0CEni6y938NBAwAAAACAQJWC0Vrqq1evljlz5gxt+/znPy/PPPOMfPOb33RzfACQWbRXZO+m/EL1jSuGf47yla2skNMVEAAAAAAAoOTlFKw/8cQTcuutt8qiRYuGti1ZskRuvvlmeeyxx9wcHwBkD9bzCdXX/1ykbYfHg4avpQvVnRxXlBUCAAAAAKAs5FQKJh6PS09PT9rtfX19bowLAOxLDj+72+yF6q3vpM5WRvmirBAAAAAAALAppzTpAx/4gFx//fXyyiuvSGdnp/H16quvyg033CDvf//7c7lLAMhN06zUUH3by/ZDdd1eN6EIA4evUFYIAAAAAAAUesb6NddcI9ddd5186lOfklhsYNl7JBKRs88+W6699tpc7hIAnItUiTROTw3Vo302Q/VqkdlnibxJCauyl62skN0VEJQVAgAAAACgbDgO1vfs2SMHHXSQ3HbbbdLe3i5vv/22vPzyy1JdXS3nnnuujBo1qjAjBYB0wXo+ofqCi0RCoSIMHL6VLlTX42rikQPfU1YIAAAAAAA4KQVz4MAB+dznPicnnXSSEaarp556Si688EK5//77ja9ly5ZJc3NzIccLAKmSQ/VRY+2F6mMmF2/MCE5ZIdsf1lBWCAAAAACAcmE7WL/jjjtk+/bt8rOf/Uxmzpxp1FW/8cYbZf78+fLb3/5W/vd//1dOPPFEufXWWws7YgCw6ulIDdWnLnIWqmuAivLmRlmhueeIRKqLMHgAAAAAAODbYP2JJ54w6qovWrRIQqGQPPfcc8Ys9ksuuUQqKyuN22gpGN0OAJ6Ix0R2rk0N1cMV9kP1ba+IdLcWYfAITFkhuysg6icWYeAAAAAAAMDXwXpLS4tMnz48m+/55583GpbqLHXTuHHjpKury/1RAkA6fV0i0f7cQ/UtLwx8AdlCdacrIAAAAAAAQMmzHaxPnDhR3nlnIEiIx+OyevVqWbBggTQ0NAzd5rXXXpPJkwkXAHglnj5Uj/XbC9U3P1OEMSNwZYWcfFhDWSEAAAAAAMqC7WD97LPPlptuusloWPof//EfsnPnTvn4xz8+9PONGzfKbbfdJh/84AcLNVYASFXbmBqqb1/jLFSvafR40PAdygoBAAAAAAAHBhODkV1++eWyf/9
+ufbaa40a61deeaUsXbrU+Nk3v/lNueeee+TUU081bgcAnghFRCbNTw3VO/eJNM6wF6rPOEGkeb33Y4c/ywpVRHJfAUFZIQAAAAAAyobtYL2iokKuueYa4yvZOeecI8uWLZO5c+e6PT4AyKyyRiQcSQ3VlZ1QfeapImMPJVhH9rJCelzpBzWKskIAAAAAAMBJsJ7N7Nmz3bgbAHAolD5Uj1TaC9V1tnpHs8djRuDKCjlZAUFZIQAAAAAAyoLtGusA4EuxaGqoPu0Ye6G6Kdrj4YARuLJCym5ZoZrhht4AAAAAAKB0EawDCLC4SPPrqaG6NdwcKVTv2CWyf7eHY0bgygrZXQExbbHHgwYAAAAAAMVCsA4guPq6Rbpacw/V23eKbFguEo95OGgErqyQ0xUQAAAAAACg5BGsAwiueDS/UH3dAyL9vR4OGIEsK+TkuKKsEAAAAAAAZcGV5qUAUDSRitTwc9/mxKakGUP1nuH62Shzg2WF+roGvqWsEAAAAAAAyIIZ6wACLCQyeWFqqN7ypv1QvWGKyOgJHo4ZvkRZIQAAAAAA4ADBOoDgqqwVqa7PPVRvPFhkzjKREG+FZS9bWSE9rigrBAAAAAAALEiTAASXNRDPJVSf9zGRiioPB4xAlhVyclxRVggAAAAAgLJAjXUAwZccfo6f7SxUp3wH3CorZJaTAYBM7j7Fvfu6bLV79wUAAADAEWasAwi21q2pofrYmfZDdS3fcYCGk2WPskIAAAAAAMABEgAAwRXtFdm7Kb9QfeOK4Z+jfGUrK+R0BQQAAAAAACh5BOsAgh2s5xOqr/+5SNsOjwcNX0sXqjs5rigrBAAAAABAWaDGOoDgSw4/u9vsheqt7wx8T/kOmGWF2rYNf09ZIQAAAAAAkAFpEoBga5qVGqpve9l+qK7b6yYUYeDwFcoKAQAAAACAIAbrl156qVx99dVD32/YsEHOP/98WbBggZx33nnyxhtvFHV8AHwoUiXSOD01VI/22QzVq0XmniMSqS7C4BGYskJ2V0BQVggAAAAAgLLhi2D90UcfldWrVw9939nZaQTtixcvll//+tdy1FFHyWWXXWZsB4CEYD2fUH3BRSL1E4swcPhWulDdyQoIygoBAAAAAFAWil5jvbW1VW655RaZN2/e0LbHHntMqqur5Utf+pKEQiG57rrr5Nlnn5XHH39czj333KKOF4APJYfqo8baC9XHTBbpaC7euEvEsjuec+V+VlxxoviyrJDtD2soKwQAAAAAQLko+tS6b37zm3L22WfLYYcdNrRt3bp1smjRIiNUV/rv0UcfLWvXri3iSAH4Uk9Haqg+dZG9UN0aoKK8UVYIAAAAAAAEZcb6Cy+8IK+88oqsWLFCbrjhhqHtLS0tCUG7ampqkr/85S9Z7y8ejyd8r4F88rZCby/GY/pun/S1SNlqc7vl/tzaJ7f45fXItN1PY3Ftn0Y6ZuIxkZ1rRUKRge2jxkpcQ/VwxcCxpHWzX/+5hNq2SVx/o6JGZMGFIvWTjJ8bj7ntFZHu1oHbDz5+uR9LuWzP47/6xHsp5NizjSaprFBo28sST1gBcb5IpHLgtloW5nWtqW6G6jUSWnDR4H3HPT2WjEcs5/eIYo1lpCM7Fh14f0p3e22Ea3efKmtFahpGHKNbSu51CsKxl++75wjvNU73yS0l9zr5aLufxsI+lfA+pXkPSthmOc+l3HbwPGfrMYt8nvPd886xxz4FdLub/50CQVO0YL2np0e+9rWvyVe/+lWpqalJ+FlXV5dUVVlCDhHj+95eS3O5JPofdVtbW8LtR40aZdyX9ff0sfTrwIED0t/fP7Rdb6u/s3//folGo0PbR48eLZWVldLe3p7wxlFfXy/hcDjhMVVDQ4PEYjHp6OhIeJPR7fp4+rimSCRi3E9fX19C/fiKigqpq6sznqPu7u5A7FPd4P3r9kg4bNyv/o719tm26zb92f7Bx3Zrn4yrvJBItH/4tsZzXxExfma9D72dvib
GGKOJY1Sl8DoF4dgzjyVznCrhddJxDr5O0tcpoWi/bpB47UESGgzV9We9nR0S+eOvJNy+3bifWLhK+uacK1I1Vt+AjG2VO16R2NvPSayvXzo72iUeq826T0qPDevYw5Gw8TwbY7QcesbYMxx7pfA6JYob2U5M/8ga3qvB/54ybU98L5DB/87c3Kdayz7puIdeJ+vroWMx/9DSr67WgZnqsYH71ONKphwtvf1xo856TUVY4usekti+LQM/r6iW2BHnSfWYyRJr3S7R3j7pGjyWsr1Oynjfi8UTxxgOZTzG0m1X5fYeUax9Mt+bzGPJPG8lvH66vb9v4L2pun5oPBo+xHv1MeMSffx6Y4VDpGGy/pL09fYZPw917jGOsYrKColLSPqrx0qssla6539SQjUNWfcp3XlLjw89vhLOueGQMc5Mx14pvE5BOPbsXjNlPMYs2/W6yc19cnJtZBxjGbaXwutUisce+8Q+Zdsn873JvDZKd82kon29Eop2S6iqbmi/dDKLMfFAf67nudpGqaxrMq4BjWvhaK+EDrRIKBQ3ruNjoUqJ1oyVWKTSOM9V1Y/Luk9Oro2yXZeXwutUisce+xT8fWpsbEzYP6CcFC1Y/+53vytHHnmknHTSSSk/0/rqySG6fp8cwFuZb0LJamtrja9k+iaRjr4xpTNmzBhbj2n8QREOpx2LvvGl265vVum26/OgX4HYp8ELLevvmBdfdrbr/avkx8h7n0KWID1lMOm3G2NMs70kXqcg7JPl+DBnwGQ6lszXV2cUm6G68bN4VKrf/I3IgZ0iGj5WVEt4wYVSXW8p/7L1BZHNqyUcCku4skLG1I8RqW8YcZ/MMDNZujEa29McSyXxOiVuNf5QSv86ZX790m13dZ/S3H/asZgD7d0v0rLRCNWNY89yXBlj0hUQ638uofZtA6/r0AqIyUOvazjWJZVJx1KmfTLe98L2j7FM28vuPaJY+5R07JjnrWRhPXhqxogc/gGRqtED5ap2rh34EFDHNW2hyBEfGVgBEQpJpZYV+tMj+vHtwD4ZZYXOlspwRORPK6S6tlJEj6ks+5TpvKVheUTvx+axVxKvUxD2yeY1U8ZjzLLd+jhu7JPTa6NM20vidUrCPrFPJb9PSdfgye9LQ9fleqKrqB8+z7VuFdm7aeiyvGLeOQNlGfU5iMclvH+XyIbfiITHDdygYaqE5yyTcE/b8HlucD8y7ZPTa6NM1+Ul8ToljZ99Yp/8sk9AuSpasP7oo4/Knj175KijjjK+N4P03/72t7J06VLjZ1b6/YQJ2ZvCpVt+kmlJSiG3F+MxC73d8X2k3Wpje9L9ubVPbvDT65Fpu5/G4tb2EY+Z2saBi3czVNeZxtvXiDTOGLjVYE31kLWm+paBUH3gfkIiNY0Dx57l8cv9WHK+Pef/6j0cY5bRJJUVSv6wxqi1vv4XRk1145hJU6s/pMedlhXy8FjKdv9l8x5RrLGk3WrZrmGD0g9rQhEJafioZYWOvTShVn9Ia/Vr+K4z3K3vV0Zj5cFjydJzppBK8nUKwj6l3Wpzu433mnK/ZvLTWNza7qexuLXdT2Nxa7ufxpLT9pG26Xmuc69I27aBD4XV+NkiRywdvr2Whdn0u4FeNkY/m4GeNSE9D+p5TmfBFvk857vnnWOPfQr4dqDcFC1Y/+lPf5qw/OTWW281/r3qqqvk5Zdflv/+7/82lpvof6z676uvviqf+9znijVcAH6kIeik+cPhpxmqd+4bCNbTNSo1QvVnhr+fcYJI83rvxw5/6evSuj1GWaGhBrjW42qkBrh6XOkXYNKwXEN1GisDAEqRzlRv2zb8vYbqY2cOf6+h+roHjDJnGRvBH9jt8aABACiRYH3q1Klpl6PMmDHDaFT67W9/W2666Sa58MIL5cEHHzTqQ33oQx8q0mgB+FJlja4BTQ3VlZ1QfeapImMPJVjHcNutdKH60AoIm8cVkGYFRMJxZSdUNxsrAwDgN1oeb++mgfNXrqH
6xhXDPwcAIKDSFyUrMq3tdPfdd8uaNWvk3HPPlXXr1skPfvCDoeaBADAglD5Uj1TaC9V1tjqQoaxQTh/WaFkhwFwBkWuozgoIAIDfg3VTLqG6ngfbdng8aAAASmjGerJvfOMbCd/Pnz9fHn744aKNB0BAxKKpofq0Y5yF6lFmy5S9bGWFFGWF4NYKCDuhOisgAABBkByqawkzO6G6eR4M+XKeHwAAtnEmAxBgcZHm11ND9ZoG+6F6xy6R/dR3LHvZygrZXQExbbHHg4avZVoB4SRUZwUEAMCvmmalhurbXrYfquv2uglFGDgAAO4hWAcQXH3dIl2tuYfqulR1w/KBesgoc1nKCjldAQG4tQLC+n4GAIBfRKpEGqenhupmw+4RQ/VqkbnniEQGa7QDABBQBOsAgisezS9UN5aqWmpEorxlKivk5LiirBDcaqzMCggAgJ+D9XxCdT0P1k8swsABACjRGusAkJNIRWr4uW+zSEezjVC9Z/gCH2VusKyQNp1UlBVCsRsrW9/DAADwo+RQXXuL2AnV9TzIeQ4AUAKYsQ4gwEIikxemhuotb9oP1RumiIymvmPZo6wQ3EZjZQBAKevpSA3VtbeInVDdGswDABBgBOsAgquyVqS6PvdQXZeqzlkmEuKtsOxlKyukxxVlheAIjZUBACVMJxLsXJsaqpu9ReyE6tteEekenNQAAEBAkSYBCC5rIJ5LqG5dqgpkKivk5LiirBAUKyAAAKVMS+dF+3MP1fU8qF8AAAQcNdYBBF9y+Dl+trNQnfAKbpUVMsNUlDcaKwMASlo8faiuvUXshOrW8yAAAAHGjHUAwda6NTVUHzvTfqiu4dUByi2UPcoKwasVELZCdVZAAAB8rrYxNVTX3iJOQvWaRo8HDQCAu0gAAARXtFdk76b8QvWNK4Z/jvKVrayQ0xUQAI2VAQClLBQRmTQ/NVQ3e4vYCdX1HGg9TwIAEEAE6wCCHaznE6rrUtW2HR4PGr6WLlR3clxRVgiKFRAAgFJWWSMSjuQequt5cNpijwcNAID7+IsNQPAlh5/dbfZCdXOpKuEVFGWF4BYaKwMASloofaiuvUXshOrW8yAAAAFGmgQg2JpmpYbq2162H6rr9jrKLZQ9ygqhEGisDAAoVbFoaqiuvUWchOpRrpsAAMFGsA4guCJVIo3TU0P1aJ/NUL1aZO45IhEaBJa9bGWF7K6AoKwQrFgBAQAoWXGR5tdTQ3VrzfSRQvWOXSL7Oc8BAIKNYB1AsIP1fEJ1XapaP7EIA4dvpQvVnayAoKwQFCsgAAClrK9bpKs191Bdz4MblrMyCwAQeCQAAIIvOVQfNdZeqG5dqgpkKitk+8MaygphEI2VAQClLB7NL1Q3zoOWcyUAAAFFsA4g2Ho6UkP1qYucheoaoKK8UVYIhUBjZQBAqYpUpIbq2lvEVqjeM3z9BABAgPEXG4Dg0uWjO9emhurhCvuh+rZXRLoHl7KifGUrK2R3BQRlhWBFY2UAQMkKiUxemBqqW3uLjBSqN0wRGc15DgAQbATrAIKrr0sk2p97qK5LVfULyBaqO10BAbACAgBQyiprRarrcw/V9Tw4ZxkrswAAgTeYQAFAEMXTh+qxfnuhunWpKqBlhVo25rcCgrJCcKuxcihUhIEDAGCDNRDPJVTX82DXPg8HDABAYfARMYBgq21MDdW3r3EWqtc0ejxo+A5lhVAINFYGAJSy5FBde4vYCdXN86BefwEAEGAE6wCCKxQRmTQ/NVTv3Gc/VNeLf2t9SJSnbGWF7K6AoKwQrGisDAAoZa1bU0N1a2+RkUJ1PQ8e2O3xoAEAcBfBOoDgqqwRCUdyD9V1qeq0xR4PGoErK+R0BQTACggAQCmL9ors3ZRfqL5xxfDPAQAIKIJ1AAEWSh+qRyrtherWpapAprJCTj6soawQFI2VAQClHqznE6rrebBth8eDBgDAfTQvBRBssWhqqD7tGGehepTZMmXPrbJCzes9Hjj8icbKAID
0lt3xnCv3s+KKE6XokkN1LWFmJ1Q3z4PWJqgAAAQQwTqAAIuLNL8+MDvUGqpba6aPFKp37BLZT33HspetrJDdFRBjDy1IsB6NRSUm6Zt7tXS22L6fmooaqa+qd3FkyGkFROOMge9ZAQEACLKmWamhuvYWmXikvVBdt9dNKMLAAQBwD8E6gODq6xbpah0IqHIJ1XWp6oblA/WQUeaylBWyuwKio7kgofqBvgNGKF4ZrhzaHovHpDvaLbe/ervxfVW4SppqmyQ8OPNLf763a6/0xgaWaoclLFPqpsgn5n6CcL3YKyA0WGcFBAAgyCJVIo3TU0N1s7fIiKF6tcjss0TefKwIgwcAwD0E6wCCKx4d+DfXUN1YqmqpEYnylqmskJPjyuWyQjpTXUP1M6afIaMqRxnb9vftlw17N0h/rF8aqxtl4qiJsmT6EqnU8ernTdE+WbV1lXT1d8koGSVVkSo5dtKx8tru16S7v5tgPSiNlQu0AgIAAFeC9XxCdT0PhgYnNQAAEGAE6wCCLVKRGn7u25w4ezhjqN4zfIGPMuffskI6U11DdQ3EO3o7ZFPrJomEIhKJROSwxsNk6cylCaH6ys0rjfB9dOVoI1T/yKyPSEhCRrCOADVWLsAKCAAAXJUcqmtvETuhup4HOc8BAEoA3UIABFhIZPLC1FC95U37oXrDFJHR1Hcse2ZZIeXTskIaqq9rWWfMVFc6Wz1dqL7zwE7jezNUnzCK47soaKwMAChlPR2pobr2FrETqluDeQAAAoxgHUBwVdaKVNfnHqrrUtU5y0QG61KjjGUrK6THVZHLCukM9ORQfd64eY5CdQ3m4fEKiHzKCtFYGQDgVzqRYOfa1FDd7C1iJ1Tf9opI9+CkBgAAAoo0CUBwWQPxXEJ161JVIFNZISfHVQHKCmkjUrOmujVUjwzW8LYTqq9vWS/tve2ujw3BXQEBAEDOtHRetD/3UF3Pg/oFAEDAEawDCL7k8HP8bGehOuEVfFxWqDvanVeo/uquV2XN7jWujwtZ0FgZAFDS4ulDdb1esROqW8+DAAAEGME6gGBr3Zoaqo+daT9U1/DqAOUWyp6PywrF4/G0oXo0FrUVqr+480XXx4Q8VkDYCtVprAwA8LnaxtRQXXuLOAnVaxo9HjQAAO4iWAcQXNFekb2b8gvVN64Y/jnKV7ayQk5XQBRAQ1VDSqi+fs96R6H6mKoxBRsfgrMCAgCAvIUiIpPmp4bqZm8RO6G6ngOt50kAAAKIYB1AsIP1fEJ1XaratsPjQcPX0oXqTo6rApQVioQiMmfsnJRQvbWn1XaovmjCIqmvsszIR9mugAAAIG+VNSKD1yU5hep6Hpy22ONBAwDgPv5iAxB8yeFnd5u9UN1cqkp4BR+XFaqpqMkrVD9+8vEyb/w818eFLGisDAAoaaH0obr2FrETqlvPgwAABNjg2i0ACKimWamh+raXRSYeaS9U1+11lFsoe2ZZIbOmtQ/LCiWH6hXhCluh+tETj5aWzpaCjQtZ0FgZQKHdfYo793PZanfuB+UjFk0N1bW3iJNQPUo5RgBAsDFNE0BwRapEGqenhurRPpuherXI3HNEIjQILHvZygrZXQFRwLJC6UL1BeMX2ArVTb3WfUTZroAAACB/cZHm11NDdWvN9JFC9Y5dIvs5zwEAgo1gHUCwg/V8QnVdqlo/sQgDh2+lC9X1uCpyWaGN+zamhOrWmukjhep7uvbI3q69BRkb0qCxMgCglPV1i3S15h6q63lww3JWZgEAAo9gHUDwJYfqo8baC9WtS1WBTGWFbH9YU5iyQt393dLW25ZzqL67c7c8seUJiQl/vHqGxsoAgFIWj+YXqhvnQVbSAQCCj2AdQLD1dKSG6lMXOQvVNUBFefNxWaHo4B+vuYbqj2x6hDIwxUJjZQBAqYpUpIbq2lvEVqg+eB40e9sAABBQ/MUGILh0+ejOtamherjCfqi+7RWR7sGlrChf2coK2V0BUcCyQulC9a3tWx2
F6lVhyz6ieCsg7IbqNFYGAPhWSGTywtRQ3dpbZKRQvWGKyGjOcwCAYCNYBxBcfV0i0f7cQ3VdqqpfQLZQ3ekKCJeFQiGZ2zQ3JVTf3LbZdqg+cdREaaptKtgYEZwVEAAA5K2yVqS6PvdQXc+Dc5axMgsAEHicyQAEWDx9qB7rtxeqW5eqApnKCjn5sKYAZYVqIjVSV1mXc6g+efRkWTJ9iYT549U7NFYGAJQy6zVFLqG69TwIAECA8Vc2gGCrbUwN1bevcRaq1zR6PGj4jo/LClkD8VxC9aUzl0qlNheD92isDAAoZcmhuvYWcRKq6/UXAAABRrAOILhCEZFJ81ND9c599kN1vfi31odEecpWVsjuCogClxVKDtVnNsx0FKrH+OPVWzRWBgCUstatqaG6tbfISKG6ngcP7PZ40AAAuGswNQA8Eotmn5nQ0eysth+BaHmrrBEJR3IP1XWp6thDRZrXezxwBKqskB5XjTOKWlZoe8d2ae5sTgjVp4+ZbjtU74v2yd6uvQUdI9KsgNAP/3y2AgIAgLzp9cbeTQPnr1xD9Y0rhn8OAEBAEazD21BdZ4VqWYN4dHBjaCAgN0sdvHLPwAw9a5igZTqsAXq0R2T/bpHag0RO+RLhelkLpQ/VNUy0E6rrbHUnH+agPMsK6XGlwXqRygr1xfpkS8cWqR5sZJlLqL5q6yrpjQ38HB6ugKiI0FgZAFB6Bq85cg7V9TzYtsPjQQMiF6y8wLX7emjpQ67dF4DgIliHtzP4NEAfPW6gsVukQmTywsSO8k2HDYQJtU0D32vwOW3x8M87dolsWC6iAVPn3oHwgmC9vOkHNsmh+rRj7IXq1g9rUN7cKitUgNUPZmCea6i+cvNK2dW5y/VxIRsaKwMAykByqK4TpOyE6uZ5kMbqAICAI1iHt3Smuobq1XUD4ac1FNfmNxqU688ydZTf9NTA71eO0oTC+/HDZ+Iiza8PHDfWUN16XI0UquuHNboCAuUtW1khuysgClxWKDlU7+jtsBWq7zyw0/g+TFsVf6yAcFJWiMbKAAC/apqVGqprb5GJR9oL1XV73YQiDBwAAPfwVza8pzPV04Xq1uY36UJ16+yHhikio7kQK3t93SJdrbmH6npc6QoImjoiW1khpysgCmBG/YyUUH1dyzrboXpVpEqazJVAKDwaKwMASplOdGqcnhqqmw27RwzVq0XmnjOwChkAgAAjWIfHQgPlX/IJ1fVCbc4ylg5iuFZ/rqG6cVxRdxojlBVyclwVoKyQhuJT66emhOr9GtbaDNXPnHGm8S8C1FjZWgYNAAA/sV5T5BKq63mwfmIRBg4AgLsoBQNvaaNSa031XEJ1vVDrGgwogEwrIKxNSUc6rvQCH2XOv2WFKsOVeYXqH5n1EQmZM/LhERorAwDKQHKorr1F7ITqeh7kPAcAKAFM+YW3rLPMk0N1bX5jJ1Q3L9Qo3wE3VkBQVggBKSuUHKo3VjfaCtUnjOL4LgoaKwMASllPR2qorr1F7ITq1mAeAIAAI1hHcaQL1a3Nb0YK1fVC7QANJ8ueGysgKCuEkcoK6XFV5LJC+/v2p4Tq88bNcxSqazAPj1dA5FNWiMbKAAC/0okEO9emhupmbxE7ofq2V0S6Byc1AAAQUKRJ8F7r1vxD9Y0rhn+O8pVtBYTdskLmcQW40Vi5AGWFYvGYbNi7ISVUjwzW8LYTqq9vWS/tve2ujw3BXQEBAEDOtHRetD/3UF3Pg/oFAEDAEazDW9Fekb2b8gvV9UKtbYfHA4evUVYIJVxWqDvanVeo/uquV2XN7jWujwtZ0FgZAFDS4ulDdb1esROqW8+DAAAEGM1LA2TZHc+5cj8rrjhRihqsZwrVtcaenVDdvFCjfAfMFRBt24a/p6wQilVW6JCTRdbe7/rQ4vF42lA9GovaCtVf3PmiBEVJnOdMNFYGAJSy2sbUUF17izTOsB+q1zR6PGgAANxFMoniSBeqa/Mbu6G6bq+jIV/Zc2MFBGW
F4HZj5QJoqGpICdXX71nvKFQfUzWmYONDcFZAAACQt1BEZNL81FDd7C1iJ1TXc6D1PAkAQAARrMN7TbPSh+pm85sRQ/VqkbnniESYyVf2sq2AoKwQitFYuQBlhSKhiMwZOyclVG/tabUdqi+asEjqqywz8lFYNFYGAJSyyhqRweuSnEJ1PQ9OW+zxoAEAcB9/scFbkSqRxun5hep6oVY/sQiDh29RVgh+aaxcgLJCNRU1eYXqx08+XuaNn+f6uJAFjZUBACUtlD5U194idkJ163kQAIAAI02C98F6plBdm9/YCdWtF2pAphUQlBVCiZUVSg7VK8IVtkL1oyceXbAxYQQ0VgYAlKpYNDVU194iTkL1KOUYAQDBRrCO4kgXqmvzGyehut4HypsbKyAoKwS3GisXsKxQulB9wfgFjkL1Xus+omxXQAAAkL+4SPPrqaG6tWb6SKF6xy6R/ZznAADBRrAO7/V0pA/VzeY3dkL1ba+IdA8ETChj2VZAUFYIxWisXKCyQhv3bUwJ1a0100cK1fd07ZG9XXsLMjYEcwUEAAA56+sW6WrNPVTX8+CG5azMAgAEHsE6vKUXTzvX5heq64WafgEmygrBF42VC1NWqLu/W9p623IO1Xd37pYntjwhMeGPV8/QWBkAUMri0fxCdeM8yEo6AEDwEazDW31dItH+9KG6Nr+xE6pbL9SATCsgKCuEEikrFB384zXXUP2RTY9QBqZYaKwMAChVkYrUUF17i9gK1XuGr58AAAgw/mKDx+KZQ3VtfuMkVK9p9HLgKNUVEJQVgluNlQtYVihdqL61faujUL0qbNlHFB6NlQEAJSskMnlhaqhu7S0yUqjeMEVkNOc5AECwEazDe7WN6UN1s/mNnVBdL9KsF3IoT9lWQFBWCMVqrOyyUCgkc5vmpoTqm9s22w7VJ46aKE21TQUbI4KzAgIAgLxV1opU1+cequt5cM4yVmYBAAKPMxm8FYqITJqfX6iuF2rTFns8cARuBQRlhVCMxsoFKCtUE6mRusq6nEP1yaMny5LpSyTMH6/eobEyAKCUWa8pcgnVredBAAACjL+y4a3KGpFwJH2ors1v7ITq1gs1INMKCMoKoUTKClkD8VxC9aUzl0qlvr/CezRWBgCUsuRQXXuLOAnV9foLAIAAI1iHx0KZQ3VtfuMkVI8OXqChfLmxAoKyQnCrsXKBywolh+ozG2Y6CtVj/PHqLRorAwBKWevW1FDd2ltkpFBdz4MHdns8aAAA3EWwDu/FoulDdWu4OVKo3rFLZD8XYmUv2woIygqhWI2VC2B7x/aUUH36mOm2Q/W+aJ/s7dpb0DEiGCsgAADIm15v7N2UX6i+ccXwzwEACCiCdXgsLtL8en6hul6obVjO0kFkXwFBWSEUo7FyAcoK9cX6ZEvHlrxC9VVbV0lvbODn8ACNlQEApWzwmiPnUF3Pg207PB40AADuI1iHt/q6Rbpa8wvVjQs1AiKMsAKCskIokbJCZmCea6i+cvNK2dW5y/VxIRsaKwMAykByqK4lzOyE6uZ5kMbqAICA40wGb8WjmUN1bX5jK1TvGQ4kUOZcWAFBWSG41Vi5wGWFkkP1jt4OW6H6zgM7je/DnPK9RWNlAEApa5qVGqprbxG7obpur5tQhIEDAOAe/sqG9yIV6UN1a/ObkUL1hikio7kQK3turICgrBDcbqxcADPqZ6SE6uta1tkO1asiVdJU21TQMSIYKyAAAMhbpEqkcXpqqG72FhkxVK8WmXuOSISJUgCAYCNYh8dCIpMX5heq64XanGUsHUT2FRCUFUIxGisXoKyQhuJT66emhOr9GtbaDNXPnHGm8S88QmNlAEAps15T5BKq63mwfmIRBg4AgLsGp1IBHqmsFamuzy9U1wu1rsGAAsi0AqKjefh7ygrBblkhbTrps7JCleHKvEL1j8z6iITMGfkITmNl63sYAAB+lByqa28RO6G6ngc5zwEASgBTfuEt6yzz5FBdm9/YCdX
NCzXKd8CNFRCUFUJAygolh+qN1Y22QvUJozi+i4LGygCAUtbTkRqqa28RO6G6NZgHACDACNZRHOlCdWvzm5FCdb1QO0DDybLnxgoIygrBtcbKhSsrtL9vf0qoPm/cPEehugbz8AqNlQEAJUwnEuxcmxqqm71F7ITq214R6R6c1AAAQECRJsF7rVvzD9U3rhj+OcpXthUQdssKmccV4EZj5QKUFYrFY7Jh74aUUD0yWMPbTqi+vmW9tPe2uz42BHcFBAAAOdPSedH+3EN1PQ/qFwAAAUewDm9Fe0X2bsovVNcLtbYdHg8cvkZZIZRwWaHuaHdeofqru16VNbvXuD4uZEFjZQBASYunD9X1esVOqG49DwIAEGAE6/A+WM8UqmuNPTuhunmhRvkOuLUCgrJC8HFZoXg8njZUj8aitkL1F3e+6PqYkMcKCFuhOo2VAQA+V9uYGqprbxEnoXpNo8eDBgDAXSSTKI50obo2v7Ebquv2OhrylT03VkBQVghuN1YugIaqhpRQff2e9Y5C9TFVYwo2PgRnBQQAAHkLRUQmzU8N1c3eInZCdT0HWs+TAAAEEME6vNc0K32obja/GTFUrxaZe45IhJl8ZS/bCgjKCqEYjZULUFYoEorInLFzUkL11p5W26H6ogmLpL7KMiMfZbsCAgCAvFXWiAxel+QUqut5cNpijwcNAID7+IsN3opUiTROzy9U1wu1+olFGDx8i7JCKOGyQjUVNXmF6sdPPl7mjZ/n+riQBY2VAQAlLZQ+VNfeInZCdet5EACAACNNgvfBeqZQXZvf2AnVrRdqQKYVEJQVQomVFUoO1SvCFbZC9aMnHl2wMWEENFYGAJSqWDQ1VNfeIk5C9SjlGAEAwUawjuJIF6pr8xsnobreB8qbGysgKCsEtxorF7CsULpQfcH4BY5C9V7rPqJsV0AAAJC/uEjz66mhurVm+kihescukf2c5wAAwUawDu/1dKQP1c3mN3ZC9W2viHQPBEwoY9lWQFBWCMVorFygskIb921MCdWtNdNHCtX3dO2RvV17CzI2BHMFBAAAOevrFulqzT1U1/PghuWszAIABN5gkgl4RC+edq4d6CSfa6iuF2r6BbhRVqijuXjjRok1Vi5MWaHu/m5p622T6kh1TqH67s7d8sSWJyQm/PHqGRorAwBKWTyaX6hunAdZSQfLNXdflzvN463HIgB4gGAd3tITZrRfpCKSGqpr8xs7obr1Qg3QFRAtGykrhOKXFZp9lsibj7k+tOjgH6+5huqPbHqEMjDFQmNlAECpilSkhuraW8Q6aWWkht16/YTyptdGv/9Pkf3Nw9c+OlHFWqpTb2NdrV7TmHjcaa1+LStUe5DIKV8iXAfgKYJ1eCw+8E+6UF2b3zTOsB+q6wkV5c2NFRCUFYJbjZVDoYINL12ovrV9q7R0ttgO1avCln1E8VZATDxy4HsaKwMAAiskMnlhaqiuvUWmHGUvVG+YMlxOBuU98U5D9YpakdrGgf5X1lKd+rea5gC1TQPf6zE1bXFirX4tK6RBfOfegfsjWAfgIaZCwXt6wkwXqpvNb+yE6npC5YQJcwWEoqwQ/NJY2WWhUEjmNs1NCdU3t222HapPHDVRmsw/SFB4NFYGAJQyLblRXZ8aqptGCtX1PDhnGSuzkJgRHHupyJQFIvWTBr72vSXSvF6kum7g64ilA1/mz+NxkU1PDVx3VY5iBQSAouBMBm/pzOJJ8/ML1fVCzfopNcpYlhUQlBVCMRorF6CsUE2kRuoq63IO1SePnixLpi+RMH+8eofGygCAUma9psglVLeeBwE9nnRCwUgZwEgrIEaz0g+A9/grG96qrBEJR9KH6tr8xk6obj2hAplWQDgJ1SkrBLOsUD6heoHKClkD8VxC9aUzl0qlvr8iWI2VAQDwu+RQXXuLOAnV9foL0NJ31gkFTkN1VkAAKCLeeeCxUOZQXZvfOAnVtUkJypsbKyAoK4SRygrZXQFR4LJCyaH6zIa
ZjkL1GH+8+mMFBI2VAQCloHVraqhu7S0yUqiu58EDuz0eNHzJWvoul1CdFRAAyjVY37Vrl1x55ZVy7LHHykknnSQ333yz9PQMvEG+88478ulPf1oWLlwoZ511ljz33HPFHCrcFIumD9Wt4eZIJ1RtUqKdv1Hesq2AoKwQ3GysXOSyQts7tqeE6tPHTLcdqvdF+2Rv196CjhHBWAEBAEDe9Hpj76b8QvWNK4Z/DrgRqjOJBEA5BevxeNwI1bu6uuT++++X//zP/5Snn35abr/9duNn//AP/yDjxo2TX/3qV3L22WfLF77wBdmxY0exhgvXxEWaX88vVNcTqnb+5sSJbCsgKCuEYjRWLkBZob5Yn2zp2JJXqL5q6yrpjQ38HB6gsTIAoJQNXnPkHKrrebCNv+0hiRMK8gnVWQEBoEgG/8rz3ubNm2Xt2rXyhz/8wQjQlQbt3/zmN+Xkk082Zqw/+OCDMmrUKJk1a5a88MILRsh+xRVXFGvIcENft0hX60CQkGuobpxQCYgwwgoIygqhGGWFmte7PjQzMM81VF+5eaXs6tzl+riQDY2VAQBlIDlU1xJmdkJ18zxITWyYx41e/1TX5R6qswICQJEU7Uw2fvx4+eEPfzgUqpv2798v69atk7lz5xqhumnRokVGEI+Ai0cH/k0XqmvzGyefUmsggTLnwgoIygrBrcbKBS4rlByqd/R22ArVdx7YaXwfpq2Kt2isDAAoZU2zUkN17S1iN1TX7dq0ErCWvsslVGcFBIBynLE+ZswYo666KRaLyc9+9jM5/vjjpaWlRSZMSDzJNjU1SXNzc9b71BIyVqFQKGVbobcX9jHjg6UvUm/vZLveb0H3aaSRRCoSwk9ju9lRfsrCgRvPPFVCM04Yvv8OPaE+KNLfLSEJSVyDCL14058P3ibTeNzi9bHkdLufxuLaPo10LCWtgAhNO0biZqiu97V1IKQyjhn9Db1Qm3788DHT0Sxxs6wQx1Je2/N9Xxq6l0KOPetoEssKhTr3Db5fDX5YUz/JOD6M+97yfEqobrxf6YW//lYBjqUZ9TNSQvV1Letk9kGzjfufUjdFPnzoh6UiXGF83x/rN0L1HfsH/sioCldJU22T8TN/v3c4Pc/58VhKXQERivVL3PywpnG6SEWNyIILJTRm8vD9J79fGSsgXh86nrKN0S2cn4qwT/m+e47wXuN0n9xScq+Tj7YH4VhybZ8CvD37e7Yb10wF3qeRzriRqoHzmbndDNWN3iJxkYaDJTTvYxLX6yi9f50E8LqGn+8MnOf02n32h0TefKyo5zk/HTNubffTWGxtH/qZ/q12StoMINTfM3Bt1KCh+vkD1+d6zET7JP76Q8ZxZfy+roDQ7VnOQ27K57kv+vPu8na3n1sgSIoWrCf71re+JRs2bJBf/vKXcu+990pVVWJXZ/2+tzdz+Q/9j7qtrS3h9jrjXWu4W3+vpqbG+Dpw4ID09w/WP9XV2qNGGb+jM+aj0cFZ1SIyevRoqayslPb29oQ3jvr6egmHwwmPqRoaGowPCTo6OhLeZHS7Pp4+rikSiRj309fXJ52dnUPbKyoqpK6uzmjk2t3dPbQ9FotLOBwy/o1b6ouHQmFjezQ2GA4O0vHpY0ejetvh7ToOt/epbvA50+2RcNi4X/0d6+0H54JKfNICkeoxwxfw77411FE+2h+V6CEnS2TqscbBqc9NrG2HRN74uXFCDUfCEjpouvROPV5Cr90vXR3tEo/VZnydzIxG79cqUhExfmZ9rfV2+poYY48mjl3ZfZ1K8djzcp/MY8kcp0p4nXScg6+ThqDGhZOGV1MXS2gwVNef9f31WYm8/ezAsReJSHTGydI/8WiRwQbJkc7dUvnHX0qsr0tiff3SOXgsZdsnpceGdezGMWn8dxZN+AvEGHuGY68UXqdE+p6k71HWfR143jNvT3yPkMH/ztzcp1rLPg2/Hya9HjoW88JYa2Jve0VCXe8OherxqYuN96veweOmZtd
rEt/09NB7hL5fyaRFoutndH+iXQeG3peyvU4yePt4LOk9W9/jLceY3kZD8an1U4duq6H663teN8JzNb56vCyZskRi/THp6e+RUCQkj739mLxj/JEhUhWpkhOnnCjPNT9n3J/19fPbsae7rYdC8nkrHI5IKBRP//qlOcaU2/tkvjeZx5Lx+qU55+r2kAYG5h93ulLLsgKiXyokOudcqaob+LBGHzf0zkvG+5UxzoqIxA89RXrrpkl46xrjeJL4qKyvU7rzlr436TGTcC4Oh4xxZjr2OD95s0+2rpnM1ynDMWZu39/W5uo+Obk2Mo6xDNtL4XUKyrFnHk/mtVG6Y8nOdj2W/LJPpfg6WffJybVRJBzJ+Lef2/tkHkvmtVG6c64Mbg/rbGEzEO9pt4TqIv2jJ0tszjlSXVElsWhU+roPSOSPv5JQ27aBY696lESPPF/6ozEJ9/YZ57nKioasr1Om62+n1+Xlfux5uU91/7N0xGsmYz+jfRLWPkA6UaXlTYnrl9528MOa2LgjBq6NGg6W3tlni/THB2avR3ul5s+PSLx168BxIBGJVx0kvZpNjMn8t7vTa6Nsx571uQ/q6+TWsdfYyCpLlK8Kv4Tq9913n9HA9PDDD5fq6mppbW1NrDHb22u8CWRivgklq62tNb6S6ZtEOvrGlGmGvZ3HNP6gCIfTjkXf+NJt1zerdNv1edAvk15ADf9rxtTD9AI5nUgknDIO1/dp8ELL+jvmxdcQfZOuHCWhmjFDIZpR/mUwVDfGevgZEpk+vPSrsnuPyMZfG1GEaBhufEr9Manq3CtSVSmV9WNE6ofHlbJPIUuQnrKz6bcbY0+z3e7rVIrHnqf7ZDluzJkxKceS+YGHfkUqJHTwsQnlX/TDmqr9zQPHjJp5qkSmHz/8X43OfvjjL42LsrD+cVIzWsYkHUuZ9kkvptJJN0Zje5pjqSRep8StRuiZ/nXK/Pql2+7qPqW5/7RjMf9n1/qBppP6rYbq044Z+rDGGJMxo3j18HuEcVwNv1+FO1sk3PtuyvtSpn3S4yBddRbrMaYX3BWhwVnP4dBAqL73demP9xtj1vIvOlPdWv7l0bceNcq/6L5WR6pl2axlxuyw0K7B94g6/x575qkh+bw1+NM0r1/mY8/1fUp6DOP1S8PYrqG67kxSqK4f1lQs+oRU1A+Uf9FjqXrXqyLb/jD0fhWaeZqxsqa6o9lynhuTdZ8ynbf0mNFQJu0Y0wyf85NH+2TnmmmkY2yQ9XHc2Cen10aZtpfE6xSUfbJeN2U4luxstz5O0fepFF+nPK6NMv3t5/o+JV2DJ48l4brcvA5PCtW1t0jF0R8fmNGuY4/3S/WbvxHRsnT6XmGs2LpIIvWTJGI9zw3uR6Z9ynT97fS6vNyPPU/3yfIaZDqfDbxOMZFwtchBMwbGr/+jx9X2V4xJVHqOCenqiHnnS/XgcWWsgNjwGxHzw5qa0SKzlhkrICoHx5Bpn5xeG2U79tLdf+BeJ1f/JgTKU9GD9a9//evywAMPGOH6Bz7wAWPbxIkT5a9//WvC7fbs2ZNSHiZZuuUnmZakFHJ74e7b/D7TMht72837Leg+ZRqJtUGNJVQPmc1vZrxv+OftOyVklH/RmaKhhHpqxuOaUxstYyjkEqRiHEtOt/tpLG5tz35Ua7K4MDFUHyordFRCnb5QQp0+87gSCTVMHSgnw7GU5/b83pe8GWOW0aRprGyG6oatLxqh+sB9hNLWfwzpBb7OKCvQsWSWfzFnqjdWN6bUVDdDdVVdUS0fmfURmTBqgrR0tgyNxd/vHbmc53x2LGVorBwaaqw8ZfjGW16QkHFchVLfr4yp+70Jx1Mh35ey3T/npwLvU9qtNrfbeK9xut0NJfk6+Wh7UI4lXid3ro38dJ5L2NbTIdKy0QjVQ9aG3WYfrP5eCa3/hRF+Gr9p6S0ydJ7rbi/6ec5Px4xb2/00FmN72q2W7WZgrrrbJGT9sMYI1T8moYSa6uZxJRIa/LB
m4Bjy7ljK9BiBfp18dB0BBElRO5l997vflQcffFBuu+02+fCHPzy0fcGCBfLHP/4xYTnLmjVrjO0oEUkz1VM6yttpUnKAhpNlr7JWpLo+83Flp/nNnGUDs0tR3lxprJy5XFm+9vftTwnV542bl7FRqZZ/MUN1azAPr9BYGQBQwnQiwc61CTPVExp2pzQqTdOwe9sriU0rgYRa/YPHVdYGuGmOKwDwWNHSpE2bNsn3vvc9+exnPyuLFi0yGpaaX8cee6xMnjxZrrnmGvnLX/4iP/jBD+T111+Xj370o8UaLtzUujX/UH3jiuGfo3xlWAGRU0d5IKmxck7HlTlLy0WxeEw27N2QEqqby1jthOrrW9ZLe2+762NDBuYKiFxDdT2uzMbKAAD4jZbO0940uYbqeh7UL8C6AiI5VDdWQDgI1TWYB4ByKQXz1FNPGY0Rvv/97xtfVm+++aYRul933XVy7rnnyowZM+TOO++UKVMsy6cRTLqsfe+m4fApl1BdT6htO4oweARqBYSTUJ3wCmnKCjkO1RumDIepLuqOdhuhutahzCVUf3XXq7Jm9xrXx4UcV0DYCdULvAICAID8xNOH6joJwE6obj0PAuYKiNBg7XNWQAAIkKIF65deeqnxlYmG6T/72c88HRM8CtZNyaG6fsJsJ1Q3T6iU74C5AmKwxp6BskIoVlmhQ04WWXu/60OLaz+JNDPVo7GorVD9xZ0vuj4m5LECQpu1FXEFBAAArqhtTA3VtbdI4wz7oXpNo8eDhq9XQGiDW1ZAAAiYojcvRZlKF6rr0q+JR9oL1XV7XfZmtshs2R3PuXI/K644UQK/AoKyQhiprJDdFRBdg/W0C6ChqiElVF+/Z71MrZtqO1QfUzWmYOODgxUQSY2VvV4BAQBA3nRm8aT5qaG69hbRYN1OqK7nwOb13o8dPsQKCADBxZRfeK9pVvpQfajz90iherXI3HNEIszkK3vZVkBQVgjFaKxcgLJCkVBE5oydkxKqt/a02g7VF01YJPVVlhn5KCwaKwMASllljcjgdUlCqG43/NTz4LTFHg8agVwB4SRUZwUEgCLgLzZ4K1Il0jg9v1BdT6j1E4swePgWZYXgl8bKBSgrVFNRk1eofvzk42Xe+HmujwtZ0FgZAFDSQulDde0tYidUt54HgWwrIJTdFRDWlYIA4BFKwcD7YD1TqK5Lv+yE6npCtdaoRXnLtAKCskIosbJCyaF6RbjCVqh+9MSjpaWzpWDjQhY0Vka+YtHsx4Hd6yFdRUHgAMDt96fkUF17izgJ1aOUY4RLKyDGHkppIQBFQbCO4kgXquvSLzuhuvU+UN7cWAEx+yyRNx8rwuBRco2VC1hWKF2ovmD8AluhuqnXuo8oPBorw43QShu6VY0e3qb/HVv/W37u9sTAXEOq/buHw3g9z42eMHBcHXsp4ToAl8RFml8feI+yhurW95iRQvWOXQPvV4AbKyCYeAegSAjW4b2eDpGWjamhupPO39teEemmqVvZy7YCwm5ZodDghRzgRmPlApUV2rhvo3RHuxNCdWvN9JFC9T1de2Rv196CjA3BXAGBANBwXEP1wz8w8K9+WKPHlXXF1on/lBhSbVguUnPQcANcrdXf0y7ypxUDARjBOgA39HUPNNjW81wuobqeB/X9ipVZMLECAkBAEazDW3rxtHPtQB21XEN1PaHqF2CirBB80Vi5MGWFuvu7pa23Taoj1TmF6rs7d8sTW56QmPDHq2dorAw3aajeuXdgBUTyhzX1k4aPq01PDXzgbKzmshxXep4jvALgpnh04N9cQ3XjPMhKOphYAQEguOjYB2/pyTLanz5U16VfdkJ16wkV0BUQlBWCH8oKzT1HJDIYerkoOvjHa66h+iObHqEMTLHQWBkl3FgZQJmLVKSGn9pbxFaoPvh+ZX5YiPJmroBQrIAAEDD8xQaPxTOH6rr0y0moXtPo5cDh5xUQlBWCHxor108s2PDShepb27c6CtWrwpZ9RPF
WQNgN1WmsDGtZIRNlhQD4Qkhk8sLUUN36IeBIobqWq9IeEAArIAAEGME6vFfbmD5Ud9L5W0+m1AlFthUQlBVCsRoruywUCsncprkpofrmts22Q/WJoyZKU21TwcaI4KyAQMD4vLEygDJVWStSXZ97qK7vV9oDgpVZMLECAkBAcSaDt7S2+qT5+YXqekKdttjjgSNwKyAoKwS3ygo5+bCmAGWFaiI1UldZl3OoPnn0ZFkyfYmE+eM1WI2VC7gCAiXUWJmyQgCKwfqekkuobn2/AlgBASDAuMqGtyprRMKR9KG6Lv2yE6pbT6hAphUQlBVCiZQVsgbiuYTqS2culUp9f0WwGisDPm+sDAAp4ad+COgkVKcmNhQrIAAEGO888Fgoc6iuS7+chOpR6oWWPTdWQFBWCG41Vi5wWaHkUH1mw0xHoXqMP169RWNluIGyQgD8isbKcAsrIAAE2GBqAHgoFk0fqjtpUtKxS2Q/F2JlL9sKCLtlhcYeKtK83uOBI3CNlRtnFLWs0PaO7dLc2ZwQqk8fM912qN4X7ZO9XXsLOkakWQGhH/75bAUEyrCxcmhwUgMAuN1Y2axpTWNluIEVEAACiBnr8FhcpPn1/EJ1PaFuWM6JE9lXQFBWCMVorFyAskJ9sT7Z0rElr1B91dZV0huzNEFEYdFYGWXQWBlAGcvWWNlOqE5jZSRjBQSAgCJYh7f6ukW6WvML1Y0TKgERRlgBQVkhlEhZITMwzzVUX7l5pezq3OX6uJANjZVR+o2VASBtY2U7oTqNlZFuBYSJFRAAAoQzGbwVj2YO1XXpl61QffCEaS49RBlzYQUEZYXgVmPlaYsLOsTkUL2jt8NWqL7zwE7j+zCnfG/RWBkl3lgZQJnL1FjZbqhOY2WYWAEBIMD4Kxvei1SkD9WdNClpmCIymguxsufGCgjKCsHtxsoFMKN+Rkqovq5lne1QvSpSJU21TQUdI4KxAgIBE4DGygDKEI2VUQisgAAQQLzzwGMhkckL8wvV9YQ6ZxknTmRfAUFZIRSjsXIBygppKD61fmpKqN6voZrNUP3MGWca/yJAjZULvAICJdJYmbJCAPzWWNlOqK7vV/UTizBw+BYrIAAEFMkkvFVZK1Jdn1+obj2hAplWQFBWCCVSVqgyXJlXqP6RWR+RcbXjXB8XsqGxMkq/sTIApG2sbCdUp7EyrFgBASDACNbhLess8+RQXZd+OQnVKd8BN1ZAUFYIASkrlByqN1Y32grVJ4zi+C4KGivDDZQVAhC0xspOQnUaK0OxAgJAgBGsozjShepOm5QcoOFk2XNjBQRlheBaY+XClRXa37c/JVSfN26eo1Bdg3l4xb8rIBAwAWisDKAM0VgZhcAKCAABRJoE77VuzT9U37hi+OcoX9lWQFBWCMVorFyAskKxeEw27N2QEqpHBsM2O6H6+pb10t7b7vrYENwVEAgKfzdWBlCmsjVWthN+0lgZyVgBASCgCNbhrWivyN5N+YXqekJt2+HxwOFrlBVCCZcV6o525xWqv7rrVVmze43r40IWNFZGGTRWBlDOsjRWthOq01gZVqyAABBgBOvwPljPFKrrJ8x2QnXzhEr5Dri1AoKyQvBxWaF4PJ42VI/GorZC9Rd3vuj6mGADjZXhCsoKAQhYY2UnoTqNlaFYAQEgwEgmURzpQnVd+mU3VNftdTTkK3turICgrBDcbqxcAA1VDSmh+vo96x2F6mOqxhRsfAjOCggEDGWFAPgRjZXhKlZAAAgugnV4r2lW+lDdSefvueeIRJjJV/ayrYCgrBCK0Vi5AOFVJBSROWPnpITqrT2ttkP1RRMWSX2VZUY+ynYFBALG542VAZSpbI2V7YSfNFZGMlZAAAgo/mKDtyJVIo3T8wvV9YRaP7EIg4dvUVYIJVxWqKaiJq9Q/fjJx8u88fNcHxeyoLEyyqCxMoBylqWxsp1QncbKsGIFBIAAI02C98F6plBdl37ZCdWtJ1Qg0woIygqhxMoKJYf
qFeEKW6H60ROPLtiYMAIaKyNvlBUCELDGyk5CdRorQ7ECAkCAEayjONKF6rr0y0morveB8ubGCgjKCsGtxsoFLCuULlRfMH6Bo1C917qPKNsVEAgYygoB8CUaK8NNrIAAEFxcZcN7PR3pQ3Unnb+3vSLSPdjMC+Ur2woIygqhGI2VCxRebdy3MSVUt9ZMHylU39O1R/Z27S3I2BDMFRAICJ83VgZQpmisDLexAgJAQBGsw1t68bRzbX6hup5Q9QswUVYIvmisXJiyQt393dLW25ZzqL67c7c8seUJiQl/vHqGxsoog8bKAMpYtsbKdkJ1GisjASsgAAQXwTq81dclEu1PH6rr0i87obr1hApkWgFBWSGUSFmh6OAfr7mG6o9seoQyMMVCY2W4gbJCAILUWNlWqE5jZViwAgJAgPEXGzwWzxyq69IvJ6F6TaOXA0eproCgrBDcaqxcwLJC6UL1re1bHYXqVWHKQXiKxspwA2WFAPgSjZXhIlZAAAgwgnV4r7YxfajupPO3nkytJ1yUp2wrICgrhGI1VnZZKBSSuU1zU0L1zW2bbYfqE0dNlKbapoKNEcFZAYGA8XljZQBlisbKcBsrIAAEFGcyeCsUEZk0P79QXU+o0xZ7PHAEbgUEZYVQjMbKBSgrVBOpkbrKupxD9cmjJ8uS6UskzB+v3qGxMsqksTKAMpWtsbKdUJ3GykjACggAwTWYFgAeqawRCUfSh+q69MtOqK4n1I5mjweOwK2AaJwx8D1lheCkrJB++OezskLWQDyXUH3pzKXS2kO5o8A1VuY8B583VkYAxaKZaxA7ec/R2cqsHEWmxspOQvUC1MSOxqIZG7a3dLbYvp+aipqE1YLw+QqIQ04WWXu/h4MGgAEE6/BYKHOorku/7ITqpij1QstethUQGqzbLSvUvN77scOfZYUqIrmvgChwWaHkUH1mw0xboXqlvr/qbtDQyfsVEC0baayM4pcVmn2WyJuPFWHw8F2oru9LOsFFjyvryho9Pzz178Mhpx43OvPT/GBXt2sDXDPEGj1O5OQvEq6XO22s3LbNV42VNVTvifZIbUXt0Lbu/u6hRvDfefU7Rmm8Ksvx39HbIe297UPfj6kaYwTq2t/mojkXEa4HZQVE12CuAAAeI1hHcS7s04XqTpqUdOwS2e/uhRhKbAWE3bJCYw8lWMfIjZWdrIAogO0d26W5szkhVJ8+ZrrtUL0v2id7u/YWdIwIxgoIlGFj5dDgpAaUN31f0mumg48bCEBNGrbr+1XNQcPlFLT2dXID3N5OES1LpudFfS/TD6QJ1suX2VjZrGntk8bKOlNdQ/WTp50s1ZFq2bhvo7T1DnxIrUH5x4/4uIyrHTd0+/Ut62XN7jXSWD2wenXRhEUyb/w82de9T57a+pQRyhOsl/cKCAAYCcE6PBYXaX594GI8n87fG5Zz4kT2FRCUFYKbjZXtroAoQFmhvlifbOnYYvyBmGuovmrrKumNWZogongrIGisDK8bK3Oeg/WDGg2qzOtuPa50ZY1+CFhdl3kFRHf7wM/1uJp1BisgkL2xsp1QvcCNlfWa6e32t6U72m38fw3VF4xfIEc0HTF0m1d3vSob390ooytHpy2vZ15XoXxXQACAHQTr8FZft0hX68CFea6hunFC5UIHI6yAoKwQvG6sXKCyQtY/7HIJ1VduXim7One5Pi5kQ2NleFBWqMiNlVGmjZVZAYGRGivbCdUL3FhZZ6prqK7MUN0681xD9Rd3vjj0fXKovqdrDyv9vOTTFRAAYEdhzmRAJoP17dKG6rr0y1aoPnjCNE+8KGODKyAoKwQ/NFaetrigQ0wO1bUmqJ1QfeeBncb3YU75/lgB4SRUp7EyzLJC+YTqlBWCm42VgZEaK9sN1QvUWFnLt1jLvzgN1XXSwhNbnsjYABXltwICALLhr2x4L1KRPlR30qRE6z9qUyWUN3MFhKKsEPzSWLkAZtTPSAnV17Wssx2qa5MubdaFgK2AoH4
xzLJCyqeNlRHAFRC5lhUysQICbjRWnnuOyGCZOzeZjUpzDdWtkxbgMZ+ugACAbHjngcdCIpMX5heq6wlVmypx4kS2FRCUFUIxGisXoKyQhuJT66emhOr9GqrZDNXPnHGm8S8C1Fi5wCsgUCKNlSkrBCdYAQE/lRWqn1iw4aUL1be2b3UUqleFuW7ylE9XQADASEgm4a3KWpHq+vxCdesJFci0AoKyQiiRskKV4cq8QvWPzPqIjKsd5/q4UODGyoCdxsqKskJwYwUEjZVRImWFQqGQzG2amxKqb27bbDtUnzhqIiv9vOTjFRAAMBKCdXjLOss8OVTXpV9OQnXKd8CNFRCUFUJAygolh+qN1Y22QvUJozi+i4LGynADZYXgKhoro/TLCtVEaqSusi7nUF0nLSyZvkTCrI72js9XQABANoNXU4DH0oXqTpuUHHB3Zmg0Fs3YpKals8X2/dRU1CTMkIDPV0AccrLI2vs9HDQC2Vi5o7moZYX29+2XTa2bEkL1eePmOQrVNZiHxysgdHaoz1ZAoAwbK489VKR5vccDR+BWQDTOGPieFRBwUlZIP/zzWVkhayCeS6iukxZaeyh3FLgVENZrdQDwEME6vNe6VaRtW36h+sYVwz93KVTvifZIbUVtQkd5s/nNd179jrEc0FqjWEOq9t72oe/HVI0xAnWt6XfRnIsI14u9AsJuWaGuwYACyNZYecpRRSsrFIvHZMPeDRIZ/OPVDNUjg2GbnVB9fcv6hPcreLQCQo8Hn66AQBk1ViZsgJ0VEBqs210BwQc1MMsKVUR8W1YoOVSf2TDTVqhuTlrQ6y94vAKiZaPvVkAAwEgI1uEtvXDZu2k4fMolVNcTatsOV4elM9U1VD952slSHamWjfs2SlvvwIlZg/KPH/HxhBrFGlKt2b3GCLjUogmLZN74ebKve588tfUpI5QnWPcQZYXgl7JCZjkZF3VHu42Z6pFIJKdQ/dVdrxrvV/AQjZVRBo2VUaaNlVkBgQCUFdresV2aO5sTQvXpY6bbDtX1+mpv196CjhHBWAEBACMhWIe3Bi9e0obq+gmznVDdPKEWoO6dhupvt79tBFn6/82O8kc0HZEQUm18d6OMrhyddkmheYGGAK2AcLmsEALIx2WF4vGBP16TQ3VdaWMnVH9x54uujwl5rICwVVaIxsowUVYIPmuszAoI+LysUF+sT7Z0bDH+lss1VF+1dZX0xvibzjMBWAEBAJnQkQPFkS5U13pqdkN13V7nfkM+nalu1tQzQ3XrzPPkkCo5VN/TtYfZDcVYAeGjskIIKDcbKxdAQ1VDSqi+fs96R6G6lquCV2isjPJprIyAobEySryxsnWSUy6huk5a2NW5y/VxIbgrIAAgG4J1eK9pVvpQ3Unn77nniAzOQnCLlm+xln9xGqrrhdoTW57I2AAVHq+AKFJZIZR5Y+UChFdaW33O2Dkpobr5IaCdUF3LVVGeKmArIOYsK8jKLJRYY2XKCiGXFRD5lBViBQTcKis0bXFBh5gcqmt/LDuhujlpIUxU4o8VEE5CdRorAygCzhbwljb/bJyeX6iuJ9T6ia4PzWxUmmuobr1Qg8d8WFYIAS0rlE+oXqCyQjUVNXmF6vp+pT0gELDGygVcAYESaqxsoqwQ7GAFBPxUVqiAZtTPSAnV17Wssx2q6/VVU21TQceIYKyAAICRkCbB+2A9U6iuS7/shOrWE6rL0oXq2lHeSaheFSYM8cUKiCKXFULABKCsUHKoru9XdkJ16/sVPEZjZeSNskJwEY2VUQZlhTQUn1o/NSVU10bwdkP1M2ecafwLjwRgBQQAZEKwjuJIF6rr0i8nobreh4tCoZDMbZqbEqpvbttsO1SfOGoisxuCtgKiAGWFUKaNlQtYVihdqK4fAjoJ1VlR4zGfroBAwFBWCF6tgLAVqrMCAv4vK1QZrswrVNdJC+Nqx7k+LgR3BQQAZMNVNrzX05E+VHfS+XvbKyLdg0tZXVITqZG6yrq
cQ3W9UFsyfYmE+ePVHysgilhWCGXcWLlA//3TWDlgArACAgHh88bKCBpWQKB8ygolh+qN1Y22QnXrpAV4yKcrIABgJCSA8JZePO1cm1+oridU/XKZNRDPJVS3XqjBYz4sK4RybKxcmLJCNFYOIBorowwaKyOAWAGBMikrtL9vf0qoPm/cPEehugbz8Ip/V0AAwEgG00zAI31dItF+kYpIaqiuFz52QnXrCbUAkkN17SjvJFSP8cer9ysgWjb6qqyQWbojU5DZ0tniqHGlNUSFj8sKzT5L5M3HXB8ajZUDjMbKcKusUNu24e8pK4RiNlbuGgy+gExlhTqai1pWSP8W27B3g0S0IaYlVDcbwdsJ1de3rJf23nbXx4YRVkDo8eDTFRAAkAnBOjwWH/gnXaiuS78aZ9gP1WsaXR/d9o7t0tzZnBCqWzvKjxSq64Ua5RaKsAJi8MLZL2WFNFTvifZIbUWt8X1frC8h2PzxGz9OCEf1Z3rcmEG8NsDVWv26ikLD1IvmXES4HoTGyqHB+pAeNla2fkhDY+WArICYeOTA9zRWhpOyQmb4RFkhuIHGyihkWaEpR9kvK2SWk3FRd7TbmKkeiURyCtV10sKa3WtcHxeCuwICALIhWIf3ahvTh+q69EuDdTuhup5Mm9e7OiwNP7d0bJHqwUaWuYTqq7aukt4YJ3VfrIAoYlkhDcg1VD952snybve7xnFlmlE/Qz5z5GcSal9rmY4x1WOGGuBqrX49rvZ175MntzxplAIhWA9AY2XrDC2PGiu/t+m9thsrM/PKQz5eAYGA8XljZQQQKyDgl7JCh5wssvZ+14cWjw9M5EoO1XXii51Q3boSEB7y6QoIABgJa4zhLZ1ZPGl++lBd2QnV9YQ6bbHrQ7POKM4lVNcLtV2du1wfF3JcAeGDskIaqusKCP2wRr+OGHuEzGmaI+NHjTe+4hKXP+z4g1SGK2V05Wg5rPEwY3b6lPopxs/rK+tZARG0xsoFKCtEY+UAorEyyqSxMgKGxsrwU1mhAjZWbqhqSAnV1+9Z7yhUH1M1MOkFXqCxMoDg4iob3qqsERm8wEkJ1TWkthOqW0+oBZAcqmvjGjuhunmhFuY/K3+sgHASqhegrJC5AsLECogyaKxcgLJCisbKAUZjZZRwY2UEEI2VUQZlhbS2+pyxc1JC9daeVtuh+qIJi1gt6iUaKwMIMN554LFQ5lBdl345CdWj7s+W0TIdyaG6dpS3G6rrhZrWxkaAVkDoMWWdHeESVkAEtKxQPisgClBWyIrGyiWyAqLIjZVRhmWF5p4jMljmDjDQWBlulRVKDtV9UFaopqImr1BdJy3MGz/P9XEhuCsgACAbrorgvVg0fajupElJxy6R/e5eiOmF1tT6qSmhuja/sRuqnznjTONf+GAFRBHLClmxAqJEGisXuayQNlZODtVprOxjPl4BgTJsrExZIdhZAWE3VGcFBAJSVig5VNdG8HZCdeukBXjMhysgAGAkJDbwWFyk+fX8QnU9oW5Y7vqJU+tc5xOq64XauNpxro4JeayA8EFZIVZAlFBjZUVZIbixAqKIjZVRpo2VARMrIFAmZYXSheoLxi9wFKpbV5+ifFdAAMBICNbhrb5uka7W/EJ144RauAud5FBdO8rbCdWtF2rwwQqIIpcVYgVEwFBWCGXUWBkB49PGygggGiujTMoKbdy3MSVUt9ZMHylU39O1h5V+XgrACggAyGTwihzwSDyaOVTXpV8dzfbrqekFvsv29+2XTa2bEkJ17SjvJFTXABUer4DQ2aE+Kivk1gqIkDkjH8ForDz2UJHm9QUbImWFSmQFROOMoq6AQEDLCumHf4qyQih2Y2XrtTrKW6ayQhOPLGpZoe7+bmnrbZPqSHVOobpOWnhiyxMSE8qKeMbnKyAAIBuC9RIVjcUlFh+cMZdkd3u37fupqYrImJrhgNAVkYr0obou/ZpylL1QvWHK8Mx3l2hjvw17Nxid5K2hutn8xk6ovr5
lvbT3trs6LthYAaF/8PmorJBbKyBaOlsKNi4UoLFyAcOGTGWFZh802/ieskIBWgGhwbrdFRAF/KAGASsrVBHxbWNlBHAFRMtGGiuj+GWFZp8l8uZjrg8tOjiRK9dQ3TppAR7z6QoIAMiGYL1EQ/XuvqhUV4aNf818PRIOSU1lRO5/aavxfTwel5b9PdLbPxAqhkMhGVdfLVWR4RNSNB6XS0+e6WK4HhKZvDB9qO6k8/chJ4usvV/c1B3tNsLPSCSSU6iuF2prdq9xdUzIYwWED8oKsQKiDBsr+7Ss0GkHnyZPv/O062NDARsrF2AFhNaczTYDz8mHeTUVNQlhBYrUWNnJCgiXZTueOJZ8ihUQ8FNZoVDhVmamC9W3tm9NeG8aKVSvClOO0VM+XQEBACMhWC9BOlNdQ/WDaislNHrgxNNQWynvnTJGIuGwnHvUVOmNxuTxN5qlszcqepOqirB8eP5kGV83XF7l2b+0yAub9kp3b9S9YL2yVqS6Pr9QXU+oXYMBhYv0gwbjIZJCdf3D0U6obp39AA9lWgFR5LJCrIAIGsoKwWeNlV1eAaHnsgN9ByQcChvHhfW40vcr/XD59ldvHwoTdIWD3tb8udaaNRvgalmhKXVT5BNzP0EgWuzGynZXQLhcVkiPp67+wffLQdbj6hd//oURTulxY4bvmY6r0VWj5bL5l3EsFXsFBI2VUSJlhUKhkMxtmpsSqm9u2yzvbXqvrVB94qiJXIN7yccrIABgJATrJUpnqmuoXl0RkYNGVcqCgxulIjzwh0zjqCpZvna7dHT3y+jqCiOEP+/oaTJxTM3Q77/89j7ZuLMAM2Wty7OSQ3Vd+mUnVDdPqAUo39FQ1ZASqmtH+al1U22H6mOqxrg+LuSwAqKIZYUUKyAChrJCKPHGyhpuaqC5cMJCmdU4K2FljX4IqMeVHlMaJiyZviThuFq1dZURoo6SUcZxdeykY+W13a8ZdWwJQwPSWNnl1Q9mWD6udpxxrGi5KuvKmhMmn2DUKB5TPXBNlOm4erf7XeOLY8krNFZG6ZcVqonUSF1lXUqobhopVNdJC8dNOk6Wb1ru+tgQzBUQAJANwXqJMsu/JIfq/bGYEapvf3dgllGmUP25v+wp7ADThepOm5QccHdmqM4snjN2Tkqorh3lNVi3E6ovmrBINr670dVxocArIApQVkixAqIcGytTVgj+XgGhx4WG6maAqceEHld6/tMPAQ9rPCzthzV6/I2uHJ2wAkKDdXjA542V9Vg5YuwRKT0g/rDjD8bsdf3KtLJGjystA0MtY4/RWBklXlbIXBWTa6iu71f69x+KwIcrIABgJHR3KGFa/iU5VF/3TqujUF3vw3WtW/MP1TeuGP65S/SPu3Shut3wUy/U5o2f5+qYkMcKCLtlhczjqgAyrYBwEqqzAsJD2Ror+6CsUHKoTlmhAKyAUD5aAZGtrJCTFRAIWGPlAsrUWDk5pKKxcomtgLC+n6G8ywopn5YVSg7VZzbMtBWqm+9Xev0Fj1dAJIfqPlgBAQAjIVgvUdqoVGuqJ4fq73b22Q7Vjz10rNS71rR0kF647N2UX6iuJ9S2He6OyzrEpFBdm9/YCdWtF2rwmA/LCmVbAaHsroBgabxXXGisrGWFRk8oWFkhRVmhgPB5Y+V0oTorIHyshBsrnznjTONfBKix8rTFHg8a/uTvskLbO7anhOrWDwFHCtX1/Up7QMDjFRDJoboPVkAAwEgI1ktUTWXEaFSaa6h+4nvGycKDC7DM07rcNzlU10+Y7YTq5gnVOlu5gKG6dpR3EqqzpNljbqyAcLmskGIFRBmWFZqzrCDvS5QVKrEVELZC9cKtgNDyG+lCdVZA+LysUD6huo8bK2uNdgSssTIwUlkhJ6F6AcoK9cX6ZEvHlrxCde0BYTbshgcCsAICADIhWC9RZuuO5FC9MhyyFaofc8jYwg4wXaiuS7/shuq6vc79maEb921MCdWtM4ZHCtX3dO1hdoOX3Fg
BUYCyQglDZAVEMLjZWLkAKCsUNP5cAeFGWSFWQHjMp2WFrCgrFDA+a6yMgPJxWSHrJKdcQnV9v9rVucv1cSG4KyAAIBuC9RIWTROqHzX9IEehem+0AH+INc1KH6o76fw99xyRiLsz+br7u6Wtty3nUF0v1J7Y8oTEhHp8nsm2AsKnZYVYAeFz+TZWpqwQfLwCIltZIVZABLSxcpHLCmVaAUFZIb/y7woIBEwAygolh+r6XmMnVDffr8JEJd7y6QoIABgJZ4sS/sz3jzvaU0L1MZZmpCOF6i37e2RPh8szUrSOZuP0/EJ1PaHWT3R3XBoqDP7xmmuobr1Qg8d8WFZIsQIiYCgrhBJvrJytrBArIHyMxsoooxUQCAp/lxWisXLA+HgFBACMhGC9RHX3RaWtK/dQfVd7tzz6+k6JDf4R7hprg6rkUF2XftkJ1a0nVJelCz+1o7yTUL0qTBMuT2VaAVHkskKsgAgYygqhTBorZyorxAoIv/JnWSFFY+UA8nljZQSMT8sK0Vg5gAKwAgIAMiFYL1HRWDyvUP1Xr26T3v4CBnrpQnVd+uUkVNf7cFEoFJK5TXNTQnVrR/mRQvWJoyYyu8FLbqyAKEBZIcUKiIBxo7EyZYXg8xUQbpQVYgWEx3xaVkjRWDmgfNpYGUHj37JCNFYOIn+vgACAbAbX2qAUVYZSQ/W39xwwgvORQvWevoFQvaqiAJ+99HSItGxMDdWddP7e9opI9+BSVpfURGqkrrIu51BdL9SOm3ScLN+03NVxIccVEHbLCoXMVr/erYBo6WwZ+p4VED6TqbHyxCOLXlZIZ4cqygoFaAWEGT75ZAVEtrJCTlZAWN/DEIDGyl2DAUUBZFoBMbVuYLYoZYUCtAJiylH2V0CY5WRGcMHKC1wZ9UNLH3LlflCAskJ6nivBxsqc54rApysgAGAkzFgvUZoVHjm1MSVU/2vLftuh+qSGGhlf5/KMFL142rk2v1BdT6j65bKw5Y/XXEJ164UaPObDskKsgAigvBsrU1YIg2isDLfRWBklvgICAePzskI0Vg4a/66AAICRcFVUomoqI1JXW5FzqD71oFr54JGTjHDQVX1dItH+9KG6XvjYCdWtJ9QCSA4/taO8k1Bdm3nBQ7oCwmdlhdxaAbFk+pKED3xQQJQVQpk0Vs4UqrMCwsd8WFZI0Vg5gHzaWBkB5dOyQjRWDiAaKwMIMBKbEhWxBOK5hOrnLJwqVZFCHB7xzKG6Lv1yEqrXNLo+uu0d21NCdWtH+ZFCdb1QI2zwkBsrIApQVkixAiJg3GisXD+xYMOjsXIA+bSxspYVyidUZwWEx2isjDJprIygobEyymcFBABkQ431Epccqh82vs5eqD5YW91sTOWq2sb0obou/WqcYS9U15Np83pXh9UX65MtHVukenDGaS6h+qqtq6Q31ltyjXBjGY6D3ZZ6/SOpqYrImJrKwqyAqIj4qqyQFSsgAnQ85dtYuaPZ87JC7216r+2yQqU288rXx5IbKyBmnyXy5mMFKSuk5zlWQAQEjZVRiBUQbdt8twICATvXuVFW6JCTRdbeL26jsXKJrYCwXl/TWBmADxGsl7Bt+zplR1t3Qqh+yLjRtkP13mhMWva7PMMpFBGZND99qK7shOp6Qh17qOvBuvUPu1xCdb1Q29W5S0rtYr6rt19GVw+/Vehx0ds/cMx88/GN0lBbKfWWC3X9+Z6OnqE/AvR40lr9+u8n33eIywFW3NdlhXQFRHPn8MVgua+A0ONpf3efRCIho1yVdWWNHjd6PJlGOq7G11fL5087zN3jyY3Gyj4tK1RqjZX1WOru6zc+dND/r/RwMo+r+1/aamzr6O6Ttq6+rMeV/v4XTnf5WPJpY+VsZYVorOxzNFYuqmV3POfK/ay44kQpKp82VkaW66aePgkPng8i4YHrJ/Ps8LMXtxh/q5nX5Xq7cfXVCauOzfPg2NFVcsUZ73HvXOdGWSEaK+etZN6bPG6sDABuIlgvURo
WvLX3gFTrTN5cQvX+mDz+RvPQhZprKmtEBi9wUkJ1DRPthOp6Qi3QzNB04ac2rrETqpuzH8IlVGFJQ0wN1U+bM8H4Vz+s0ePK9IH3TpKFBw+X5NGL+0df32mEV2YDXK3V39HdbxxP3b1R92eGZloBoasfilhWiBUQ6Y8nDdVPmz1BJoypGdpuHlcHjRr4w/3YQ8dmPa7qayoGglU3jyezrJB++FeCZYXM2aSldCxpqN40uloqK0JSGQoZDbvN3iLnHjVV1r7TKv/31r6sx9Uv1rwjHV39hXlvyrexcoHOc5lC9XJfAeFrPm+snM8KCMoKecznjZWReq7TsHxCfY2Mq6+S904ZI5HwwDVBNBYz/s7r7I3K6KqBiSwfnj/ZmMxiMs+Dert9B3oLc67zYVmhbI2VNVi321h547vDEz5QYD5eAQEAIyFYL1HWQDyXUH352u3SbJnt7p5Q5lBdl37ZCdVNUfdny8yon5ESqmtH+dkHzbYVquuFWlNtk5QaDdX37e81VkBYP6w5872TEo6r1X9ukcpI2PiyHle6VLUgZYWyrYAoYlkhxQqI9HSmlYbq5h92Wq7KPK70OEvXAyL5uHrfrCb5xSuWZeyFLitkdwUEZYU8pR+uaKheV1UhR00/SMYMfvCituzrlD/uaB9abZPpuKooZHPgTCsgithYOVtZoXJeAeFrPi0rpGisHGA+bKyMzDRUP35mk1QMhur9sZise6dDph00yjjPVVeG5byjp8lEy6SFl9/el3AerKkMl01ZITcaKx9cfzDBupd8vgICALLhqqjEJYfq7V19tkL17e92Gd+bSw9dFYumD9WdNCnp2CWy390LMb3Qmlo/NSVUN5vf2AnVz5xxpvFvqdEZxcm1+oteVmikFRB2ywpNWyyFxAqIMmysXAA0Vk5PZ6onh+p6XD33lz22jyvzfaocGiu7UVaIxsoeo7EyyqSxMtLT8i86Uz0xVG+VdzsH3gcyherW86Cu2LKWQXMFjZVRCD5cAQEAIymtxAYJDm0anRKqv7b1Xduhum7XOn3uios0v55fqK4n1A3LXT9xVoYr8wrV9UJtXO04KdWyQibflBUaaQWE3bJCBZRpBYTdUL1UV0AEsrGyD8oKmSgrNEA/o9HyL8mhupMPa7RclXXZvOsrIJSPGitnKytU7isgfC+fxsoF4sYKCC0rVKrnuZJdATH3HJHBMncoPKN3SB6hup4HrWXQyqWsEI2VA0hXQCSH6j5YAQEAIyFYL1EaPk0bOyolVO8bbPI2UqiuF2pap8/1maF93cNNRXIN1Y0TauEudJJDde0obydUt16olRL/lhUaYQVEkcsKsQIiYCsg3GisrMeU9f3MJZQVyhw2mDXVc10BoT0gNBx0n78bK6cL1VkB4WNaVihdqF4CjZWXTF+S8IEPCsyNxsoFXAGBVOYZKjlUrwyHbIXq1vNgOZUV0sbKyaF6KTZWLhkBWAEBAJlQY71EWQPxXEJ1vVArRNQgg/U404bquvTL2qxtpHpqeoHvsv19+2VT66aEUF07yjsJ1TVALUW+LCtkroDQ2aE+Kivk1gqIUGH+Kyyqkm6sPPbQgtTrH3oIygq5XlaotbOAs9F82lhZywo1dw6fa1kB4XM0VkYh+LCxMjKLDtZUt4bqWgbNSaiu11+elRWaeOTA9zRWRomsgACAbErrr2ykSA7VDxpVaStUt16ouS5SkT5Ud9KkpGGKyGh3L8R0WfuGvRtSQnWz+Y2dUH19y3pp722XUuPPskIurYAoQFkhK1ZAlGFj5QKgrFDAygr5dAWEG2WFSnUFhG9lKytEY2W4uQKiiI2VkZmeobQJaXKobi2DNlKorqv89nT0lE1ZIRorB5hPV0AAQDa885Sw/V39KaH6goMbHYXqHd2DF0euCYlMXphfqK4n1DnLXD9xdke78wrV9UJtze41Ump8W1ZopBUQPigrpCsgkkN1VkCUeGNlygp5ypdlhXzcWDlbWaFyXwHhXzRWRuk
3VkZm3X1RaevKPVTX8+Cjr++UmNsfIvu8rBCNlQOIxsoAAoq/ikpUNB6XN7a3poTqZkd5O6H62ndahy7kXFNZK1Jdn1+obj2husictZgcqmvzGzuhuvVCrZS4UVaoIM0BR1oBYStUL1xZIVZAlGFjZR+XFaKxMo2VrdKF6qyA8DEaK6PEGysjs+jgNXeuobqeBwtznnOhrFCB0Fg5gHy8AgIARkKwXsKzG/riuYfqeqH2f28N/sHmJuss8+RQXZd+OQnVC7B0uKGqISVU147yTkL1MVVjpBT5sqyQGysgClBWSLECImArICgrFDj+LSvk78bKmcoKsQLCp3xaVkjRWDmI/N1YGelVhlJDdS2DZidUN8+D5vmvHMoK0Vg5gHy+AgIAsvH12aKnp0euvfZaWbx4sZx44ony4x//uNhDCgxztV9yqK4d5e2E6tYLtYJIF6o7bVJywN2ZoZFQROaMnZMSqpuNteyE6osmLEqYHVEq/FlWyKUVEAUoK6RYARGwFRAjNVamrJBv+bKskE9XQLhRVqhUV0D4lhuNlQtQVsiKskIlsgLCSahegBUQSE9PUUdObUwJ1Z007J7UUOP+tZOPywq50VjZfL+Cx3y4AgIARuLrK9lbbrlF3njjDbnvvvvka1/7mnz3u9+Vxx9/vNjDCoyG2tRQfd07rY5Cdb0P17VuzT9U37hi+OcuqamoyStU1wu1eePnSanxbVmhkVZAFLGskIkVEGXYWJmyQp7yZ1kh/66AyFZWqNxXQPgXjZVRHisgkF5NZeT/t3cfYHYWVQPHJySEBEIoggKKih0BBUGsnxVUsPcCYsOCYu9i7x0RFBS7oCKK2EBQQUSsoIANCzYQVJAikEBI+Z7fJLPMvrl19+7uvZvz1/uQ3fvuW2bOnDnnzJnzpkUL5004qG4efMiOW+XyKOtaWaF4sfKIMaQ7IIIgCEY2sL5kyZJ07LHHpoMOOijtsMMOac8990z7779/Ovroo2f61kaCuevNSTtss3itoHp5o3wvQfXdt9s8bbxgwIF1hst/z59cUN2EeuVFg72v+hYbQXUvv+klqF4barOJoS0rVDOEZYViB8So7YCIskKjxtCWFRqBFyu3CqrHDoghJl6sHMzyFysH7ZlbBcQnElTP8+BUzHNDXlYoXqw8YgzxDoggCIKRDayfd955afny5WmXXXYZ+92uu+6azjnnnLRyZawe95LdMHcSQXWG2s7bTsE2z6oe51pBdSvMvQTVy4Q6BeU7WgXVvVG+n6B6XXN0NjD0ZYUGsQNiwGWFEDsgRmwHRJQVGjmGtqzQkL9YuV1ZodgBMawMZ1khxIuVR5HhfrFy0J5mUF0ZtJ6C6mvmwWJLrAtlheLFyiPICOyACIIgGLnA+iWXXJI222yzNH/+DVksW2yxRa67fsUVsRLZjZLb0Ayqe6N8L0H12lCbEloF1W396jWo7veLBp8Zet5l560VVK8zhrsF1S9deumszG4Y2rJCg9gBMQVlhcbdYuyAWPderDwFRFmhESsrNKQ7IAZRVmi27oAYWoa0rFBNlBUaMYb0xcpBey68bMlaQfW+Xti9YmW65Orr1pmyQvFi5VFkuHdABEEQdGLOqilZvp48xx9/fDrkkEPSqaeeOva7Cy64IO2xxx7ptNNOS1tttdXY73faaae0YsWKtPXWs/ulFYymXsg9OielhevPTdevWJnWxBpysJ2BtWiDG+r0CThct/wGZ0vAoWQBrly1Kl2zbEXaeIN5ubRMR666uMcbW7V6QqyynPLvi4HOqDGRrr/h6rf15HtfldL1S1ZPrOVJZJcuvzYlQe+ypbUFjKdeWLVmMveymzlrliU4fuXfG66/YTbAakPN9/X2ZQGLa66/JmeELN5gcS4FMhtkSctsOH/umtqMq8bkhlyRFb9eOH/uuK2qTblaf+6ctHzlqix7A5GlcnPuRzu7NrkpRli+6MKUll2z5p59P3e1XI0tO62Rq7zlcFVKCzbrKEv9yhP
ZkbUu0FDXaCQzG61/gzPUSa783ZLrl6RF8xfNKnkiK2LpxeEraLMVaxRWJ7mim8piTUd56kuW1ugVJTzGdE26Qa7mL1pz7IpKrtJ4fSVYsWzJ6sz3AcoSFs5bOKaPSjYW+ZAx6vcL11847mVdTbly3PJVy7PszRZZ0hwL5q2e54ohQxzI1UbzV+sCv1+6bEVbuSJLV1+3Ii1eMGjdlG6Qi3yh5TfIlXlu3oKU5m6wWh2tasrVGn3lGDqqyzzXqzytWvM//U9myEudcQx63qJN/ncHucpnGoG5rh90W9Etk8HLcLuWMp7oPJdlpjo5mRr7ec5q+8oxY3+/Rq7yHDQ185z5qgTUUeSK3VRYunxpW7kqtpMkhtmgm1BM11Zs2cc7HXqSpYnopmwrVTdY5KrMc2xyNnahqa9WXp/SsqsHLk+d6Gdng5fhDrye+EzOdeUl23NusKlXz1dz0kbz5+adgOa5cqzvzHOVtZuWXLc82+GDtZsa8xybo9i77Nj5G62Wj9K14+Rqjb4iiwOc5/KtrZmfLBjTRbVeyXKW/7/6phzDvqop+mpU5rnZo5tWrY4PNHfqkassG2suSK7q/qjlynnc2AabDCQ+MEjdVMcYZiticUcdddRM30YQzAhDG1g/8cQT09vf/vZ0xhlnjP3u/PPPT3vvvXf62c9+ljbd9IZtY7vttltatmxZ2nLLLWfoboMgCIIgCIIgCIIgCIJg3SIC68G6TJXeOVzc5CY3SZdffnmusz5v3ryx8jALFixIixeP385+5plnztBdBkEQBEEQBEEQBEEQBEEQBOsaQ1tjffvtt88B9bPPPnvsd2eddVYu+7Lemnq8QRAEQRAEQRAEQRAEQRAEQTDdDG2EeuHChelRj3pUevOb35zOPffc9L3vfS996lOfSvvtt99M31oQBEEQBEEQBEEQBEEQBEGwDjO0NdaxdOnSHFg/+eST06JFi9KznvWs9PSnP32mbysIgiAIgiAIgiAIgiAIgiBYhxnajPWStf6e97wn/epXv0qnn356BNVnKf/973/zi2rvf//7pzvf+c7poQ99aPrkJz+Z6+sHQT+ELAVTgZdjf/nLXx77+alPfWo69NBDp+36t7/97fNLu4N1h6uvvjodf/zxPR1Lvz3ykY+cVpkMhmO+O/HEEyf89695zWvyB2SHXgvWTUKWgkES8hQMkloe+rHV++W4445LD3jAA3o6NuQyCIKRCqwHs59///vf6fGPf3z661//mt73vvelb33rW+kFL3hBOvroo9MBBxyQVq5cOdO3GIwIIUvBVPHtb387HXHEETN2/R/96Edpl112mbHrB9PPZz7zmfTVr361p2OVyTvvvPOm/J6C4eL9739/Ou2002b6NoJZQMhSMEhCnoJBctBBB+XPMNnqz3zmMyOZIQiCccwb/2MQTC/vfOc7001vetP08Y9/PM2dOzf/btttt00777xzzjb+4he/mPbZZ5+Zvs1gBAhZCqaKma6YtuWWW87o9YPhlbm///3v6XOf+1y6zW1uM+X3FAwXM62XgtlDyFIwSEKegkGy8cYbD53cbbTRRtN2rSAIRoPIWA9mjMsvvzy/lPbZz372WCC0sM0226THPvax6Utf+lLafffd06mnnjr23YMe9KD06le/euznD37wg+kVr3hF/vcf//jHvDXrTne6U3rwgx+cs5ULVpZf/vKXpze96U3pLne5S7rHPe6RjjzyyGl51mBqCVkKeuGss85KT37yk3OZIAsu5OU///lP3v6prz/84Q+nu93tbmm33XZL73rXu7KRrgTLa1/72vTPf/4zl2S58MILx3ZI7L///mmnnXbK8vHjH/947DqOO+SQQ/K5nve85+XfKWnm2q5rq6mFnoItrkoYOZa8eXH3L3/5y5alYJYsWZLe+MY35nP7vOENb0jXXXfdNLZi0A8XXHBBLmNH5h7+8Ifn0lT6n8yRB5l9diPc7373S8cee2z+G98ddthh6ec//3nu+06
QhRe+8IVp8803X+s759lrr72yTD3mMY9Jv/jFL6bsOYPeoD/06Q9+8IMsB/re2Dff6CP64bnPfW4uBQTzVjmOjvrDH/4wNgd97Wtfy5+ydf3Pf/5zfheRY+mlpzzlKen888+f0H2SxYc85CFpxx13zHrmLW95S1qxYsWYvrIr7CUveUmW67333jv97ne/SwcffHDWnfe5z33GlYFop3eDyRGyFLI0SEKeQp6GVd7qUjD/+9//ss2jP+9617tmn80xrWx1cvm2t70tPfCBD8w2luP66fPvf//72R4ns673spe9LF1zzTX5uygFEwRBkwisBzPGb3/721wbltPfCgFLE6yJU4ChBLP+8Y9/jAs6nXHGGen//u//0rXXXpsnyF133TV94xvfyAHTj370o+Pq1J500klpgw02yAYfI09QQ+mQYLQJWQq6cdVVV2Uj/V73ulcuEyTAqf/tcCiBb/0n4C1YLQtYsJyx/7rXvS5ttdVWuSTL1ltvnY8nC5w2W085eK961avGZctYwHEuRj8H8mlPe1qWP8FOToH3h3z3u98dO56TKuuYPDnuOc95TrrsssvWeo7Xv/712TEgj0qA+PeHPvShaWnDoD/oJDK3ePHiXNZFnwqYF37961+n3//+9+mYY45JBx54YA4QkDFyZZsx2fNzO5zTosoTnvCEtb4jZxxK1yer97znPfP16b1g5qF3jGF99PnPfz73v8Vaeunss89OX/nKV9Ipp5yS5YU+ohfMR/vtt1+68sors3xYNPFxrFJnFubs2vr617+e9YlgkyBTv5gjBTgEEb7zne9kuXQNQYbCZz/72bxQbX7cdNNNs35TV5ksC5JYdHZP3fRuMHlClkKWBknIU8jTsMlbjQSYSy65JNvX7HRl8Px9O1udLUTWyCsbvdc+9/sXv/jFeRHIYgw7m08wmRruQRDMbiKwHsxolnGn7VSbbLJJ/u/2228/lq155pln5gnxoosuSpdeemk24kyqgqHf/OY3041udKOcqXDLW94yG1CMORNvgZElSHqLW9wiZ5v6+Te/+c20PG8wdYQsBd2wWPL85z8/191XIogjaMfCn/70p/w9R49hf6tb3Sq/CPIOd7hDDnzOnz8/b0O1E0JJlrIjQpa6rJqb3/zmeRGGoc95KzzxiU/M5xIsZ4jf8Y53zM6g3z360Y9O++67b/rEJz4xdrzjBOFvfetb56wbMnvCCSeMewYyypmUpez+d9hhh/TWt74178oIho+f/vSn6eKLL85lqvSvjHX9XpgzZ05673vfm253u9ulxz3ucblkFVlZsGBB2nDDDdP666/ftgwQWbPDRv87TxMOqmwq2VZkjmy5zlFHHTWlzxz0Bl1ExzzsYQ/Lc42+Nx8Z13ZA/eUvf8n6QRDAy7jNQ+YjwSkBI3MdOfGxW4F+e9KTnpSz+ugkuoGekSnaL2TvHe94R9aPN7vZzXJ2KP1VdCUsJgo4mP88w9KlS/OiH/1F7ugq82o3vRtMnpClkKVBEvIU8jRs8lYjI52M6X8+nd2hdiW3s9VlqkuuIhf99LnFF3IjccG17n3ve+cEhZCPIAjaETXWgxlDIBIy6ExaTWz3AgPMarTsAlvZTbgCqTI1YcsX483kKzBav+RPsKwuDeI69c8mZ1mFwWgTshR0g6EtyOilkLKEOXW2LjO4waBftGjR2PH+3ak/GeX1sahLsnAyCzLWm7spyJbMrUK5D6y33nrZWWxulVZPmxxyTAu2p/oEwwf52m677cbJla3HdjmA40/uChy/WiYKFgEt3hQENMp2acHyVpAdzmONa090+30wWGr9IQBV6ws/L1u2LPeVTDsLKAU65m9/+1vLgJPt7XYnWOA1hymBsMUWW+TvLcZZMC4UGWwFOXQPMgOLnqR7BBYK9TzrWNfxX9jJBc/guE56N5g8IUshS4Mk5CnkadjkrcbOCMFxQXcfSS6SFtpRn6+bH1BjwUiw/vDDD8/BdB/HS7wJgiBoRQTWgxl
DcGjevHnZ0GoVDFWaQVDCpCgAIbjgIzgqy1gJD4acDGMIgplkGWntkAHYJF6yM/qELAXdsOgiq4WsyDqRhaK24znnnJO/Z0D305/NWv7N44sD1/x3nQ1T6oKC/Nb4ToC9m8wFwwsZacpQ/XMvfV6CCXUZKrsZ1JbldJYMdJlY9JwdDQITrWTO+cldMPM09Uerftdftrabi2rqhZqCuq92PWy22WZ5h5XsPwEs5aJgS7uSZYUb3/jGbe/t9NNPz4syAhDmRP9WcqGmKbut7r8XvRtMnpClkKVBEvIU8jRs8lZD5k477bRc/kd/8dOUflGOsxW1LdRPn0uusiBEZiWveFeOMkNBEATtiMB6MGPIDN5jjz3SEUcckf/LGLJ93YSpFqy6feXFf7IRvJzSFjCZnLbX24p4xRVXpHe/+935GIFTE22dSayen3IOtnMFs5eQpaAb6pkLSH7sYx8b+x0Z6WUxpFWpjX4gT80XR5bFnoLsmdppZdTbwtrM7CGPvitZ6mT5Ix/5SJbxYLi47W1vmzP4vDCrBBy8D6Ig007QoZSwsjBYMtBrmRNAtyBYc/LJJ4/7WakXL+N6xjOekX8mW5xF+rDg59jdMDrow3/961/j+l6ZKH3qZWxkpOgvtYe9gE3mZwksCTaU7+2MqHdHdHs5oOCDWsRloVm92bvf/e7TqneDwRGyFAySkKdgppBtbnexxCgfiQRkrxdbvZ8+5/N539EHPvCBcTabkkJBEAStiBrrwYxy0EEH5bIctrnLIJaFt2TJklwHT3kPW75KMNQk583csjYFBwQoBENtb8cjHvGInLVn9do2RUFVtfh6NdiC0SZkKegEGbA74Sc/+Um64IIL8suKBCeb20xbsXDhwlyXU5B0IuV+1PsUOLdt2gtSBcG/8IUvpH322WfsGM6nDC6ZXGRNXVD1Q2sEZ2Vq+f7cc8/NCz0HH3zwhJzKYOqRWeUFWl7wRo/IJq/f00A/CRD4Tm1135OVInOCERdeeGHLcwto1B/Bdw5j2fYsu0o2u0x3Mieby4KMzMFgNLBIIkNOHwoeKb3gJWrFsScjFohl4dFv5MlCG5kRgDr66KN70m9NnMvCny3ytr+rjewdEhM910T1bjA4QpaCQRLyFMwUFnS8W8aLTdnkJ510Uk6S6sVW76fPHUvO2NpsKIlXbO6QjyAI2hGB9WBGseVPQEH2g7eAc/ovu+yysW2BsowFF7zh3Uq0F41AzTwvwbGVq2RACDodeeSReUIVfJJZLHClHm0w+wlZCjqx11575QWTF73oRTnjyUtsvXxWULOboSxwLXipjmOdWd4rXi4qQ8Y2ZudQs5FD6D4Ktpt62SV5U3/005/+dFq8ePFa57L92oueOLYWkSwgvfSlL+37noKpx5bmQw89NAcX1OX0fgd10UtJH0F3NT/pKrtmBCeKXtpzzz1z2RYv8qpfitsre++9d5YLtWjJfVm4iWyr0aHuQ+UTBAPoDrVfQaY4/PrXonApi+Dn4447Li8Mkx3y1w8HHnhgXkT2AmZ6xlZ6W+Inovsmo3eDwRGyFAySkKdgplA6SE30Aw44IMuZRRu2Uy+2ej99LimL7EpSkPAgIE+O2edBEAStmLMq9jwFQ4rJ8phjjskGlJffBMFECVkKhhlBdpRSRMHsQOCAE1be3QABdDtgbGE+7LDD0imnnDKj9xgEQRAEQRAEQRBMnMhYD4YWAVAZCREIDSZLyFIQBDOBrCplf2yL//GPf5y3zzdL/ARBEARBEARBEASjSQTWgyAIgiAIBowt6x/60IfSF7/4xRxM9x6Ifffdd6yOehAEQRAEQRAEQTDaRCmYIAiCIAiCIAiCIAiCIAiCIOiDyFgPgiAIgiAIgiAIgiAIgiAIgj6IwHoQBEEwaX72s5+l29/+9lN2fud2jYnyjW98Iz34wQ9Od7rTndKTnvSkdO6553Y8/rvf/W6+Zv150YtelEaNCy64IL8ssx2HHnpoeupTn9rTuZYtW5a
+/OUvD+S+fv/736df/vKXAzlXEAxCj/QzFqaaFStWpPe///3pXve6V9pll13Si1/84nTppZf2NEYf9rCHddWVNqt63vvc5z7prne9a3rJS16SLrvssrHvm7rv7ne/e3r961+frrnmmo7nfcADHpCOO+64NIgX/5544olpKrj66qvT8ccfP+nzzFZZ+ve//53nut133z2/ePld73pXuu666yZ8fjJRy9Juu+2Wz6+PO6H9tOOw9Peg58TZKj9///vf07Oe9ax87P3ud7/8wu5ONHVN+ZQ+I3uve93rstzc+973Tp/61KfWsjvL5453vGOWt27XvPDCC/Px/jtstkx5losuumgt+01ZOd+VceHF9z5TbbvR6dq1He5nxx13TH/605+mbE4YBL2213Tawq3m5qOPPrrjMf/73/9yacF73vOeeW72TH7XZPny5emRj3zkWnrU+37YCXe+853Tfvvtl+WsHhetPr/4xS8G/KRBMPuIwPqIwzB9+9vfnu5///tnBfnQhz40ffKTn8zKdDoxEXz84x/PE+hd7nKX9LSnPS39+c9/HkjQqhdFf/7556dnPvOZ+dru4YgjjkgrV67M35nQ67/ZYYcdcr3bbob2IAOFP/nJT/I9TkW/77TTTvmZZ1u/F/SjF/494hGPyDLumT37FVdcMWVG4kT49a9/nQO27lEAtylfP/jBD7KBw9l4+MMfnr7//e+PfccoqtvEOXoJ/g7K+erFkJsMAiTdnOip5Mwzz8xG6POf//z07W9/O/fBs5/97I5BInJM1n70ox+NfcjdqMEh7SZHvaLt6NZB8IIXvCD97W9/G8i5gmC2YV494YQTco3+Y489Nl155ZXpVa96Vce/EYB62cte1jK40eSYY45JX/nKV3LAjO7/z3/+k3Vkc36h9374wx/mcU+PvPe9703TgfvqtCA4GT7zmc+kr371q2ldoR9ZYguwyZYuXZrl4uCDD06nnnpq/tvJnN88VGTp85//fD7m1a9+dZoOprK/Bzknzgb5Ya8/5znPSZtttln62te+lt7ylrekww8/PH3zm99se/7axvLZf//9001vetP0wAc+MH9P5/zmN7/JfsCb3vSmdNhhh6XvfOc7Lc/xve99L8vaRz/60XzP08FU2DLrr79+OuWUU9ay3zzfnDlzxn6ms5t6e6bk9Prrr8/9vS4wle0prvHWt7614zHGwXnnnZfHJt9ffMHCdxOLUI6rsWBDZh/zmMdkG2DzzTfPvhHdv/XWW681HgXgxRl23nnngT9rEMw2IrA+wsgqefzjH5/++te/pve9733pW9/6VlaWjOEDDjhgLLA8HXzpS1/KCvwNb3hDNmBvdrOb5cAV43yyQatuit41GHI3uclN8iRhwmGAWdkvbLXVVmN/e9JJJ+XjTUK/+tWv0nTw9Kc/vadss4n0OwNW1tls6/eC7Bj9+bznPS/L+Lvf/e6cHcL47pRFNZ1cddVV+bkFbMs4JF9nnXVW/p5hc+CBB6bHPvaxOeAuaO65aoNnr732GmsTDong+nOf+9yuGYLTZchNlH/+8585G7KdTEwHl1xySTYcLWxsu+22uX8szHRa7PLd7W53u7TllluOfRYvXpzWZeKVLEEwfVmir33ta3M2+W1uc5u8gFrmk3Zz6xOe8IT0j3/8o6fzC1rvvffeOSuZnjOf/vSnPx13zCabbJL1HtuKrWU+mqos8unUNeuaHutHlv7yl7+ks88+O2ep3/a2tx3LLmfXTOb8G2+88Zgsbb/99umlL31pOv3007PtNNWELE2f/PBz9O+b3/zmdMtb3jLd9773Tfe4xz066q7axrr22mvzwgu/gMwsWbIkB/MFjyVF7bnnnllXNRNByt9vs802aY899sg+4nQF1qcC405gvbnzgs8qK7+gjXyGQU6Nbfc3VbtDhomZ1CnGhDjGG9/4xrxLwLiwAGPRpfaJ7Rz53Oc+l8dsjfHk7yQj0vF0PT/t5z//eZo7d+648SiT3bX
e85735MWeIAg6E4H1Eead73xnXtW3YmkSFjDiKB111FE5Q7MOLE81AoGUtKDpdtttl40qgat22+P6CVp1U/SCgjIorJTf6la3yoacQHadIVGfQ/DXSu3d7na3tbIeRrHf582bl8tbzLZ+L+U7ZEvJOCLbZFy/eXaBhK9//etpGLj44ovz4oYsHvcou57BUtqBU2q7ni13t7jFLdI+++yTn6MOUixYsGCsTcjxK1/5yuxkNIMdw2AkNrf6Mt6acKSe/OQn5wUDcMxkRbbbMdDc9i0riUOmnRiCzW2YHC/f+bziFa/ouIPBPVh0gjYlTze60Y3SrW9967Z/Q1Y5hv3sShG4t+An08qiU4F+svhk2+auu+6a+9bvys4YbfGFL3whb7sXwPK9Z4Q2efnLX54XDO0K0SZHHnnkuL77yEc+krdI0wcWoGSklJ0QjGVt2evOBm1tRw/DW9vSqxxr96kPGeBlC3Wna4Nja/eGNjF+Gf5wL87jfIPYlhsEU0G7sQBya2HboqFFUPL9u9/9Lmf4Ggvmg1q/F33oWGPcQmzRh62wECuIBLt93IsgeDuMc/coE70XNt1007yLykI9nSgDT0CsEwsXLkz90E13WVguu7zoPnqq/B3bwqfME3TOIYcckp+RnullDvn0pz+djzFPma/Yjv7OdbTXVJYuG1VZYn8oo7HFFlusFdQbxPlrWaozb7uh3/Tvhz/84dx+2kVAqNgu5h22qL4mZ29729tyBm2r/iYT2tu89ahHPSrbWE1ZaJaMYGvqv7KbUP+0mhPXdfm58Y1vnDPbFy1alPvGtfho3eShoH/1H1up6Ag7cfVrgQ11zjnndEwi2nDDDVM/kC2Z9fQEf4rdYuGnX1um2HN0nvvkq7QqP1KXARIoFSQt9iz4sWzK2n6jr/XnRhttNHae+txKgbzwhS/Mx1gEYRcbt63k1PmMEbYqG95x/cpFE77Nvvvum3cYtCpLUmAbF73sPv7whz+Mfef3EuQkIekHY9p9ey73xcaud2QJ5Ltn9+tv+/F/u9nek2lPz9TUCc0dxnbvPPrRj87PxWfU347nJ3YqFbXeeuvlbPnmfE0f1IlYZMr9y0ivMXY8U62LBectqDb5wAc+kBfsO/lKQRDcQATWR5TLL788T+wmPkHjGiv2JiWTF2NGYLLwoAc9aNz2yw9+8IN5ssAf//jHrPSLUVFnBHRzkAQUTQwFBjOjql02Sr9Bq06K3uQiuDN//vxxx3VyBCZieJmAtYlrM65M8LYnFgQXS2kWgXtB7vJ3MFlqR4Y+w1zWLMNL8LjpEDbr/zFmGc0MEYsBgoIm19qoY5DOtn7nWDPob37zm4/7PadPFrvnAgObM8io8RxNY03wQJYLw4kh08zq69YGsp0Fw7UrQ7eJxQLGpOd3LzJN7ChgjME1S3vXdMrWsmDSlOlOFJk5+eSTc8YOOZRhWALOnExZ9Ax3ciQ4oV1aGXLFWNe3+tw21/KdjAjGrvNynhhvHDiOHBiJ+swxanKWDArt1q2UQUFwyHiyiNRqGzf5NfbIouOMdTsAusFw9ewcFRketYNSQ4b1n90D5EF7Kk1QDO4mDFrjk+PLgXUvHN9SFoljqg4nY1iwxzioHS3Oi8VCMkze9GGd9eO7DTbYII8Hjp97cX+woGYRkV7UbnSDttffsrw8r597qVWrjyxYKCdBz3gGTo6yRc6jzcruHzuJOl2bztLfZNC56CbnJTfuxXmcr5dtzEEw3XQaCwVzkDnBHC5QrRQauTcWSnDFfEDPGwf0oUVWW7fNQYIu3TCvCzJZpO20CPWUpzwlj6deg9/sD3OMoJu5nb1Cr7ZD/XWZpPV83wuddBf9wH7TJu94xzuy/pNJT4dYDPXR5gU2jcBJq7m0CTuInnesa9P19LKgWAnA0mPTwSjJkkQHQaaCa9LzEgMGcX4I/uhrAahesm3rIBrZIQMWqs39agVDUItNb97kD5A7NZDb9bd5S9vZAdktwC/Aap7SJ/p
HcFsftJoT13X5qXFdekk7saO6YXHEPbG3692GduXWtjD7n13ZLpmC3WWhsF9dxT5TTtU93OEOd8gyps36tWX4ZGxFvp7M+W6U3a3K15T68YKf5Im/VOw3pTTZop36R3sZH8aGRQnnbCen7s+CDD3J5p2oXNQI5JpX2ISt4Bu5nrall/nA/I+SaAILM/xOAW9+Bf+J3JF581uZp9jR5Jyf5VlcW8KdduqVTrb3ZNqznW9RsDgg2YevZNGOnJB7yYLFVnfuekGpTsIyb9djwv3x0UoQnc9kjIhXNPFMFsBq2O7/+te/xv2OTAq2k4sgCHojAusjym9/+9u8ii8Q2AqOkmChCacEAgXRTJR1NvEZZ5yRjWgZS4L0JdArCGsC6TW4Y/XTJFOQ0eD+nG+yQatuil6GTVnlh2dhUHdyBJyHQc6I6gcTnjIy2ohDUMqYmPwFVhmvAmvaQ6CNUVYcQ3/LQCoOgu1Z7lPWTC9OsMnQajr0qeBv06ibbf3OkBEgboUALgcCHCkGqXZwnzL6BdJlgoBDrS/cn+flhBR6aQMODMPH37Ubc/AsvmcwWXgpNeksAjHUa6NKkFfQuhXaUP8ysnpxaJvOAcOTM6zuu0AunE/mkHYik5xbgWtGYStDjrFHhj/2sY+NWwxxjCDL4x73uHx/nCfHySaE9pe1oi0sdJRsc5lhJcOqG8YFg9lClcBLXS5ISRnPxrnU1oxJY4+814sprbCLgPFrazvHsFWGRnHwXIfhysgnExzxdvWFOaucLA6fHTEcOu1ON5Fh98bgdr8+/s3BsO2+XvTwLMalj74rkHP3ICOIXPu5LOpxCDh9dCA5U9KHkyIYQEfpIwGHMlY64TgBLgtWnsNCgW3H5FVbOF/Z/eO/na5N73gu+sF4pPuMK7rEvfj7XrcxB8F002ksFATYBI6MS/MDnWEcGwsWao0FpRHMMZxm87gdTeYa5+2lFrp5hL42VxhD3RIGekXwh4NuvhAwL3ZETSlvZh5zfXZOv+/06KS73IOf6QeBAjpTGwtKuDefOtvuiU98Yt7N1dza3goBRTsXBVbNXyUjtPQtvUiPTQejLEvmKv2udMtkzs82LrLkngUo+w3YsB8E0MmAa7GpyjxJlswlEovYwQKCsn7JUKv+Nkebb2u7rJMs6ROZsfrHnOdnfdKcE6eCUZUfgUn6RVKB3QXdcG7PwbYvFDuspvxc+w5ky4e/wK+jU3rxrWrIi6QoiTxseDtR+V0TsWXoOn1BHjuhX4wF+klfyBwugU4+EttS//vw20rd+VYYA3QnGWE3s8ktArSy3WBhy1jR5pORixrJNhK92PCt3u3DZjTu2fb0Mt9Bm/K9CvpAIN198X/Y7cae/xq3xW52DePAIgedIADP9+j24tqaTrb3ZNqzF1l3rDbXDuIKfB5jSgk2OHcvyVX8IXGHkrhkIYgPyB5vtWjYbkw1fXHtK/CvxE8QBL0xr8fjgiHMWEe7VdGimE0GpU6bjCSr0bY9MrAYmoI+JhJBIxO5SQ4UvUnFKqiAWO0gmUQYDbJFOUhKgNRYabdqLAjbynFpBq1kzAqcmdhbvXyjH0UveCpgJmhYG+2uWQKGJlIfwd26Vl0vmLhLxsAznvGMsSxZbWUCY0SZhLUjw8H9FMdQn5T+cizDjdHfDX0kEG+HQgkEclKsRjeNutnW74zKboE3AXuGBeOqGJ2cL3LCWGMgaj/ZbvqHceb+SxmgXtpAwJRh1wucMIYfo8a5yEmNoLTMCkZVbSS7D4sY0LelrmW3zIcmgsYl+O8lqcVI1N4cAUasPpWpJRisX2tDrlBeitvE4oi+d64CeXaf+peB68NAlDVRsiA8X6sFl1bIRGHgFwRSyg4T2/mNX7s+aow1mfX62WJAgbyWbY/60cf4IK+yGmVu1Ats2kzfyc7XLp7N8c5vYUufMO5LyRMyJTuKfJBjDpexzwD395whWYD1eOHs+o6cFPnmhNXOSf0iYjqldtq1te/pOe0r6GE
HS8GYavUyLU5us21qOAR0EodY1peFCmV/Wjmo3a6tDTga5N+zk3Xvhui3nEQQzAS9jAXjsuBYuqXM6UU/clYdZy6x+0aQqZzPHADzal2DuH73S9ELFvUEn2XUCTz0Q/P8FtnN6Rxx4xTmZP+mF0tgy/zs3+ZYNqd5lp6jy80DAu8F9pZdUE3a6a7yNwIA5ky6QuCuU7Db3NUr7q8EqaBvputlmbNFlgTVJRPYkWZX3mTOzy4pOwyVWCBDApRsejLRag5uwk4zN7aaJ92XhSHZqq5tQaWTfd+vLNX2Bht2OmVpVOWnJMWwZ+0coW8EEtudn/3btOvcezPgV36u/aeSCMNuZpvRK/SLRcNWdk+dEFSoE0iKnJEv9l+/tkzdH53Qj+651lUoY4XOZXtKxDEGjYF2yPzWvhY+fPi4xlIvY4De7SQXNRYB6lKr7N+astNIadBmGUe2Pb1S744iH7W9KrBf0Mf1ffqZ/V/O1Ux04ueXMozN8kHtAu7tbO/JtGe/8xOKD9p8H1un55AwZZ7mlxR9YBHOuCw6u0m7MVWXZdUGEsqm62XlQTBbiMD6iFKyD62kt5rAS30zgWCBHgFK2aoCrBykYtiUrUMCPIKttQI32dcOUScHqTaSOFuMrnalGUw+/QatelH0vmfsqkMnK7d20ATPGFjlOJOawKLtaDIpuhkK7QyvMsGb0ExiJl3GfDG8bIlrBeOol6A63Kv+ZmwU48OEvC70u+eutwi2wuq8AHGd5WLxgDPC8HI956kzRxj8JbDeSxvUBpOFinYBBQ6XvvGxxZDM1YF1BpOfBSo4SXVAUhC7bHFnaOorssnYYSRxPFuNi25GYpFR2X7+hqzaMmyBqJNT1M5IbCXTZNTYKAYhvWT7pHYwHji7xkPJNGmVRdGUqWbd93LdkvVum2iznJNxJSux1HWHRTiZM/qzNmQFt8kH3VDvTigOVTPD2/H6hTzKhiv3W+6LE6FckAUwH8EiY7BdxonnqDP4m8fVz9/qpUG+L38vk6a50FUWS2o4rc224bAVZJpb0OBgWXjzbzsD2t1/p2vrY86stqe3yYA+8+lWyzkIZppexkJTF9b6vIY+tNBG/8jEsz2bnSKIXRxhC1I1FgjpzpJEwBlmA5Skin5ont/irmzMuq60nUtKLlhULnOpa5f5hH53/+XdIHYs1Xqzlb7ppLsgS48+oi8lAsjYsyhurmhFczG30xzSzvaaCUZRlvSDOVwQrC7jMdHzm5tr24QNpuyPHWTKPrSag5u0mkuLLJWydWRJewjks9PaZdr3IkulT2ZalkZJftiAEoDqkiUSI9ihki1anR/0kWBuMyPbNV2n7g9Z5PynOhBYy5bEDn4Ke8cO3lZ2TzN42UlXTcSWacpXbc/VeqrTCyFLIgq73/2WOvftIP/GlHvUp/xaO0ztDO52j93kooaPJ4Gq0CwrAtfmD2ujpt1oAay5W7ce882dH+1kub7/Ap+y2Ka1Tunkb7ezvSfTnoOcn9o9h3I94iEWrMydBX6e4yzKwHjjo/N5fddK/stLhwvGsPsVOwiCoHeGx/IM+sLkRzHLvG0VYKVEBTsExxgcgoE+Aq4maRlLgkSlliIFahIxcbSjk4MEQVMBRopYfbV2k+FEglbdFD2jjQEtO9TfN1fZnac2vFzPZCOQqYRKK0NBZmwvbQCZC1bmlXxghHIWOCX+2yq7vpVBUFMH3Oprrmv97nmVPWqFjAfOWrvgsDYsLzdqBmnrZ+qlDer+ErBvBhTIikWPujYpZ6J2PBiupZa5bPjmC2U4ArWMWqiROcI48oztxkWvMipTX/CCgeij/dRSrOvJt3vmGgEWWTay0+uxWdet53hoF86ITHkBZo5T6Qf3WL9kx+/rl/y4V5n2xcnyXVk04tAxvC2mFEPQ4opxbIFGGzblTPaMgBFDtECuOItN3VAcWbpBO5WsJH3hvM1+K46eZ3R9O1F86BPtrc/du0UFDh84kJxMY3UigbICx9IYcH0
ZVSXzxO6N8nLZGvffqSQMHca5siMG5E2/lnJEtbPQ7dq+1+4WO2UVycaxOKRtI7AeDDvdxkI/1PqwYNG16MNWNoLdX+btsvOOvjDHTOQlYs3zexbBBAuL5XyC7XRqp0xLc3tZ0OO4N/VmP5j3BW0FPy02+5iDZa0KrDeDUU26zSHuzYJ52XVFz5ZMyn5emrkuypIawbI+2QjKjtQMWlbJUqs5uF9k1etfOyp82EvK0vELuvV3sZncdwnwkaWSSFNkqb5nQU7yOx2yNEryo928U0ZAspyLv8JuamU7FUpN8WbpFLYC+WBjlp2Hkk4szHTyNcrz8AG62T3doCcnY8uQr9rOq33LYs+Sr/qFkpK+7Ahw3xJX+G8WIjsh29xiqb7yEURlk6KbnHaTixp2X6fM+TJm3K/Ei7qf2Lx2Otbj3T1aiOlU5qYVziVxrJUPXO5hMkymPYtOqeeo5vzEp6ixAKRsU3PXVqvnoNsE1d2Pkmc15KaGL2OxvCR5+Xe9a4Qvp9yXcVuPR/53t1hFEATjiRrrIwoDxURki1sJtpkEbeMT3KV0S9aPDFVZHAJLAkkmb0rVymsJBJqITN6cKkrchyFTsry7IStAMMn5bCnutArPGJH1VAfm6qBVCQr71FmznRQ9h0xQ3Ra/Xt8+z2DwYXiVbJry6TdDxWTOIGHomuisDJes417g5NYTcG14uR8Or8Bk6Xe1sUuWNCNXbdLZ2O+ykDxDc5FDkFpAWD8ppWHba10v20KLwKnnE6AWuBcMrq9b6LcNSkChfNy/TBYOXJ2Jw5kogVS13o1NBqZAea8164p8ot246BWLARZ9OJ8cJdsJyYOgdC/OYQlkkCvtbeFIO8vmUIex3hZbnAHOAbkt/V+2H1qcINPaWN/KzK93JqiTaPFBkIWMCZoX45zjS8ZliFvUEaSWsaF/2wWFZOsrhWRbO4fQbgF91jRICwLS9IzSLgLinERGrD5sBYeEY2JccngZ+xwl447zaScHp8w1ffzbexDabdXsB89g7Aniezb3bAGtyJ6sfr/Xz93Qb3SZLcD6VFktgfPSb/pYPzmfeafTtQXWyYgFB31skaLoonJf2rbdC8iCYKowBn/4wx+O+9TzUi9joR+cS2DE7hBjQdCP89vpXHa/WAike1zfzi61f+mS+r0OE8G8abHWPEBX0bHOz+Gu32lirHtmH+NbeQ5BxVYlwvqFfqUrZEbTAxZSJQEU/UDX0Bfm+lZ0m0MEKOh79oO5SFDS/ODj3HaU1YGOiTLbZEkQkc4u750p/e8z0fOXa5TzaHfvdTFnNwP3E6WU3zPvugf3UstSp/62kF/eN6BN2UblZexFlpQVZF+7Z7LGNuOPNOfEdV1+6A/tIiuZbeZvLEC0KhNV47ytAvXaV6Y+e09bGc92JJcklUItp2xv12SHDMLGmqwto034p9qcriWnxVdiz3o+mfyer5RuLOdWsoYvU3Ytd0LA2rn5L+SR/VyPgU5yOhG56IbAOt+u1suCu/Qyf4StrJ/sgJrIgrF3CvDlLADS8canDHnyOggm0558JAtFxo32lGRHbgoW/8x3fHc6RfzAGOCnF1+KHIsjNCFn7kuw3wJPLfvm59o/9aHb+CjFb7RIZ+7Vx64pZmFerN9V1248BkHQmQisjzCCTQwaBjAFTSkK4DECTZLF8BAI8yJCk7vJnOI2UVPO5eWKApiCggLUDGvGkIm+26p0wd+ZRChoK/NFyZdAY2149Ru06qboGSwmLcamSaRcWwZWwWRTfs/A1l6MJG1TbyecKCYuL9CUWcJ4t7KtL8pWa4aX+9cO7ZxExkUJvAn81Ua/gL3+ZlirHVkcXc9hwjTBz8Z+96wWSmxz0z4MFH8nI5Ys2I4OAT5tJsDnOQR0GST+3nEy0hn6nC6GedkiN4g2gIxdAX7nYOApK8Q5s+gARhMjUiADpZ1qeXAP5feCCSeccEI+T72FdTK4lucqhrNzM9pt/+9myIEcaTfyTcbIn7HknLZ
ylxfF+r1n4XgaB6UNGIHal6EuE0xw+fDDD89OhWPr7eZq7drKLejCeLYboh6nxro+dYwtqwJFjMR2Lw/j6MnCk3Wkv/Uxg7fdAgdnx/d0CCPU2BOcbyerFsboE/Ll/DKayGZZ5NLvnCJySnaNafpiEDifa5E9bck5cu+lNIN7sKjRTb9Cxgq596wcIOOVA1AWoughOtYWX7/rdG1ZN4InnBHGP0dANnupA+m8Fse6vVcjCAaNrdzspvpDl/UzFvqBDqcX6Cv6xIIg/Vf0YSsEB4xZASVjzOInfVkWGOld7+qYKOZDdXzpbrYD/UqH1Yuszm+8+hjf5m3JC90CPP1kGQsiej66hI2ipm2ZA8yl2q1V1mQvc4ga3kpmWEQwrxW7SraxBWt6qZcFx3VJliyUs5d9X/q+fNrR7fyw6FzOwy6zyG9RtlUN54ng2oJZZJlNYNepebuX/jbfszXYK178aR6vg3MWwS3MmLO1vX4ThGf3N+fEdV1+2GD0CJvS/eoDfdIMhDdRiqJdOSn+BRuOH2A8u1apQV4osiXJx7hn27IJO2W198pkbRm6iG6i27QjGavLp9DFXqKrb0uZyWI7ehaB2152Ntp5bTyxuV2TTS5wjW5yOhG56Aa7uLkT2NiXhEQXawf+CFmqy6z2it0NfCv2redyHr6B+x8Ek2lPclcWSzyzZLt6ccnCFJnyHirtQLboFD6JuAGfRwY7X6VVzMO9WEho6uiSyNQJQfRybeOZj0631XN/p/EYBEF75qzqtNcyGHoYiRQiY9jEa6KRTSR7kuKWWSHgJzhpQi/1r03ylHcdwBV0ZfyaCARoKVwGjAmCEpYRXWfxug6DjyHTzuh2fU6Nyc4Kf/l7QWbXshJcauGpG9gpc9b923bHEawxcatn3MTqrECroHvZvgXP4/lkfpvg221PZFgwBgWz6+ctpUea3wtiMygFl/QDA6W8kMmKum1lnpMBJchXXi4KE5t7NGGaWBmjtl3qV5MgQ5tB6XtOgMlc8NI5TIAcIf1tOM+2fucQM5CVLWE0cJ70nb8rE7/n90wCpzL4BfEZqyVbxdgQbJetr28YOuSi9EG/bdAKBijnzM4KwWrGWDGOZWUJEjQRbFbrXzsxkgoCxRwD9yHro13AuL43AW9bKYvMNL/nWCrVQ04txFjM0SYyMBjOjD7HktXSLu6tIIOL4yDjRpDdvTMEy7EyqrQhI1Pgxdgqzo8MJwsjtiS2yxIPgiAIgiAIgiAIgiAYJSKwPkuxminYLGOg+YK/YPYS/R4EQRAEQRAEQRAEQRAEU08E1oMgCIIgCIIgCIIgCIIgCIKgD6LGehAEQRAEQRAEQRAEQRAEQRD0QQTWgyAIgiAIgiAIgiAIgiAIgqAPIrAeBEEQBEEQBEEQBEEQBEEQBH0QgfUgCIIgCIIgCIIgCIIgCIIg6IMIrAdBEARBEARBEARBEARBEARBH0RgPQiCIAiCIAiCIAiCIAiCIAj6IALrQRAEQRAEQRAEQRAEQRAEQdAHEVgPgiAIgiAIgiAIgiAIgiAIgj6IwHoQBEEQBEEQBEEQBEEQBEEQ9EEE1oMgCIIgCIIgCIIgCIIgCIIg9c7/AxmIkbuzkPrVAAAAAElFTkSuQmCC",
-      "text/plain": [
-       "<Figure size 1500x600 with 1 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mnotebook controller is DISPOSED. \n",
-      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
-     ]
-    }
-   ],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "from matplotlib.legend_handler import HandlerTuple  # Added import\n",
-    "\n",
-    "\n",
-    "# Assuming pivot_df is your original dataframe\n",
-    "models = pivot_df[\"model_id\"].unique()\n",
-    "sources = pivot_df[\"source\"].unique()\n",
-    "\n",
-    "# Create figure and axis\n",
-    "plt.style.use(\"seaborn-v0_8-white\")\n",
-    "fig, ax = plt.subplots(figsize=(15, 6))\n",
-    "\n",
-    "# Set the width of each bar group and positions of the bars\n",
-    "width = 0.15  # width of each bar\n",
-    "spacing = 0.02  # space between bars within a group\n",
-    "group_spacing = 0.2  # space between model groups\n",
-    "\n",
-    "# Calculate positions for the bars\n",
-    "num_sources = len(sources)\n",
-    "total_width_per_group = (width + spacing) * num_sources * 2  # *2 for agent and vanilla\n",
-    "x = np.arange(len(models)) * (total_width_per_group + group_spacing)\n",
-    "\n",
-    "# Plot bars for each source\n",
-    "for i, source in enumerate(sources):\n",
-    "    source_data = pivot_df[pivot_df[\"source\"] == source]\n",
-    "    agent_scores = [\n",
-    "        source_data[source_data[\"model_id\"] == model][\"code\"].values[0]\n",
-    "        if len(source_data[source_data[\"model_id\"] == model]) > 0\n",
-    "        else np.nan\n",
-    "        for model in models\n",
-    "    ]\n",
-    "    vanilla_scores = [\n",
-    "        source_data[source_data[\"model_id\"] == model][\"vanilla\"].values[0]\n",
-    "        if len(source_data[source_data[\"model_id\"] == model]) > 0\n",
-    "        else np.nan\n",
-    "        for model in models\n",
-    "    ]\n",
-    "\n",
-    "    # Position calculation for each pair of bars\n",
-    "    pos = x + i * (width * 2 + spacing)\n",
-    "\n",
-    "    agent_bars = ax.bar(pos, agent_scores, width, label=f\"{source} (Agent)\", alpha=0.8)\n",
-    "    vanilla_bars = ax.bar(\n",
-    "        pos + width * 0.6,\n",
-    "        vanilla_scores,\n",
-    "        width,\n",
-    "        hatch=\"////\",\n",
-    "        alpha=0.5,\n",
-    "        hatch_linewidth=2,\n",
-    "        label=f\"{source} (Vanilla)\",\n",
-    "        color=\"white\",\n",
-    "        edgecolor=agent_bars[0].get_facecolor(),\n",
-    "    )\n",
-    "\n",
-    "# Customize the plot\n",
-    "ax.set_ylabel(\"Score\")\n",
-    "ax.set_title(\"Model Performance Comparison\")\n",
-    "\n",
-    "# Set x-axis ticks in the middle of each group\n",
-    "group_centers = x + (total_width_per_group - spacing) / 2\n",
-    "ax.set_xticks(group_centers)\n",
-    "\n",
-    "# Wrap long model names to prevent overlap\n",
-    "wrapped_labels = [\"\\n\".join(model.split(\"/\")) for model in models]\n",
-    "ax.set_xticklabels(wrapped_labels, rotation=0, ha=\"center\")\n",
-    "\n",
-    "# Modify legend to combine agent and vanilla entries\n",
-    "handles, labels = ax.get_legend_handles_labels()\n",
-    "unique_sources = sources\n",
-    "legend_elements = [\n",
-    "    (handles[i * 2], handles[i * 2 + 1], labels[i * 2].replace(\" (Agent)\", \"\")) for i in range(len(unique_sources))\n",
-    "]\n",
-    "custom_legend = ax.legend(\n",
-    "    [(agent_handle, vanilla_handle) for agent_handle, vanilla_handle, _ in legend_elements],\n",
-    "    [label for _, _, label in legend_elements],\n",
-    "    handler_map={tuple: HandlerTuple(ndivide=None)},\n",
-    "    bbox_to_anchor=(1.05, 1),\n",
-    "    loc=\"upper left\",\n",
-    ")\n",
-    "\n",
-    "ax.yaxis.grid(True, linestyle=\"--\", alpha=0.3)\n",
-    "ax.set_ylim(bottom=0)\n",
-    "plt.tight_layout()\n",
-    "ax.spines[\"top\"].set_visible(False)\n",
-    "ax.spines[\"right\"].set_visible(False)\n",
-    "\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "NameError",
-     "evalue": "name 'formatted_df' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[12], line 45\u001b[0m\n\u001b[1;32m     41\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m mathjax_table\n\u001b[1;32m     44\u001b[0m \u001b[38;5;66;03m# Usage (after running your previous data processing code):\u001b[39;00m\n\u001b[0;32m---> 45\u001b[0m mathjax_table \u001b[38;5;241m=\u001b[39m create_mathjax_table(pivot_df, \u001b[43mformatted_df\u001b[49m)\n\u001b[1;32m     46\u001b[0m \u001b[38;5;28mprint\u001b[39m(mathjax_table)\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'formatted_df' is not defined"
-     ]
-    }
-   ],
-   "source": [
-    "def create_mathjax_table(pivot_df, formatted_df):\n",
-    "    # Start the matrix environment with 4 columns\n",
-    "    # l for left-aligned model and task, c for centered numbers\n",
-    "    mathjax_table = \"\\\\begin{array}{llcc}\\n\"\n",
-    "    mathjax_table += \"\\\\text{Model} & \\\\text{Task} & \\\\text{Agent} & \\\\text{Vanilla} \\\\\\\\\\n\"\n",
-    "    mathjax_table += \"\\\\hline\\n\"\n",
-    "\n",
-    "    # Sort the DataFrame by model_id and source\n",
-    "    formatted_df = formatted_df.sort_values([\"model_id\", \"source\"])\n",
-    "\n",
-    "    current_model = None\n",
-    "    for _, row in formatted_df.iterrows():\n",
-    "        model = row[\"model_id\"]\n",
-    "        source = row[\"source\"]\n",
-    "\n",
-    "        # Add a horizontal line between different models\n",
-    "        if current_model is not None and current_model != model:\n",
-    "            mathjax_table += \"\\\\hline\\n\"\n",
-    "\n",
-    "        # Format model name\n",
-    "        model_display = model.replace(\"_\", \"\\\\_\")\n",
-    "        if \"Qwen\" in model or \"anthropic\" in model:\n",
-    "            model_display = f\"\\\\textit{{{model_display}}}\"\n",
-    "\n",
-    "        # If it's the same model as previous row, use empty space\n",
-    "        if current_model == model:\n",
-    "            model_display = \"\\\\;\"\n",
-    "\n",
-    "        # Add the data row\n",
-    "        mathjax_table += f\"{model_display} & {source} & {row['agent']} & {row['vanilla']} \\\\\\\\\\n\"\n",
-    "\n",
-    "        current_model = model\n",
-    "\n",
-    "    mathjax_table += \"\\\\hline\\n\"\n",
-    "    mathjax_table += \"\\\\end{array}\"\n",
-    "\n",
-    "    return mathjax_table\n",
-    "\n",
-    "\n",
-    "# Usage (after running your previous data processing code):\n",
-    "# mathjax_table = create_mathjax_table(pivot_df, formatted_df)\n",
-    "# print(mathjax_table)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "test",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/e2b_example.py b/examples/e2b_example.py
deleted file mode 100644
index 18354a372..000000000
--- a/examples/e2b_example.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from dotenv import load_dotenv
-
-from smolagents import CodeAgent, HfApiModel, Tool
-from smolagents.default_tools import VisitWebpageTool
-
-
-load_dotenv()
-
-
-class GetCatImageTool(Tool):
-    name = "get_cat_image"
-    description = "Get a cat image"
-    inputs = {}
-    output_type = "image"
-
-    def __init__(self):
-        super().__init__()
-        self.url = "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"
-
-    def forward(self):
-        from io import BytesIO
-
-        import requests
-        from PIL import Image
-
-        response = requests.get(self.url)
-
-        return Image.open(BytesIO(response.content))
-
-
-get_cat_image = GetCatImageTool()
-
-agent = CodeAgent(
-    tools=[get_cat_image, VisitWebpageTool()],
-    model=HfApiModel(),
-    additional_authorized_imports=[
-        "Pillow",
-        "requests",
-        "markdownify",
-    ],  # "duckduckgo-search",
-    use_e2b_executor=True,
-)
-
-agent.run(
-    "Calculate how much is 2+2, then return me an image of a cat. Directly use the image provided in your state.",
-    additional_args={"cat_image": get_cat_image()},
-)  # Asking to directly return the image from state tests that additional_args are properly sent to server.
-
-# Try the agent in a Gradio UI
-from smolagents import GradioUI
-
-
-GradioUI(agent).launch()
diff --git a/examples/gradio_ui.py b/examples/gradio_ui.py
new file mode 100644
index 000000000..81c56a1f2
--- /dev/null
+++ b/examples/gradio_ui.py
@@ -0,0 +1,39 @@
+from io import BytesIO
+
+import requests
+from PIL import Image
+
+from smolagents import CodeAgent, GradioUI, InferenceClientModel
+
+
+def add_agent_image(memory_step, agent):
+    """Step callback that attaches the smolagents image to a step's observations.
+
+    Args:
+        memory_step: Step record being completed; its `observations_images`
+            attribute is replaced with a one-element list holding the image.
+        agent: The agent passed to the callback (unused here).
+
+    Raises:
+        requests.RequestException: If the download times out or returns an
+            HTTP error status.
+    """
+    url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/smolagents.png"
+    # Bound the request and fail on HTTP errors so an error page is never
+    # handed to PIL as if it were image bytes.
+    response = requests.get(url, timeout=30)
+    response.raise_for_status()
+    memory_step.observations_images = [Image.open(BytesIO(response.content))]
+
+
+agent = CodeAgent(
+    tools=[],
+    model=InferenceClientModel(),
+    verbosity_level=1,
+    planning_interval=3,
+    name="example_agent",
+    description="This is an example agent that has no tools but will always see an agent image at the end of each step.",
+    step_callbacks=[add_agent_image],
+)
+
+GradioUI(agent, file_upload_folder="./data").launch()
diff --git a/examples/gradio_upload.py b/examples/gradio_upload.py
deleted file mode 100644
index 746013627..000000000
--- a/examples/gradio_upload.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from smolagents import CodeAgent, GradioUI, HfApiModel
-
-
-agent = CodeAgent(tools=[], model=HfApiModel(), max_steps=4, verbosity_level=1)
-
-GradioUI(agent, file_upload_folder="./data").launch()
diff --git a/examples/inspect_multiagent_run.py b/examples/inspect_multiagent_run.py
index 8c1c98d46..95032cd34 100644
--- a/examples/inspect_multiagent_run.py
+++ b/examples/inspect_multiagent_run.py
@@ -9,14 +9,14 @@
 from smolagents import (
     CodeAgent,
     DuckDuckGoSearchTool,
-    HfApiModel,
+    InferenceClientModel,
     ToolCallingAgent,
     VisitWebpageTool,
 )
 
 
 # Then we run the agentic part!
-model = HfApiModel()
+model = InferenceClientModel()
 
 search_agent = ToolCallingAgent(
     tools=[DuckDuckGoSearchTool(), VisitWebpageTool()],
diff --git a/examples/multi_llm_agent.py b/examples/multi_llm_agent.py
new file mode 100644
index 000000000..186fa06f8
--- /dev/null
+++ b/examples/multi_llm_agent.py
@@ -0,0 +1,52 @@
+import os
+
+from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMRouterModel
+
+
+# Placeholder credentials. `setdefault` keeps any value already exported in the
+# environment instead of overwriting it with an empty string; fill in the empty
+# defaults below when nothing is exported.
+os.environ.setdefault("OPENAI_API_KEY", "")
+os.environ.setdefault("AWS_ACCESS_KEY_ID", "")
+os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "")
+os.environ.setdefault("AWS_REGION", "")
+
+# Both active entries share the model_name "model-group-1", which is the group
+# the router model below is asked to serve.
+llm_loadbalancer_model_list = [
+    {
+        "model_name": "model-group-1",
+        "litellm_params": {
+            "model": "gpt-4o-mini",
+            "api_key": os.getenv("OPENAI_API_KEY"),
+        },
+    },
+    {
+        "model_name": "model-group-1",
+        "litellm_params": {
+            "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+            "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
+            "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
+            "aws_region_name": os.getenv("AWS_REGION"),
+        },
+    },
+    # {
+    #     "model_name": "model-group-2",
+    #     "litellm_params": {
+    #         "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+    #         "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
+    #         "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
+    #         "aws_region_name": os.getenv("AWS_REGION"),
+    #     },
+    # },
+]
+
+
+model = LiteLLMRouterModel(
+    model_id="model-group-1",
+    model_list=llm_loadbalancer_model_list,
+    client_kwargs={"routing_strategy": "simple-shuffle"},
+)
+agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
+
+agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
diff --git a/examples/multiple_tools.py b/examples/multiple_tools.py
index 39ed90767..a2685541f 100644
--- a/examples/multiple_tools.py
+++ b/examples/multiple_tools.py
@@ -1,13 +1,11 @@
-from typing import Optional
-
 import requests
 
 # from smolagents.agents import ToolCallingAgent
-from smolagents import CodeAgent, HfApiModel, tool
+from smolagents import CodeAgent, InferenceClientModel, tool
 
 
 # Choose which LLM engine to use!
-model = HfApiModel()
+model = InferenceClientModel()
 # model = TransformersModel(model_id="meta-llama/Llama-3.2-2B-Instruct")
 
 # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-20240620'
@@ -15,7 +13,7 @@
 
 
 @tool
-def get_weather(location: str, celsius: Optional[bool] = False) -> str:
+def get_weather(location: str, celsius: bool | None = False) -> str:
     """
     Get the current weather at the given location using the WeatherStack API.
 
diff --git a/examples/open_deep_research/README.md b/examples/open_deep_research/README.md
index 915bfc894..c2c799616 100644
--- a/examples/open_deep_research/README.md
+++ b/examples/open_deep_research/README.md
@@ -1,22 +1,54 @@
 # Open Deep Research
 
-Welcome to this open replication of [OpenAI's Deep Research](https://openai.com/index/introducing-deep-research/)!
+Welcome to this open replication of [OpenAI's Deep Research](https://openai.com/index/introducing-deep-research/)! This agent attempts to replicate OpenAI's model and achieve similar performance on research tasks.
 
-Read more about this implementation's goal and methods [in our blog post](https://huggingface.co/blog/open-deep-research).
+Read more about this implementation's goal and methods in our [blog post](https://huggingface.co/blog/open-deep-research).
 
-This agent achieves 55% pass@1 on GAIA validation set, vs 67% for Deep Research.
 
-To install it, first run
+This agent achieves **55% pass@1** on the GAIA validation set, compared to **67%** for the original Deep Research.
+
+## Setup
+
+To get started, follow the steps below:
+
+### Clone the repository
+
+```bash
+git clone https://github.com/huggingface/smolagents.git
+cd smolagents/examples/open_deep_research
+```
+
+### Install dependencies
+
+Run the following command to install the required dependencies from the `requirements.txt` file:
+
 ```bash
 pip install -r requirements.txt
 ```
 
-And install smolagents dev version
+### Install the development version of `smolagents`
+
 ```bash
-pip install smolagents[dev]
+pip install -e ../../.[dev]
 ```
 
+### Set up environment variables
+
+The agent uses the `GoogleSearchTool` for web search, which requires an environment variable with the corresponding API key, based on the selected provider:
+- `SERPAPI_API_KEY` for SerpApi: [Sign up here to get a key](https://serpapi.com/users/sign_up)
+- `SERPER_API_KEY` for Serper: [Sign up here to get a key](https://serper.dev/signup)
+
+Depending on the model you want to use, you may also need to set additional environment variables, such as the API key of that model's provider.
+For example, to use the default `o1` model, you need to set the `OPENAI_API_KEY` environment variable.
+[Sign up here to get a key](https://platform.openai.com/signup).
+
+> [!WARNING]
+> The use of the default `o1` model is restricted to tier-3 access: https://help.openai.com/en/articles/10362446-api-access-to-o1-and-o3-mini
+
+
+## Usage
+
 Then you're good to go! Run the run.py script, as in:
 ```bash
 python run.py --model-id "o1" "Your question here!"
-```
+```
\ No newline at end of file
diff --git a/examples/open_deep_research/analysis.ipynb b/examples/open_deep_research/analysis.ipynb
index 04f315fdd..ccb6a1d54 100644
--- a/examples/open_deep_research/analysis.ipynb
+++ b/examples/open_deep_research/analysis.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -11,19 +11,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/aymeric/venv/gaia/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n",
-      "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import os\n",
     "\n",
@@ -38,12 +28,12 @@
     "\n",
     "pd.set_option(\"max_colwidth\", None)\n",
     "\n",
-    "OUTPUT_DIR = \"output\""
+    "OUTPUT_DIR = \"../../output\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -52,29 +42,6 @@
     "eval_df = pd.DataFrame(eval_ds)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2    86\n",
-       "1    53\n",
-       "3    26\n",
-       "Name: count, dtype: int64"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "pd.Series(eval_ds[\"task\"]).value_counts()"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -84,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -98,254 +65,14 @@
     "    results.append(df)\n",
     "\n",
     "result_df = pd.concat(results)\n",
-    "result_df = result_df.drop(columns=[\"start_time\", \"end_time\"])\n",
     "result_df[\"prediction\"] = result_df[\"prediction\"].fillna(\"No prediction\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "String  cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 2 High fantasy A Song of Ice and Fire cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String 94 CFM for Cheater cannot be normalized to number str.\n",
-      "String  93 CFM for Cheater beater cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 3 or 4 cannot be normalized to number str.\n",
-      "String No year cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 250 for Cheater cannot be normalized to number str.\n",
-      "String  220 for Cheater beater cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String 776 ft/min for Cheater cannot be normalized to number str.\n",
-      "String  768 ft/min for Cheater beater cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String CFM number for Cheater: not listed cannot be normalized to number str.\n",
-      "String  CFM number for Cheater beater: 665 ft/min cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 1.46 Å cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String August 1: 0 August 2: 0 August 3: 0 August 4: 0 August 5: 0 August 6: 0 August 7: 0 August 8: 0 August 9: 0 August 10: 0 August 11: 0 August 12: 0 August 13: 0 August 14: 0 August 15: 0 August 16: 0 August 17: 0 August 18: 0 August 19: 0 August 20: 0 August 21: 0 August 22: 0 August 23: 0 August 24: 0 August 25: 0 August 26: 0 August 27: 0 August 28: 0 August 29: 0 August 30: 0 August 31: 0 cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String 120 for Cheater cannot be normalized to number str.\n",
-      "String  103 for Cheater beater cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 120.28 for Cheater cannot be normalized to number str.\n",
-      "String  119.04 for Cheater beater cannot be normalized to number str.\n",
-      "String 3 or 4 cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive sold for 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 2730-2740 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 89706.00 USD cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String No prediction cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 6 The Lord of the Rings (book) J. R. R. Tolkien Author American literature Fantasy literature Publishers A Song of Ice and Fire cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 1.46 Å cannot be normalized to number str.\n",
-      "String  cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 94.5 for Cheater cannot be normalized to number str.\n",
-      "String  93.5 for Cheater beater cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 776 for Cheater cannot be normalized to number str.\n",
-      "String  Not specified for Cheater Beater cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 5.75 for Cheater cannot be normalized to number str.\n",
-      "String  5.22 for Cheater Beater cannot be normalized to number str.\n",
-      "String 2017 Komo Mai Drive sold for 900000 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String 33101 28557 cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "String Unable to determine cannot be normalized to number str.\n",
-      "Close call: Alfonso Cardinal Visconti vs Alfonso Visconti\n",
-      "Close call: Rockhopper Penguins vs Rockhopper penguin\n",
-      "Close call: INT. THE CASTLE vs THE CASTLE\n",
-      "Close call: to be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune or to take arms against a sea of troubles and by opposing end them vs To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune\n",
-      "Close call: The World of the Twenty First Century 1994 vs The World of the Twenty First Century\n",
-      "Close call: Alfonso Cardinal Visconti vs Alfonso Visconti\n",
-      "Close call: Wes Craven's A Nightmare on Elm Street vs A Nightmare on Elm Street\n",
-      "Close call: God said let there be dragons vs Here be dragons\n",
-      "Close call: rockhopper penguins vs Rockhopper penguin\n",
-      "Close call: Harbinger, This Fire, Tidal vs Harbinger, Tidal\n",
-      "Close call: EC 3.1.3.1;EC 1.11.1.7 vs 3.1.3.1; 1.11.1.7\n",
-      "Close call: to be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune or to take arms against a sea of troubles and by opposing end them vs To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune\n",
-      "Close call: Alfonso Cardinal Visconti vs Alfonso Visconti\n",
-      "Close call: to be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune or to take arms against a sea of troubles and by opposing end them vs To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune\n",
-      "Close call: Out of the Silent Planet by C.S. Lewis vs Out of the Silent Planet\n",
-      "Close call: broccoli, celery, fresh basil, green beans, lettuce, sweet potatoes vs broccoli, celery, fresh basil, lettuce, sweet potatoes\n",
-      "Close call: To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune or to take arms against a sea of troubles and by opposing end them vs To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/aymeric/Documents/Code/smolagents/examples/open_deep_research/scripts/gaia_scorer.py:52: UserWarning: Answer lists have different lengths, returning False.\n",
-      "  warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import re\n",
     "from collections import Counter\n",
@@ -395,12 +122,21 @@
     "    return total_count\n",
     "\n",
     "\n",
+    "def get_durations(row):\n",
+    "    # start_datetime = datetime.strptime(row['start_time'], \"%Y-%m-%d %H:%M:%S\")\n",
+    "    # end_datetime = datetime.strptime(row['end_time'], \"%Y-%m-%d %H:%M:%S\")\n",
+    "\n",
+    "    duration_timedelta = row[\"end_time\"] - row[\"start_time\"]\n",
+    "    return int(duration_timedelta.total_seconds())\n",
+    "\n",
+    "\n",
+    "result_df[\"duration\"] = result_df.apply(get_durations, axis=1)\n",
     "# result_df[\"tool_calls\"] = result_df[\"intermediate_steps\"].apply(sum_tool_calls)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -425,43 +161,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "agent_name\n",
-       "code_gpt4o_03_february_text                         165\n",
-       "code_o1_03_february_ablation-toolcalling-manager    165\n",
-       "code_o1_01_february_text                            165\n",
-       "code_o3-mini_03_february_remove-navigational        165\n",
-       "code_o1_04_february_submission5                     165\n",
-       "code_o1_03_february_text_high-reasoning-effort      165\n",
-       "code_o1_03_february_remove-navigational             164\n",
-       "code_o1_03_february_fix-print-outputs               164\n",
-       "code_o1_04_february_submission                      162\n",
-       "code_o1_03_february_goodoldtext-unbroken            161\n",
-       "code_gpt4o_03_february_goodoldtext-unbroken         159\n",
-       "code_gpt4o_03_february_magenticbrowser              159\n",
-       "code_o1_03_february_fix-print-outputs2              156\n",
-       "code_gpt4o_03_february_magenticbrowser2             156\n",
-       "code_o1_04_february_submission-medium               125\n",
-       "code_o1_29-01_text                                  105\n",
-       "code_llama-3                                         90\n",
-       "code_o1_22-01_managedagent-summary_planning          67\n",
-       "code_o1_25-01_visioon                                53\n",
-       "code_o1_04_february_submission3                      49\n",
-       "code_qwen-coder-32B_03_february_text                 43\n",
-       "code_o1_04_february_submission4                       6\n",
-       "Name: count, dtype: int64"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "result_df[\"agent_name\"].value_counts()"
    ]
@@ -475,440 +177,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "agent_name\n",
-       "code_gpt4o_03_february_text                         165\n",
-       "code_o1_03_february_ablation-toolcalling-manager    165\n",
-       "code_o1_01_february_text                            165\n",
-       "code_o3-mini_03_february_remove-navigational        165\n",
-       "code_o1_04_february_submission5                     165\n",
-       "code_o1_03_february_text_high-reasoning-effort      165\n",
-       "code_o1_03_february_remove-navigational             164\n",
-       "code_o1_03_february_fix-print-outputs               164\n",
-       "code_o1_04_february_submission                      162\n",
-       "code_o1_03_february_goodoldtext-unbroken            161\n",
-       "code_gpt4o_03_february_goodoldtext-unbroken         159\n",
-       "code_gpt4o_03_february_magenticbrowser              159\n",
-       "code_o1_03_february_fix-print-outputs2              156\n",
-       "code_gpt4o_03_february_magenticbrowser2             156\n",
-       "code_o1_04_february_submission-medium               125\n",
-       "code_o1_29-01_text                                  105\n",
-       "code_llama-3                                         90\n",
-       "code_o1_22-01_managedagent-summary_planning          67\n",
-       "code_o1_25-01_visioon                                53\n",
-       "code_o1_04_february_submission3                      49\n",
-       "code_qwen-coder-32B_03_february_text                 43\n",
-       "code_o1_04_february_submission4                       6\n",
-       "Name: count, dtype: int64"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "agent_name                                    task\n",
-       "code_gpt4o_03_february_goodoldtext-unbroken   2       84\n",
-       "                                              1       53\n",
-       "                                              3       22\n",
-       "code_gpt4o_03_february_magenticbrowser        2       83\n",
-       "                                              1       52\n",
-       "                                                      ..\n",
-       "code_o3-mini_03_february_remove-navigational  1       53\n",
-       "                                              3       26\n",
-       "code_qwen-coder-32B_03_february_text          2       22\n",
-       "                                              1       14\n",
-       "                                              3        7\n",
-       "Name: count, Length: 65, dtype: int64"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Total length: 2809 - is complete: False\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "o1_vision = \"code_o1_25-01_visioon\"\n",
-    "o1_next = \"code_o1_29-01_text\"\n",
-    "o1 = \"code_o1_01_february_text\"\n",
-    "\n",
-    "list_versions = [o1, o1_vision, o1_next]\n",
-    "\n",
-    "# submission_selection_name = \"react_code_llama3-70b_02-05_full-gaia-validation-code\"\n",
     "sel_df = result_df\n",
     "# sel_df = sel_df.loc[\n",
     "#     (result_df[\"agent_name\"].isin(list_versions))\n",
-    "#     # & (~result_df[\"question\"].isin(UNSOLVED_QUESTIONS))\n",
     "# ]\n",
     "sel_df = sel_df.reset_index(drop=True)\n",
     "display(sel_df[\"agent_name\"].value_counts())\n",
     "sel_df = sel_df.drop_duplicates(subset=[\"agent_name\", \"question\"])\n",
     "display(sel_df.groupby(\"agent_name\")[[\"task\"]].value_counts())\n",
-    "print(\"Total length:\", len(sel_df), \"- is complete:\", len(sel_df) == 165)\n",
-    "# assert sel_df[\"question\"].value_counts().max() == len(list_versions), \"Some questions are duplicate!\""
+    "print(\"Total length:\", len(sel_df), \"- is complete:\", len(sel_df) == 165)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'Average score:'"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>is_correct</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>agent_name</th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>code_gpt4o_03_february_goodoldtext-unbroken</th>\n",
-       "      <td>0.384</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_gpt4o_03_february_magenticbrowser</th>\n",
-       "      <td>0.352</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_gpt4o_03_february_magenticbrowser2</th>\n",
-       "      <td>0.365</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_gpt4o_03_february_text</th>\n",
-       "      <td>0.376</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_llama-3</th>\n",
-       "      <td>0.078</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_01_february_text</th>\n",
-       "      <td>0.491</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_03_february_ablation-toolcalling-manager</th>\n",
-       "      <td>0.327</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_03_february_fix-print-outputs</th>\n",
-       "      <td>0.518</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_03_february_fix-print-outputs2</th>\n",
-       "      <td>0.558</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_03_february_goodoldtext-unbroken</th>\n",
-       "      <td>0.534</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_03_february_remove-navigational</th>\n",
-       "      <td>0.537</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_03_february_text_high-reasoning-effort</th>\n",
-       "      <td>0.485</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_04_february_submission</th>\n",
-       "      <td>0.494</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_04_february_submission-medium</th>\n",
-       "      <td>0.488</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_04_february_submission3</th>\n",
-       "      <td>0.490</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_04_february_submission4</th>\n",
-       "      <td>0.500</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_04_february_submission5</th>\n",
-       "      <td>0.552</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_22-01_managedagent-summary_planning</th>\n",
-       "      <td>0.418</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_25-01_visioon</th>\n",
-       "      <td>0.340</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o1_29-01_text</th>\n",
-       "      <td>0.390</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_o3-mini_03_february_remove-navigational</th>\n",
-       "      <td>0.291</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>code_qwen-coder-32B_03_february_text</th>\n",
-       "      <td>0.209</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                  is_correct\n",
-       "agent_name                                                  \n",
-       "code_gpt4o_03_february_goodoldtext-unbroken            0.384\n",
-       "code_gpt4o_03_february_magenticbrowser                 0.352\n",
-       "code_gpt4o_03_february_magenticbrowser2                0.365\n",
-       "code_gpt4o_03_february_text                            0.376\n",
-       "code_llama-3                                           0.078\n",
-       "code_o1_01_february_text                               0.491\n",
-       "code_o1_03_february_ablation-toolcalling-manager       0.327\n",
-       "code_o1_03_february_fix-print-outputs                  0.518\n",
-       "code_o1_03_february_fix-print-outputs2                 0.558\n",
-       "code_o1_03_february_goodoldtext-unbroken               0.534\n",
-       "code_o1_03_february_remove-navigational                0.537\n",
-       "code_o1_03_february_text_high-reasoning-effort         0.485\n",
-       "code_o1_04_february_submission                         0.494\n",
-       "code_o1_04_february_submission-medium                  0.488\n",
-       "code_o1_04_february_submission3                        0.490\n",
-       "code_o1_04_february_submission4                        0.500\n",
-       "code_o1_04_february_submission5                        0.552\n",
-       "code_o1_22-01_managedagent-summary_planning            0.418\n",
-       "code_o1_25-01_visioon                                  0.340\n",
-       "code_o1_29-01_text                                     0.390\n",
-       "code_o3-mini_03_february_remove-navigational           0.291\n",
-       "code_qwen-coder-32B_03_february_text                   0.209"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th>is_correct</th>\n",
-       "      <th>is_near_correct</th>\n",
-       "      <th>count_steps</th>\n",
-       "      <th>count</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>agent_name</th>\n",
-       "      <th>task</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"3\" valign=\"top\">code_gpt4o_03_february_goodoldtext-unbroken</th>\n",
-       "      <th>1</th>\n",
-       "      <td>0.452830</td>\n",
-       "      <td>0.452830</td>\n",
-       "      <td>7.000000</td>\n",
-       "      <td>53</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>0.380952</td>\n",
-       "      <td>0.392857</td>\n",
-       "      <td>8.511905</td>\n",
-       "      <td>84</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0.227273</td>\n",
-       "      <td>0.227273</td>\n",
-       "      <td>10.409091</td>\n",
-       "      <td>22</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"2\" valign=\"top\">code_gpt4o_03_february_magenticbrowser</th>\n",
-       "      <th>1</th>\n",
-       "      <td>0.480769</td>\n",
-       "      <td>0.480769</td>\n",
-       "      <td>7.153846</td>\n",
-       "      <td>52</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>0.349398</td>\n",
-       "      <td>0.361446</td>\n",
-       "      <td>8.168675</td>\n",
-       "      <td>83</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"2\" valign=\"top\">code_o3-mini_03_february_remove-navigational</th>\n",
-       "      <th>2</th>\n",
-       "      <td>0.232558</td>\n",
-       "      <td>0.244186</td>\n",
-       "      <td>4.976744</td>\n",
-       "      <td>86</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0.153846</td>\n",
-       "      <td>0.153846</td>\n",
-       "      <td>6.615385</td>\n",
-       "      <td>26</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"3\" valign=\"top\">code_qwen-coder-32B_03_february_text</th>\n",
-       "      <th>1</th>\n",
-       "      <td>0.357143</td>\n",
-       "      <td>0.357143</td>\n",
-       "      <td>5.428571</td>\n",
-       "      <td>14</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>0.136364</td>\n",
-       "      <td>0.136364</td>\n",
-       "      <td>6.409091</td>\n",
-       "      <td>22</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0.142857</td>\n",
-       "      <td>0.142857</td>\n",
-       "      <td>6.571429</td>\n",
-       "      <td>7</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>65 rows × 4 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                   is_correct  \\\n",
-       "agent_name                                   task               \n",
-       "code_gpt4o_03_february_goodoldtext-unbroken  1       0.452830   \n",
-       "                                             2       0.380952   \n",
-       "                                             3       0.227273   \n",
-       "code_gpt4o_03_february_magenticbrowser       1       0.480769   \n",
-       "                                             2       0.349398   \n",
-       "...                                                       ...   \n",
-       "code_o3-mini_03_february_remove-navigational 2       0.232558   \n",
-       "                                             3       0.153846   \n",
-       "code_qwen-coder-32B_03_february_text         1       0.357143   \n",
-       "                                             2       0.136364   \n",
-       "                                             3       0.142857   \n",
-       "\n",
-       "                                                   is_near_correct  \\\n",
-       "agent_name                                   task                    \n",
-       "code_gpt4o_03_february_goodoldtext-unbroken  1            0.452830   \n",
-       "                                             2            0.392857   \n",
-       "                                             3            0.227273   \n",
-       "code_gpt4o_03_february_magenticbrowser       1            0.480769   \n",
-       "                                             2            0.361446   \n",
-       "...                                                            ...   \n",
-       "code_o3-mini_03_february_remove-navigational 2            0.244186   \n",
-       "                                             3            0.153846   \n",
-       "code_qwen-coder-32B_03_february_text         1            0.357143   \n",
-       "                                             2            0.136364   \n",
-       "                                             3            0.142857   \n",
-       "\n",
-       "                                                   count_steps  count  \n",
-       "agent_name                                   task                      \n",
-       "code_gpt4o_03_february_goodoldtext-unbroken  1        7.000000     53  \n",
-       "                                             2        8.511905     84  \n",
-       "                                             3       10.409091     22  \n",
-       "code_gpt4o_03_february_magenticbrowser       1        7.153846     52  \n",
-       "                                             2        8.168675     83  \n",
-       "...                                                        ...    ...  \n",
-       "code_o3-mini_03_february_remove-navigational 2        4.976744     86  \n",
-       "                                             3        6.615385     26  \n",
-       "code_qwen-coder-32B_03_february_text         1        5.428571     14  \n",
-       "                                             2        6.409091     22  \n",
-       "                                             3        6.571429      7  \n",
-       "\n",
-       "[65 rows x 4 columns]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "display(\"Average score:\", sel_df.groupby(\"agent_name\")[[\"is_correct\"]].mean().round(3))\n",
     "display(\n",
-    "    sel_df.groupby([\"agent_name\", \"task\"])[[\"is_correct\", \"is_near_correct\", \"count_steps\", \"question\"]]\n",
+    "    sel_df.groupby([\"agent_name\", \"task\"])[[\"is_correct\", \"is_near_correct\", \"count_steps\", \"question\", \"duration\"]]\n",
     "    .agg(\n",
     "        {\n",
     "            \"is_correct\": \"mean\",\n",
     "            \"is_near_correct\": \"mean\",\n",
     "            \"count_steps\": \"mean\",\n",
     "            \"question\": \"count\",\n",
+    "            \"duration\": \"mean\",\n",
     "        }\n",
     "    )\n",
     "    .rename(columns={\"question\": \"count\"})\n",
@@ -917,9851 +216,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.plotly.v1+json": {
-       "config": {
-        "plotlyServerURL": "https://plot.ly"
-       },
-       "data": [
-        {
-         "customdata": [
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_gpt4o_03_february_goodoldtext-unbroken<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_gpt4o_03_february_goodoldtext-unbroken",
-         "line": {
-          "color": "#636efa",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_gpt4o_03_february_goodoldtext-unbroken",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4A",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA6D8zMzMzMzPjP1VVVVVVVeU/t23btm3b5j8AAAAAAADoP1VVVVVVVeU/MzMzMzMz4z9ddNFFF13kP1VVVVVVVeU/dmIndmIn5j8lSZIkSZLkPzMzMzMzM+M/AAAAAAAA4j/T0tLS0tLiP3Icx3Ecx+E/bCivobyG4j+amZmZmZnhP5IkSZIkSeI/dNFFF1104T8LWchCFrLgP1VVVVVVVeE/7FG4HoXr4T+xEzuxEzvhP3sJ7SW0l+A/AAAAAAAA4D/d0wjLPY3gPxEREREREeE/hBBCCCGE4D8AAAAAAADgPwgffPDBB98/AAAAAAAA4D9f8RVf8RXfP47jOI7jON4/fdYNpshn3T8bymsor6HcP1y+5Vu+5ds/zczMzMzM3D8ZnI/B+RjcPz3P8zzP89w/EnfEHXFH3D+jiy666KLbPxzHcRzHcdw/05ve9KY33T94Nuo7G/XdP1VVVVVVVd0/L6fg5RS83D9xPQrXo3DdP93c3Nzc3Nw/7MRO7MRO3D8iNcF4K/vcPxPaS2gvod0/F1100UUX3T8lSZIkSZLcPx/BfQT3Edw/GmG5pxGW2z91Xx5bETTcP7y7u7u7u9s/Q7CONu9T3D/fe++9997bP9u2bdu2bds/AAAAAAAA2z9bqZVaqZXaPyebbLLJJts/NSbSA5Wz2z88PDw8PDzcP8y1A3PtwNw/fMVXfMVX3D8LmwOJVtjcPxzHcRzHcdw/4MCBAwcO3D/QusEU+azbP08b6LSBTts/KK+hvIby2j++Y2pg75jaPxqkQRqkQdo/2TMQlY7s2T+amZmZmZnZP+Dp1vywSNk/+hicj8H52D+q82sPuazYP0mSJEmSJNk/2djY2NjY2D9T1pQ1ZU3ZPzv0m61Dv9k/L7rooosu2j+e8YxnPOPZP5qZmZmZmdk/WqAFWqAF2j+c3vSmN73ZP3bZZZdddtk/Z6O+s1Hf2T+amZmZmZnZPwAAAAAAANo/Grab5Ulk2j+IxvrQWB/aPywFav1Kgdo/PQrXo3A92j8ZvhEFJp3aP/v6+vr6+to/G0PTHey32j87sRM7sRPbP9u2bdu2bds/ln0OqQnG2z8T6J26loPbPya0l9BeQts/JrBpP1kC2z/D2jesfcPaP5ax/Y5eGds/27Zt27Zt2z80+bJBky/bPyivobyG8to/q8FzBIq22j8+jbDc0wjbP5u1WZu1Wds/BA0ndV8e2z+bCOSaCOTaPzMzMzMzM9s/hYn3I6f52j+f4pIhWEfbPw8b6bCRDts/W2uttdZa2z/ZzvdT46XbP/y+7/u+79s/7na73W632z8AAAAAAADcP/KGvCFvyNs/HLmRG7mR2z8j+oDq2FvbPyebbLLJJts/27Zt27Zt2z9YYyI9UDnbP1uwBVuwBds/09LS0tLS2j/TVwljs6DaP6c3velNb9o/D+jGPH202j87qIM6qIPaP2le/ImEU9o/gkQrbA4k2j9r/N08QvXZP3Icx3Ecx9k/mpmZmZmZ2T/Lli1btmzZP2x21CLkr9k/I591gyny2T9SkPx5lcXZP5qZmZmZmdk/y7hl3DJu2T82lNdQXkPZP9ouhNkuhNk/EWflJ8RZ2T8wWf6S5S/ZP2mQBmmQBtk/fo/ICcLd2D9rcRPmd7XYP3bpMX+vjdg/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ]
-         ],
-         "hovertemplate": "agent_name=code_gpt4o_03_february_magenticbrowser<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_gpt4o_03_february_magenticbrowser",
-         "line": {
-          "color": "#EF553B",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_gpt4o_03_february_magenticbrowser",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4A",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACamZmZmZnJP1VVVVVVVcU/kiRJkiRJwj8AAAAAAADAPxzHcRzHcbw/mpmZmZmZyT900UUXXXTRPwAAAAAAANA/FDuxEzux0z+3bdu2bdvWP1VVVVVVVdU/AAAAAAAA1D/T0tLS0tLSP3Icx3Ecx9E/eQ3lNZTX0D8AAAAAAADQP5IkSZIkSdI/dNFFF1100T84velNb3rTP1VVVVVVVdU/exSuR+F61D8UO7ETO7HTP2gvob2E9tI/kiRJkiRJ0j8Jyz2NsNzTPzMzMzMzM9M/lVJKKaWU0j8AAAAAAADUP1VVVVVVVdU/tbS0tLS01D/UQR3UQR3UP+Q4juM4jtM/HEyRz7rB1D9RXkN5DeXVP1VVVVVVVdU/ZmZmZmZm1j/blahdidrVP7dt27Zt29Y/lTVlTVlT1j9GF1100UXXPxdswRZswdY/etOb3vSm1z9dQUyuICbXPwAAAAAAANg/4eUUvJyC1z8K16NwPQrXP9jX19fX19c/J3ZiJ3Zi1z9ln0NqgvHWP0xoL6G9hNY/RhdddNFF1z+3bdu2bdvWP0xnMZ3FdNY/fBphuacR1j/QcFL35bHVP1VVVVVVVdU/yRCso8371D+ttdZaa63VP1VVVVVVVdU/AAAAAAAA1T/VSq3USq3UP1VVVVVVVdU/0gOVs1v41T+mpaWlpaXVP1VVVVVVVdU/Fl/xFV/x1T8g0QqbA4nWP47jOI7jONY/r169evXq1T/JZ91ginzWPwrXo3A9Ctc/ymsor6G81j8oxFn5CXHWP3ZiJ3ZiJ9Y/Xi1uwvyu1j9mZmZmZmbWP6QMPN2aH9Y/25WoXYna1T80dX7tIZfVP1VVVVVVVdU/FRUVFRUV1T82ZU1ZU9bUPy+QSfECmdQ/XXTRRRdd1D9CEYpQhCLUP5Q+6ZM+6dM/VEZlVEZl1D9DFrKQhSzUP6WUUkoppdQ/Ut/ZqO9s1D8mTv2eW+LUP1VVVVVVVdU/1g86KvDF1T9jfWisD43VP1DrVwrU+tU/w/UoXI/C1T+bB7nrZ4vVP/b19fX19dU/2xia7mC/1T+e2Imd2InVP1VVVVVVVdU/2eeQmmC81T/XcnCzX4jVP9FeQnsJ7dU//mQJbNpP1j8c1r5h7RvWP49eGdvv6NU/btu2bdu21T96amGlpxbWP0xnMZ3FdNY/bTV4jkDR1j9Y7mmE5Z7WP9ZmbdZmbdY/QcNJ3ZfH1j/XRCDXRCDXP3d3d3d3d9c/RhdddNFF1z8RrKPN+xTXP+UWT27x5NY/Ouecc8451z8K16NwPQrXP9d1Xdd1Xdc/7PV6vV6v1z8AAAAAAIDXP/QFfUFf0Nc/GHqhF3qh1z/f2jDNXfDXP8IHH3zwwdc/9oDZA2YP2D9JD1TObuHXP0J7Ce0ltNc/iIeHh4eH1z82C6o9J9PXP4K5dmCuHdg/6qPVJETx1z+ogzqogzrYP2C3x1qGDtg/Zfx2qSfj1z/MknJAZLjXP+Q4juM4jtc/J0p2baJk1z+6c+fOnTvXP+HlFLycgtc/n3WDKfJZ1z99GzBU0zHXP3d3d3d3d9c/uj5dn65P1z+H8hrKayjXP1esAVesAdc/t23btm3b1j+21lprrbXWPwdpkAZpkNY/dRhlKp5r1j9eLW7C/K7WP+EMCCV3itY/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_gpt4o_03_february_magenticbrowser2<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_gpt4o_03_february_magenticbrowser2",
-         "line": {
-          "color": "#00cc96",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_gpt4o_03_february_magenticbrowser2",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsA",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADgPxzHcRzHcdw/AAAAAAAA4D8XXXTRRRfdPwAAAAAAAOA/sRM7sRM74T+SJEmSJEniPxEREREREeE/AAAAAAAA4D8eHh4eHh7ePwAAAAAAAOA/DeU1lNdQ3j/NzMzMzMzcP57neZ7ned4/F1100UUX3T+96U1vetPbP1VVVVVVVd0/KVyPwvUo3D87sRM7sRPbPy+hvYT2Eto/27Zt27Zt2z9huacRlnvaP5qZmZmZmdk/11prrbXW2j8AAAAAAADcPxdddNFFF90/PDw8PDw83D/btm3btm3bP6uqqqqqqto/0LrBFPms2z8or6G8hvLaPxqkQRqkQdo/mpmZmZmZ2T+J2pWoXYnaP9u2bdu2bds/EnfEHXFH3D8XXXTRRRfdPxzHcRzHcdw/velNb3rT2z9yBTG5gpjcP1VVVVVVVd0/g5dT8HIK3j9xPQrXo3DdP93c3Nzc3Nw/7MRO7MRO3D8iNcF4K/vcPxzHcRzHcdw/7RvWvmHt2z8lSZIkSZLcPx/BfQT3Edw/1AjLPY2w3D91Xx5bETTcP83MzMzMzNw/532KS4Zg3T/nnHPOOefcP13XdV3Xdd0/AAAAAAAA3T/dyI3cyI3cPxdddNFFF90/rDGRHqic3T8tLS0tLS3dP8y1A3PtwNw/fMVXfMVX3D8yfrvUk/HbPxzHcRzHcdw/4MCBAwcO3D/QusEU+azbP08b6LSBTts/KK+hvIby2j/btm3btm3bP1y+5Vu+5ds/FzdhfleL2z8zMzMzMzPbP35YpAw83do/idqVqF2J2j/ksmKghDfaP3qe53me59k/mpmZmZmZ2T9T1pQ1ZU3ZPxUvkEnxAtk/dNFFF1102T+TlaxkJSvZP5qZmZmZmdk/GZVRGZVR2T+RhSxkIQvZP3bZZZdddtk/5QpicgUx2T+amZmZmZnZP1VVVVVVVdk/mYbtZnkS2T801ofG+tDYPzbZZJNNNtk/9ihcj8L12D+qeZC7frbYPxkZGRkZGdk/i/gEUsl52T+xEzuxEzvZP5qZmZmZmdk/fg6pCcZb2T/7hVhRGh/ZPzmO4ziO49g/koq51Rmp2D9wWPuGtW/YP6+M7Xf0ytg/JUmSJEmS2D/pqYWVnlrYPzqL6Syms9g/iHG/Lql82D/LPY2w3NPYP9mJndiJndg/6r48tiJo2D9YoTNYoTPYPwAAAAAAANg/Kky8HznN1z/jkiFYR5vXP2pXonYlatc/vvfee++91z+q8dJNYhDYP/h93/d939c/DAaDwWAw2D8AAAAAAADYPxT2hD1hT9g/+IEf+IEf2D/pA6pjb23YPz744IMPPtg/qYilIpaK2D8m0gOVs1vYP9iCLdiCLdg/AAAAAAAA2D9Q7TmZvkrYP4mfUeJnlNg/TGV71wHd2D/5iq/4iq/YP2JyBTG5gtg/0QqbA4lW2D/ZiZ3YiZ3YPzmO4ziO49g/0nmLIZ232D/EiBEjRozYPzTWh8b60Ng/YYp81g2m2D/oVRZntHvYP1K4HoXrUdg/waJgUbAo2D8AAAAAAADYP9jX19fX19c/1cDeMTWw1z+JV5F4FYnXPyd2Yid2Ytc/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "According to the World Bank, which countries had g"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_gpt4o_03_february_text<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_gpt4o_03_february_text",
-         "line": {
-          "color": "#ab63fa",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_gpt4o_03_february_text",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVdU/AAAAAAAA4D8zMzMzMzPjP1VVVVVVVeU/kiRJkiRJ4j8AAAAAAADkP3Icx3Ecx+E/AAAAAAAA4D8XXXTRRRfdPwAAAAAAAOA/sRM7sRM74T8AAAAAAADgP97d3d3d3d0/AAAAAAAA3D9aWlpaWlraPzmO4ziO49g/KK+hvIby2j+amZmZmZnZP9u2bdu2bds/L7rooosu2j+96U1vetPbP6uqqqqqqto/mpmZmZmZ2T/ZiZ3YiZ3YPy+hvYT2Eto/27Zt27Zt2z/UCMs9jbDcP7y7u7u7u9s/55xzzjnn3D8AAAAAAADcPxdddNFFF90/PDw8PDw83D8d1EEd1EHdP47jOI7jON4/KvJZN5gi3z8N5TWU11DeP9/yLd/yLd8/AAAAAAAA4D84H4PzMTjfPwAAAAAAAOA/0Bf0BX1B3z8AAAAAAADgP5/0SZ/0Sd8/6k1vetOb3j94Nuo7G/XdP1VVVVVVVd0/L6fg5RS83D8pXI/C9SjcP93c3Nzc3Nw/7MRO7MRO3D+WfQ6pCcbbPya0l9BeQts/7RvWvmHt2z8lSZIkSZLcPx/BfQT3Edw/1AjLPY2w3D91Xx5bETTcP7y7u7u7u9s/Q7CONu9T3D/fe++9997bP9u2bdu2bds/AAAAAAAA2z8cuZEbuZHbPx988MEHH9w/NSbSA5Wz2z9LS0tLS0vbP73pTW9609s/27Zt27Zt2z8yfrvUk/HbP+Q4juM4jts/2bJly5Yt2z/QusEU+azbP08b6LSBTts/KK+hvIby2j++Y2pg75jaPxqkQRqkQdo/2TMQlY7s2T9mZmZmZmbaPy+hvYT2Eto/idqVqF2J2j+CEt5o6vzaP6uqqqqqqto/WlpaWlpa2j+zpqwpa8raP2G5pxGWe9o/L7rooosu2j+e8YxnPOPZP/qkT/qkT9o/WqAFWqAF2j+c3vSmN73ZPyeaaKKJJto/Z6O+s1Hf2T+amZmZmZnZPwAAAAAAANo/Wp5EpmG72T+IxvrQWB/aPzFvZ0jM29k/mpmZmZmZ2T96kLt+tljZP7q5ubm5udk/i/gEUsl52T+KndiJndjZP5qZmZmZmdk/fg6pCcZb2T+B3qlrObjZP7SX0F5Ce9k/XJ2RirnV2T+amZmZmZnZP+mVsf2OXtk/btu2bdu22T+hyZcNmnzZPzGdxXQW09k/mpmZmZmZ2T+oEZZ7GmHZP1qbtVmbtdk/lLovj60I2j8arNAZrNDZPyIiIiIiIto/vB85zdfq2T/8FJcMwTraP4nalahdido/U0oppZRS2j/pJjEIrBzaP3qe53me59k/bTabzWaz2T8AAAAAAIDZP3PGnDFnzNk/mpmZmZmZ2T8GfxUnpOTZP7LJJptsstk/wp8Jfyb82T+/GhPpgcrZP5qZmZmZmdk/aWlpaWlp2T+fk+mrhLHZP6BR4meU+Nk/rSYhir/I2T+amZmZmZnZP2bogN0ea9k/FjYHEq2w2T/lgMhwr4LZP3Icx3Ecx9k/famg1ZcK2j/SpEmTJk3aP4jG+tBYH9o/DqbIZ91g2j8h+fMqizPaPwc6baDTBto/z2pntbPa2T/zGsprKK/ZP9ouhNkuhNk/EWflJ8RZ2T8wWf6S5S/ZP2mQBmmQBtk/fo/ICcLd2D9rcRPmd7XYP3bpMX+vjdg/ZmZmZmZm2D+vUkzQXaXYP5Ey8HRrftg/GFuCb/NX2D8yOB+D8zHYPwyYxoBpDNg/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_llama-3<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_llama-3",
-         "line": {
-          "color": "#FFA15A",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_llama-3",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZ",
-          "dtype": "i1"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEREREREbE/AAAAAAAAsD8eHh4eHh6uPxzHcRzHcaw/KK+hvIbyqj+amZmZmZmpPxiGYRiGYag/RhdddNFFpz9kIQtZyEKmP1VVVVVVVaU/exSuR+F6pD8UO7ETO7GjP2gvob2E9qI/kiRJkiRJoj+WexphuaehPxEREREREaE/hBBCCCGEoD8AAAAAAACgPwgffPDBB58/Hh4eHh4erj8d1EEd1EGtPxzHcRzHcaw/0LrBFPmsqz8or6G8hvKqPxqkQRqkQao/MzMzMzMzsz+7ErUrUbuyP5IkSZIkSbI/d8QdcUfcsT900UUXXXSxPxEREREREbE/ZCELWchCtj9XEJMriMm1P1VVVVVVVbU/OQUvp+DltD97FK5H4Xq0PxQUFBQUFLQ/FDuxEzuxsz/BeCv7HFKzP2gvob2E9rI/nhLkKUGesj+SJEmSJEmyP3AfwX0E97E/fBphuacRtj/QcFL35bG1P1VVVVVVVbU/yRCso837tD/GGGOMMca4PxiGYRiGYbg/AAAAAAAAuD8YeqEXeqG3P0YXXXTRRbc/jYn0QOXstj+XlpaWlpa2P2QhC1nIQrY/Fl/xFV/xtT9ItMLmQKK1P1VVVVVVVbU/qFChQoUKtT8cTJHPusG0P3sUrkfherQ/XkN5DeU1tD/Oyk+Is/KzP5dv+ZZv+bY/Xi1uwvyutj9mZmZmZma2P6QMPN2aH7Y/25WoXYnatT80dX7tIZe1P1VVVVVVVbU/FRUVFRUVtT82ZU1ZU9a0Py+QSfECmbQ/XXTRRRddtD9CEYpQhCK0P5Q+6ZM+6bM/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "According to the World Bank, which countries had g"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_01_february_text<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_01_february_text",
-         "line": {
-          "color": "#19d3f3",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_01_february_text",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA6D+amZmZmZnpP1VVVVVVVeU/t23btm3b5j8AAAAAAADoP1VVVVVVVeU/ZmZmZmZm5j9ddNFFF13kP6uqqqqqquI/FDuxEzux4z+SJEmSJEniPxEREREREeE/AAAAAAAA4j/x8PDw8PDgPwAAAAAAAOA/DeU1lNdQ3j/NzMzMzMzcP57neZ7ned4/F1100UUX3T+96U1vetPbP6uqqqqqqto/KVyPwvUo3D+e2Imd2IndPxzHcRzHcdw/btu2bdu23T9HWO5phOXePwAAAAAAAOA/hBBCCCGE4D8AAAAAAADgP3zwwQcffOA/AAAAAAAA4D9QB3VQB3XgPzmO4ziO4+A/whT5rBtM4T95DeU1lNfgP7ETO7ETO+E/zczMzMzM4D8sUbsStSvhPzEMwzAMw+A/R9wRd8Qd4T900UUXXXThPxEREREREeE/C1nIQhay4D/E5ApicgXhP6uqqqqqquA/FbycgpdT4D+kcD0K16PgP/Hw8PDw8OA/sRM7sRM74T9vZZ9DaoLhP3Icx3Ecx+E/CfKUIE8J4j9u27Zt27bhP3AfwX0E9+E/lnsaYbmn4T8NJ3VfHlvhPxEREREREeE/DcE62rxP4T+MMcYYY4zhP1EURVEUReE/AAAAAAAA4T/RC73QC73gP/jggw8++OA/TKQHKme34D/x8PDw8PDgPxM/o8TPKOE/8RVf8RVf4T8OJFphcyDhPzmO4ziO4+A/iREjRowY4T/CFPmsG0zhPxEREREREeE/NpTXUF5D4T/lJ8RZ+QnhP7ETO7ETO+E/BqLSkT0D4T8zMzMzMzPhPyNl4OnW/OA/LFG7ErUr4T9T59ceclnhP0mSJEmSJOE/8fDw8PDw4D8w6Av6gr7gP93TCMs9jeA/XXTRRRdd4D8DF7jABS7gPwAAAAAAAOA/0AIt0AIt4D+GLGQhC1ngP4QQQgghhOA/QUyuICZX4D+yAmGkHSvgP1VVVVVVVeA/8MXVDzoq4D8VvJyCl1PgP3+lQK1fKeA/UrgehetR4D8cUWDSqXngP6GgoKCgoOA/9lttDE134D/sxE7sxE7gP3ACJ3ACJ+A/463sc0hN4D8hVpTGRybgPwAAAAAAAOA/WQKb9pMl4D8AAAAAAADgP04CcaHmJOA/kiRJkiRJ4D/3QwJvPyTgP34E9xHcR+A/AkVbDZ4j4D/uaYTlnkbgPzACIzACI+A/AAAAAAAA4D/gKLvfKLvfP3d3d3d3d98/jmVQKky83z8uGYJ1tHnfPzgfg/MxON8/+N5777333j8IrBxaZDvfP7/v+77v+94/0Ofz+Xw+3z8AAAAAAIDfP/AH/AF/wN8/IPiBH/iB3z97a8M0d8HfP4QPPvjgg98/c/TN0TdH3z9FeqBydgvfP5/0SZ/0Sd8/Dw8PDw8P3z/ZLKj2nEzfP/EzSvyMEt8/Rs6w4FLZ3j9f8RVf8RXfP31no76zUd8/lPHbpZ6M3z89QvWZtsbfPwAAAAAAAOA/Dnj84YDH3z8AAAAAAADgP/LX7KhFyN8/AAAAAAAA4D+ZS4QnBcnfPwAAAAAAAOA//iZ/k7/J3z8AAAAAAADgPyB1yh91yt8/cVZ+QpyV3z9hHxf2cWHfP9/yLd/yLd8/PiInCHdj3z9hfleLmzDfPzqkJhhvZd8/mpmZmZmZ3z+P5g82Hs3fP1ikDDzdmt8/sxpFHDpp3z+cj8H5GJzfP2vfsPYNa98/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "According to the World Bank, which countries had g"
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_03_february_ablation-toolcalling-manager<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_03_february_ablation-toolcalling-manager",
-         "line": {
-          "color": "#FF6692",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_03_february_ablation-toolcalling-manager",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAAAAAFVVVVVVVdU/AAAAAAAA4D8zMzMzMzPjP1VVVVVVVeU/kiRJkiRJ4j8AAAAAAADkP3Icx3Ecx+E/AAAAAAAA4D900UUXXXThPwAAAAAAAOA/ntiJndiJ3T/btm3btm3bP97d3d3d3d0/AAAAAAAA3D9aWlpaWlraPxzHcRzHcdw/KK+hvIby2j/NzMzMzMzcP57neZ7ned4/F1100UUX3T+96U1vetPbP6uqqqqqqto/mpmZmZmZ2T87sRM7sRPbPxzHcRzHcdw/27Zt27Zt2z9huacRlnvaP7y7u7u7u9s/11prrbXW2j8AAAAAAADcPyebbLLJJts/WlpaWlpa2j/btm3btm3bP6uqqqqqqto/I591gyny2T8or6G8hvLaP1y+5Vu+5ds/MzMzMzMz2z+J2pWoXYnaP3qe53me59k/s6asKWvK2j8vuuiiiy7aP1uwBVuwBds/velNb3rT2z9t1Hc26jvbP6uqqqqqqto/iMb60Fgf2j+amZmZmZnZPxkZGRkZGdk/2Ymd2Imd2D9+DqkJxlvZPy+hvYT2Eto/mpmZmZmZ2T9JkiRJkiTZPzqL6Syms9g/7mmE5Z5G2D+yFUHDSd3XP3d3d3d3d9c/EayjzfsU1z+21lprrbXWP5ZlWZZlWdY/AAAAAAAA1z9XaqVWaqXWP0422WSTTdY/jYn0QOXs1j+Ih4eHh4fXP3PtwFw7MNc/t23btm3b1j8g0QqbA4nWP47jOI7jONY/r169evXq1T/yWTeYIp/VPzCW/GLJL9Y/UV5DeQ3l1T8KcVZ+QpzVP3ZiJ3ZiJ9Y/v6vFTZjf1T9mZmZmZmbWP/PDImXg6dY/onYlalei1j/S1Pm1h1zWP4ZhGIZhGNY/1tXV1dXV1T9lTVlT1pTVP1VVVVVVVdU/F1100UUX1T9ObWpTm9rUP/VJn/RJn9Q/lVEZlVEZ1T9Ob3rTm97UP1VVVVVVVdU/1Hc26jsb1T8mTv2eW+LUP1VVVVVVVdU/Ffji6gcd1T85BS+n4OXUP1VVVVVVVdU/H4XrUbge1T+bB7nrZ4vVP/b19fX19dU/Iz6BVHJe1j92Yid2YifWP9dojdZojdY/n0NqgvFW1j9dy8HNfiHWP0xoL6G9hNY/Y251Rirm1j+x9g1r37DWPwJxoeYkENc/t23btm3b1j9WemphpafWP0xnMZ3FdNY/ZCELWchC1j9Y7mmE5Z7WP9ZmbdZmbdY/CRpO6r481j9W6AxW6AzWP2ZmZmZmZtY/fa2eHQI31j9t3qe4ZAjWP2DW+2W9X9Y/MsYYY4wx1j+6SQwCK4fWP7dt27Zt29Y/q9VqtVqt1j8AAAAAAIDWP7UlbUlb0tY/N3IjN3Ij1z/LiD6gOvbWP8omm2yyydY/3Wl1p9Wd1j+NifRA5ezWP61z5QHJOtc/iIeHh4eH1z8cKRrij1vXP3PtwFw7MNc/iOIvcoYF1z+3bdu2bdvWP1uGDtjtsdY/INEKmwOJ1j/Am0eoPtPWP6uqqqqqqtY/QzpvMaTz1j+2bNmyZcvWP6lFyF+zo9Y/yWfdYIp81j+vsjij3cPWP5020GkDndY/tNpZ7ax21j8N5TWU11DWP9aAK9aAK9Y/mRrYO6YG1j/iVSReReLVP3ZiJ3ZiJ9Y/SS9/2kID1j+/q8VNmN/VP9nnkJpgvNU/mpmZmZmZ1T+hu0oxQXfVP1VVVVVVVdU/0j5IBtQz1T8TtStRuxLVP/KUIE8J8tQ/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_03_february_fix-print-outputs<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_03_february_fix-print-outputs",
-         "line": {
-          "color": "#B6E880",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_03_february_fix-print-outputs",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAA==",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA6D8zMzMzMzPjPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADkP1VVVVVVVeU/ZmZmZmZm5j9GF1100UXnP1VVVVVVVeU/dmIndmIn5j+3bdu2bdvmP1VVVVVVVeU/AAAAAAAA5j+1tLS0tLTkP1VVVVVVVeU/UV5DeQ3l5T9mZmZmZmbmP1VVVVVVVeU/XXTRRRdd5D9Ob3rTm97kPwAAAAAAAOQ/MzMzMzMz4z9iJ3ZiJ3biP3Icx3Ecx+E/SZIkSZIk4T+WexphuafhPyIiIiIiIuI/jDHGGGOM4T8AAAAAAADhP3TRRRdddOE/4uHh4eHh4T+SJEmSJEniP3Icx3Ecx+E/mCKfdYMp4j/zGsprKK/hP7ETO7ETO+E/zczMzMzM4D8sUbsStSvhP2IYhmEYhuE/d8QdcUfc4T8vuuiiiy7iP9InfdInfeI/IQtZyEIW4j9HfWejvrPhPwAAAAAAAOI/aKwPjfWh4T9I4XoUrkfhP5KRkZGRkeE/sRM7sRM74T9vZZ9DaoLhP3Icx3Ecx+E/dNFFF1104T9JkiRJkiThP3UW01lMZ+E/uacRlnsa4T/VfXlsRdDgPxEREREREeE/DcE62rxP4T8IIYQQQgjhPzEMwzAMw+A/AAAAAAAA4T/RC73QC73gP3zwwQcffOA/TKQHKme34D94eHh4eHjgPwtZyEIWsuA/oQ7qoA7q4D8OJFphcyDhP1VVVVVVVeE/jBgxYsSI4T/CFPmsG0zhP36x5BdLfuE/8xrKayiv4T8De8fUwN7hP9IgDdIgDeI/pSN7BqLS4T+amZmZmZnhP8rA0635YeE/LFG7ErUr4T9T59ceclnhP0mSJEmSJOE/UVFRUVFR4T9f0Bf0BX3hP5Z7GmG5p+E/dNFFF1104T8UoQhFKELhPxEREREREeE/sRM7sRM74T8WspCFLGThPzTRRBNNNOE/BTG5gphc4T9BGGnHCoThP6uqqqqqquE/UoEvrn7Q4T99aKwPjfXhP29nSMzbGeI/PQrXo3A94j+LleEbUWDiPzIyMjIyMuI/Kjkvi/gE4j92Yid2YifiP7If+7Ef++E/UhOMt7LP4T+zX4gVpfHhP3Icx3Ecx+E/1hmpmFud4T+/Ye0b1r7hP18Z2+/oleE/btu2bdu24T+c6xjFuY7hP/Maymsor+E/HYGirQbP4T+E5Z5GWO7hP9IgDdIgDeI/RdBwUvfl4T9Sdr9Rdr/hP97d3d3d3eE/WQalwsT74T/ep7hkCNbhP/QxOB+D8+E/zjnnnHPO4T9Ei2zn+6nhP2IYhmEYhuE/aTQajUaj4T8AAAAAAMDhP2fMGXPGnOE/oRd6oRd64T9gxQkpeZbhP3TRRRdddOE/LBWxVMRS4T8qZ7fwqzHhP9wUo4a/TeE/aWlpaWlp4T/Ircs74EjhPxaykIUsZOE/P1pNQhR/4T+amZmZmZnhP8afSDileeE/RStsDiRa4T+xEzuxEzvhP8dxHMdxHOE/smsTJbs24T+JESNGjBjhPz801ofG+uA/TJHPusEU4T83YKimYy7hP0jhehSuR+E/ianEVGIq4T/YUF5DeQ3hP9F7JtF7JuE/5SfEWfkJ4T/uUN0O1e3gPyEN0iAN0uA/DVjSy5+24D+fgah0ZM/gP2dAKLlTtOA/mpmZmZmZ4D+aP9h4NH/gP6hb88MiZeA/axRx6KR94D+WqF2J2pXgPw==",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_03_february_fix-print-outputs2<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_03_february_fix-print-outputs2",
-         "line": {
-          "color": "#FF97FF",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_03_february_fix-print-outputs2",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsA",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA6D+amZmZmZnpP6uqqqqqquo/t23btm3b5j8AAAAAAADoPzmO4ziO4+g/mpmZmZmZ6T9GF1100UXnP1VVVVVVVeU/dmIndmIn5j8lSZIkSZLkP1VVVVVVVeU/AAAAAAAA5j+1tLS0tLTkP+Q4juM4juM/bCivobyG4j8zMzMzMzPjP/Q8z/M8z+M/6aKLLrro4j84velNb3rjP6uqqqqqquI/MzMzMzMz4z8UO7ETO7HjP2gvob2E9uI/27Zt27Zt4z8Jyz2NsNzjP0REREREROQ/nXPOOeec4z8AAAAAAADjP2WTTTbZZOM/09LS0tLS4j8zMzMzMzPjP+Q4juM4juM/bzBFPusG4z/lNZTXUF7jPxQ7sRM7seM/AAAAAAAA5D9L1K5E7UrkPyVJkiRJkuQ/NmVNWVPW5D9ddNFFF13kP/VJn/RJn+Q/QxaykIUs5D/PRn1no77jPwAAAAAAAOQ/5hS8nIKX4z8zMzMzMzPjP3Nzc3Nzc+M/O7ETO7ET4z/BeCv7HFLjP2gvob2E9uI/MzMzMzMz4z+3bdu2bdviP2wor6G8huI/c08jLPc04j9+eWxF0HDiP6uqqqqqquI/sI4271Nc4j+VUkoppZTiP7Msy7Isy+I/AAAAAAAA4z8zMzMzMzPjP+miiy666OI/w6/GRHqg4j/T0tLS0tLiPzDXDsy1A+M/MzMzMzMz4z+/XerJ+O3iP6uqqqqqquI/kyZNmjRp4j+DKfJZN5jiP8aSXyz5xeI/bCivobyG4j+SJEmSJEniP9IgDdIgDeI/dWTPQFQ64j9mZmZmZmbiP8HTrflhkeI/uxK1K1G74j+Ops6vPeTiP8MwDMMwDOM/09LS0tLS4j+/oC/oC/riP+MFMileIOM/6aKLLrro4j8xhznMYQ7jPzMzMzMzM+M/0y/90i/94j+ykIUsZCHjP+2yyy677OI/TK4gJlcQ4z/PLXHq99ziP6uqqqqqquI/8yQyDdvN4j+n4OUUvJziP2r9SoFav+I/4XoUrkfh4j/zIHf9bLHiP4OCgoKCguI/cl4W8Qmk4j9iJ3ZiJ3biP5IkSZIkSeI/NcF4K/sc4j/2C7GiND7iP+0ltJfQXuI/OyMVc6sz4j9UgjwlyFPiPzUngbhQc+I/kiRJkiRJ4j94+yGBtx/iP+4juI/gPuI/IQtZyEIW4j9zTyMs9zTiP9IgDdIgDeI/4qTuy2Mr4j+SJEmSJEniP2ZmZmZmZuI/y6BUmHg/4j9Hm/cpLhniP/QxOB+D8+E/zjnnnHPO4T/sUbgehevhP3Icx3Ecx+E/aTQajUaj4T8AAAAAAMDhP3fEHXFH3OE/gh/4gR/44T/lWUb0AdXhP/DBBx988OE/3xx9c/TN4T92C78aE+nhPz0gWefKA+I/Hh4eHh4e4j/8cevyDjjiPyV+RomfUeI/oBvz9NFq4j+SJEmSJEniP8oVxOQKYuI/U0/Gb5d64j9EhnsVzJLiPxzHcRzHceI/bBMluzZR4j8SI0aMGDHiP5IkSZIkSeI/mCKfdYMp4j/TMZcITwriPyIiIiIiIuI/kuZIc6Q54j+vobyG8hriP1Kn/FGn/OE/y0+Is/IT4j/2cWEfF/bhP4qd2Imd2OE/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "According to the World Bank, which countries had g"
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_03_february_goodoldtext-unbroken<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_03_february_goodoldtext-unbroken",
-         "line": {
-          "color": "#FECB52",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_03_february_goodoldtext-unbroken",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAA==",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA6D8zMzMzMzPjP1VVVVVVVeU/t23btm3b5j8AAAAAAADoPzmO4ziO4+g/ZmZmZmZm5j9ddNFFF13kP1VVVVVVVeU/dmIndmIn5j+3bdu2bdvmP3d3d3d3d+c/AAAAAAAA5j+XlpaWlpbmP8dxHMdxHOc/UV5DeQ3l5T9mZmZmZmbmP7dt27Zt2+Y/0UUXXXTR5T9Ob3rTm97kP1VVVVVVVeU/w/UoXI/C5T/FTuzETuzkP1VVVVVVVeU/btu2bdu25T98GmG5pxHmP1VVVVVVVeU/rbXWWmut5T8AAAAAAADlP1VVVVVVVeU/tbS0tLS05D91UAd1UAflPxzHcRzHceQ/HEyRz7rB5D/YUF5DeQ3lPzVIgzRIg+Q/zczMzMzM5D9L1K5E7UrkPyVJkiRJkuQ/NmVNWVPW5D9ddNFFF13kP/VJn/RJn+Q/QxaykIUs5D/PRn1no77jPwAAAAAAAOQ/5hS8nIKX4z/Xo3A9CtfjP3Nzc3Nzc+M/O7ETO7ET4z/7HFITjLfiP+0ltJfQXuI/CfKUIE8J4j9u27Zt27bhP3AfwX0E9+E/lnsaYbmn4T8NJ3VfHlvhP5qZmZmZmeE/3qe4ZAjW4T8RQgghhBDiP3Icx3Ecx+E/AAAAAAAA4j+SG7mRG7nhP/DBBx988OE/CCpnt/Cr4T/i4eHh4eHhPyELWchCFuI/kiRJkiRJ4j9TT8Zvl3riP47jOI7jOOI/kyZNmjRp4j+YIp91gyniP+xRuB6F6+E/r6G8hvIa4j8De8fUwN7hP9IgDdIgDeI/dWTPQFQ64j8AAAAAAADiPxl4ujU/LOI/9DE4H4Pz4T/xRlPn1x7iP5IkSZIkSeI/cnJycnJy4j+PuCPuiDviP7xAJsULZOI/jC666KKL4j8rWclKVrLiP4Mt2IIt2OI/0y/90i/94j+ykIUsZCHjP+2yyy677OI/C2JyBTG54j/PLXHq99ziP6uqqqqqquI/8yQyDdvN4j+8nIKXU/DiP2r9SoFav+I/4XoUrkfh4j9brAzfiALjPyMjIyMjI+M/FvEJpJLz4j9P7MRO7MTiP3Mpl3Ipl+I/GG9ln0Nq4j85uNkvxIriP+0ltJfQXuI/OyMVc6sz4j9UgjwlyFPiP5gin3WDKeI/AAAAAAAA4j+Kcx2jONfhP3AfwX0E9+E/IQtZyEIW4j+E5Z5GWO7hP3Icx3Ecx+E/RdBwUvfl4T9yTQRyTQTiP97d3d3d3eE/52v17BC44T/ep7hkCNbhP7GRDhvpsOE/zjnnnHPO4T9Ei2zn+6nhP2IYhmEYhuE/WSwWi8Vi4T8AAAAAAIDhP2fMGXPGnOE/khu5kRu54T9gxQkpeZbhP3TRRRdddOE/BhkXZFyQ4T8IKme38KvhP3Icx3Ecx+E/pqWlpaWl4T/ij1uXd8DhPxolfkaJn+E/l8r2rgO64T+amZmZmZnhP8afSDileeE/ezJ+u9ST4T900UUXXXThP+Q4juM4juE/pPMWQzpv4T+MGDFixIjhP1uE/DU7auE/whT5rBtM4T83YKimYy7hP0jhehSuR+E/ianEVGIq4T/YUF5DeQ3hP9F7JtF7JuE/rfyEOCs/4T8j8SoSryLhP7ETO7ETO+E/OUG4G/se4T8GotKRPQPhP+vSY/5eG+E/MzMzMzMz4T/ti6jW2RfhPw==",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_03_february_remove-navigational<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_03_february_remove-navigational",
-         "line": {
-          "color": "#636efa",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_03_february_remove-navigational",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAA==",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADkP1VVVVVVVeU/MzMzMzMz4z9ddNFFF13kP1VVVVVVVeU/FDuxEzux4z+SJEmSJEniPzMzMzMzM+M/AAAAAAAA4j/x8PDw8PDgPwAAAAAAAOA/DeU1lNdQ3j8AAAAAAADgPzEMwzAMw+A/dNFFF1104T8hC1nIQhbiP6uqqqqqquI/7FG4HoXr4T+xEzuxEzvhP3Icx3Ecx+E/SZIkSZIk4T+WexphuafhPyIiIiIiIuI/lVJKKaWU4j8AAAAAAADiP22yySabbOI/09LS0tLS4j+SJEmSJEniP3Icx3Ecx+E/mCKfdYMp4j/zGsprKK/hP7ETO7ETO+E/zczMzMzM4D8sUbsStSvhPzEMwzAMw+A/GPQFfUFf4D+66KKLLrrgPxEREREREeE/FrKQhSxk4T/E5ApicgXhP1VVVVVVVeE/aKwPjfWh4T/sUbgehevhP5KRkZGRkeE/sRM7sRM74T+pCcZb2efgP/cS2ktoL+E/37D2DWvf4D9JkiRJkiThP3UW01lMZ+E/lnsaYbmn4T8NJ3VfHlvhP5qZmZmZmeE/DcE62rxP4T8IIYQQQgjhP1EURVEUReE/AAAAAAAA4T/RC73QC73gP/jggw8++OA/TKQHKme34D/x8PDw8PDgPwtZyEIWsuA/oQ7qoA7q4D8OJFphcyDhP1VVVVVVVeE/jBgxYsSI4T/CFPmsG0zhPxEREREREeE/eQ3lNZTX4D/lJ8RZ+QnhP7ETO7ETO+E/1uImzO9q4T8zMzMzMzPhPyNl4OnW/OA/yOB8DM7H4D+FN5o6v/bgP0mSJEmSJOE/UVFRUVFR4T9f0Bf0BX3hPwOZFC+QSeE/dNFFF1104T8UoQhFKELhP8EWbMEWbOE/UhmVURmV4T8WspCFLGThPzTRRBNNNOE/xOQKYnIF4T95DeU1lNfgP6uqqqqqquA/sd0sTyLT4D8qeDkFL6fgP3o7Q2LezuA/9ihcj8L14D/sZ4uV4RvhP0FBQUFBQeE/PoFUcl4W4T+xEzuxEzvhP/EVX/EVX+E/b2WfQ2qC4T9ws1+IFaXhP3Icx3Ecx+E/1hmpmFud4T900UUXXXThP8IU+awbTOE/27Zt27Zt4T+c6xjFuY7hP3UW01lMZ+E/FG01eI5A4T+oEZZ7GmHhP7ETO7ETO+E/cVL35bEV4T/x8PDw8PDgP83MzMzMzOA/HgI3lkGp4D/S5n2KS4bgP6cQaAqBpuA/xhhjjDHG4D+kcD0K16PgPzEMwzAMw+A/OBwOh8Ph4D8AAAAAAADhP0fcEXfEHeE/sRM7sRM74T9WnJCSZxnhP/jggw8++OA/UxFLRSwV4T+7hV+NifTgPxEREREREeE/8fDw8PDw4D+7vAOOFA3hPw/MtQNz7eA/jnn6aDUJ4T9JkiRJkiThP8TkCmJyBeE/DiRaYXMg4T+xEzuxEzvhP1VVVVVVVeE/pPMWQzpv4T8LFSpUqFDhP01c6d6AMuE/whT5rBtM4T+eFCR/XmXhP36x5BdLfuE/jVvGLeOW4T+U11BeQ3nhP5KRkZGRkeE/dNFFF1104T+MMcYYY4zhP0IapEEapOE/+x6RE4S74T8+A1HpyJ7hP29ln0NqguE/ZmZmZmZm4T9epZigu0rhP/cS2ktoL+E/fJu/wqxG4T8sUbsStSvhPw==",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "According to the World Bank, which countries had g"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_03_february_text_high-reasoning-effort<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_03_february_text_high-reasoning-effort",
-         "line": {
-          "color": "#EF553B",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_03_february_text_high-reasoning-effort",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADgP1VVVVVVVdU/AAAAAAAA4D+amZmZmZnZPwAAAAAAAOA/27Zt27Zt2z8AAAAAAADYPxzHcRzHcdw/AAAAAAAA4D900UUXXXThPwAAAAAAAOA/sRM7sRM74T8AAAAAAADgP97d3d3d3d0/AAAAAAAA3D8eHh4eHh7ePxzHcRzHcdw/KK+hvIby2j+amZmZmZnZP9u2bdu2bds/L7rooosu2j+96U1vetPbP6uqqqqqqto/mpmZmZmZ2T/ZiZ3YiZ3YPy+hvYT2Eto/27Zt27Zt2z9huacRlnvaP5qZmZmZmdk/xhhjjDHG2D8AAAAAAADaPyebbLLJJts/PDw8PDw83D8d1EEd1EHdPxzHcRzHcdw/0LrBFPms2z8or6G8hvLaPxqkQRqkQdo/mpmZmZmZ2T+J2pWoXYnaP9u2bdu2bds/s6asKWvK2j+jiy666KLbP1uwBVuwBds/velNb3rT2z9t1Hc26jvbPwAAAAAAANw/27Zt27Zt2z/hehSuR+HaP5ybm5ubm9s/O7ETO7ET2z8KxlvZ55DaPya0l9BeQts/w9o3rH3D2j/btm3btm3bPx/BfQT3Edw/GmG5pxGW2z8EDSd1Xx7bP7y7u7u7u9s/Q7CONu9T3D/nnHPOOefcPxzHcRzHcdw/AAAAAAAA3D/dyI3cyI3cPx988MEHH9w/NSbSA5Wz2z9LS0tLS0vbP64dmGsH5to/27Zt27Zt2z8yfrvUk/HbPxzHcRzHcdw/4MCBAwcO3D/QusEU+azbPylcj8L1KNw/oryG8hrK2z/5CXFWfkLcP33Lt3zLt9w/VDqyZyAq3T/NzMzMzMzcP2t+WKQMPN0/qV2J2pWo3T/3kMuKgRLeP27btm3btt0/Hh4eHh4e3j9xR9wRd8TdPyCT4gUyKd4/jC666KKL3j/vda973evePz/pkz7pk94/3uM93uM93j/f9KY3vendP5dddtlll90/eDbqOxv13T9G2rECYaTdPwAAAAAAAN4/3ixPItOw3T+Dl1PwcgrePw2JeTtDYt4/uB6F61G43j/IXT9brAzfP19fX19fX98/FEgl52UR3z8ndmIndmLfPyD7sR/7sd8/OqQmGG9l3z83+4VYURrfPwntJbSX0N4/hOjxXTiI3j/WvmHtG9beP/DolbH9jt4/kiRJkiRJ3j9bWOmphZXePwnuI7iP4N4/6k1vetOb3j9HWO5phOXePx/qoR7qod4/VwQNJ3Vf3j9fzKdezKfeP2ZmZmZmZt4/4MYyKBUm3j+KS4ZgHW3ePy6e3OLJLd4/dM4555xz3j+4HoXrUbjeP57neZ7ned4/j8fj8Xg83j8AAAAAAADeP3FH3BF3xN0/ntiJndiJ3T9Ux97aMM3dP5NNNtlkk90/Wt1pdafV3T+K9EDl7BbeP97d3d3d3d0/Hh4eHh4e3j+kaIg/bl3eP+JnlPgZJd4/le1dB3Rj3j++4iu+4iveP3rxJxJOad4/u9ST8dul3j+qz7Q1/m7eP47jOI7jON4/Y0jnLYZ03j/16tWrV6/eP7o3oExc6d4/PusGU+Sz3j8uEZ4UJH/eP7gehetRuN4/+MJ74b3w3j+H8hrKayjfP59J9J5J9N4/4qz8hDgr3z/43nvvvffeP0/sxE7sxN4/EjlBuBv73j9hfleLmzDfPzqkJhhvZd8/mpmZmZmZ3z+P5g82Hs3fP1ikDDzdmt8/sxpFHDpp3z84H4PzMTjfPwgffPDBB98/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_04_february_submission<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_04_february_submission",
-         "line": {
-          "color": "#00cc96",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_04_february_submission",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEA",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA8D+amZmZmZnpP1VVVVVVVeU/kiRJkiRJ4j8AAAAAAADgPxzHcRzHcdw/AAAAAAAA4D900UUXXXThPwAAAAAAAOA/sRM7sRM74T+SJEmSJEniPxEREREREeE/AAAAAAAA4j/T0tLS0tLiP+Q4juM4juM/XkN5DeU15D/NzMzMzMzkP/Q8z/M8z+M/XXTRRRdd5D84velNb3rjP6uqqqqqquI/7FG4HoXr4T+xEzuxEzvhP3Icx3Ecx+E/kiRJkiRJ4j9PIyz3NMLiPzMzMzMzM+M/lVJKKaWU4j8AAAAAAADjP22yySabbOI/09LS0tLS4j8zMzMzMzPjP6uqqqqqquI/bzBFPusG4z9sKK+hvIbiP9IgDdIgDeI/ZmZmZmZm4j+7ErUrUbviP5IkSZIkSeI/p6wpa8qa4j/poosuuujiP9InfdInfeI/IQtZyEIW4j9HfWejvrPhP1VVVVVVVeE/PzTWh8b64D9I4XoUrkfhP/Hw8PDw8OA/2Ymd2Imd4D+pCcZb2efgP3sJ7SW0l+A/SpCnBHlK4D8AAAAAAADgP34E9xHcR+A/3dMIyz2N4D+c1H15bEXgPwAAAAAAAOA/afM+xSVD4D+EEEIIIYTgPxAEQRAEQeA/AAAAAACA4D/RC73QC73gP3zwwQcffOA/b+FXYyI94D94eHh4eHjgPwtZyEIWsuA/oQ7qoA7q4D8OJFphcyDhP1VVVVVVVeE/jBgxYsSI4T/CFPmsG0zhPxEREREREeE/eQ3lNZTX4D9WfkKclZ/gP5AGaZAGaeA/N2F+V4ub4D/NzMzMzMzgP3sJ7SW0l+A/yOB8DM7H4D+2h1xWDJTgPxiGYRiGYeA/kZCQkJCQ4D8w6Av6gr7gP93TCMs9jeA/uuiiiy664D8Oc5jDHObgP2ELtmALtuA/cQiHcAiH4D+GLGQhC1ngPyywwAILLOA/QUyuICZX4D8WCCPtWIHgP1VVVVVVVeA/8MXVDzoq4D8VvJyCl1PgP3+lQK1fKeA/AAAAAAAA4D+YdGoe5K7fP19fX19fX98/XG0MTXew3z8ndmIndmLfPyD7sR/7sd8/AAAAAAAA4D+9U9dycLPfPwAAAAAAAOA/TvvJEti03z9r37D2DWvfPyryWTeYIt8/27Zt27Zt3z8SePshgbffPwT3EdxHcN8//HVJ5cO43z8jLPc0wnLfP6D7uZ/7ud8/yFYEDSd13z/gKLvfKLvfPwAAAAAAAOA/jmVQKky83z8AAAAAAADgPyHQFAJNIeA/AAAAAAAA4D9YObTIdr7fP9/3fd/3fd8/0Ofz+Xw+3z8AAAAAAADfP9AX9AV9Qd8/P/ADP/AD3z9xQkqeZUTfPwgffPDBB98/c/TN0TdH3z9FeqBydgvfP5/0SZ/0Sd8/Dw8PDw8P3z/ZLKj2nEzfP/EzSvyMEt8/964DujFP3z9f8RVf8RXfP3usZeiA3d4/u9ST8dul3j8wS8oBkeHeP8dxHMdxHN8/Kmj1pYJW3z/58ePHjx/fP7o3oExc6d4/KvJZN5gi3z/L4ox2D1vfP5NfLPnFkt8//iZ/k7/J3z9DeQ3lNZTfPyB1yh91yt8/cVZ+QpyV3z/LX7L8JcvfPwAAAAAAAOA/S3r50xYa4D8AAAAAAADgP742Yl16zN8/AAAAAAAA4D+P5g82Hs3fP1ikDDzdmt8/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_04_february_submission-medium<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_04_february_submission-medium",
-         "line": {
-          "color": "#ab63fa",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_04_february_submission-medium",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3w=",
-          "dtype": "i1"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADkP3Icx3Ecx+E/MzMzMzMz4z9ddNFFF13kP1VVVVVVVeU/dmIndmIn5j8lSZIkSZLkP1VVVVVVVeU/AAAAAAAA5D/T0tLS0tLiP3Icx3Ecx+E/bCivobyG4j+amZmZmZnhP5IkSZIkSeI/6aKLLrro4j8hC1nIQhbiP1VVVVVVVeE/7FG4HoXr4T+xEzuxEzvhP3Icx3Ecx+E/kiRJkiRJ4j9PIyz3NMLiPyIiIiIiIuI/lVJKKaWU4j8AAAAAAADiP3TRRRdddOE/4uHh4eHh4T/xFV/xFV/hPzmO4ziO4+A/6wZT5LNu4D95DeU1lNfgP7ETO7ETO+E/zczMzMzM4D8sUbsStSvhPzEMwzAMw+A/R9wRd8Qd4T+66KKLLrrgP7AFW7AFW+A/C1nIQhay4D/E5ApicgXhP6uqqqqqquA/FbycgpdT4D+kcD0K16PgP/Hw8PDw8OA/2Ymd2Imd4D+pCcZb2efgP3sJ7SW0l+A/37D2DWvf4D8lSZIkSZLgP3kN5TWU1+A/3dMIyz2N4D+c1H15bEXgPwAAAAAAAOA/afM+xSVD4D+EEEIIIYTgPzEMwzAMw+A/AAAAAACA4D/wAz/wAz/gP3zwwQcffOA/b+FXYyI94D94eHh4eHjgPwRz7cBcO+A/UAd1UAd14D82BxKtsDngPxzHcRzHceA/ggMHDhw44D8AAAAAAADgP5NfLPnFkt8/AAAAAAAA4D/H1MDeMTXgPwAAAAAAAOA/Mb+rxU2Y3z8AAAAAAADgP1ikDDzdmt8/OB+D8zE43z+U8EZT59feP57neZ7ned4/Hh4eHh4e3j+hL+gL+oLePyCT4gUyKd4/jC666KKL3j/vda973evePz/pkz7pk94/3uM93uM93j/f9KY3vendP5dddtlll90/eDbqOxv13T9G2rECYaTdPwAAAAAAAN4/nkSmYbtZ3j+sD431obHePw2JeTtDYt4/FK5H4XoU3j8pMOnUPMjdPx4eHh4eHt4/zCI+gVRy3j92Yid2YifeP57neZ7ned4/dEhNMN7K3j83+4VYURrfP4X2EtpLaN8/6fFdOIge3z9r37D2DWvfP2P7Hb0ytt8/27Zt27Zt3z8SePshgbffPwAAAAAAAOA//HVJ5cO43z8jLPc0wnLfP9/yLd/yLd8/yFYEDSd13z+fejGfejHfP+/u7u7u7t4/xfuR03yt3j9cMgTraPPePzgfg/MxON8/fO+999573z8IrBxaZDvfPw==",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_04_february_submission3<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_04_february_submission3",
-         "line": {
-          "color": "#FFA15A",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_04_february_submission3",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMA==",
-          "dtype": "i1"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA6D8zMzMzMzPjP1VVVVVVVeU/kiRJkiRJ4j8AAAAAAADkP1VVVVVVVeU/ZmZmZmZm5j9ddNFFF13kP6uqqqqqquI/sRM7sRM74T8AAAAAAADgPxEREREREeE/AAAAAAAA4j/x8PDw8PDgPwAAAAAAAOA/DeU1lNdQ3j8AAAAAAADgP57neZ7ned4/AAAAAAAA4D/qTW9605vePwAAAAAAAOA/pHA9Ctej4D8AAAAAAADgPwntJbSX0N4/btu2bdu23T9HWO5phOXePwAAAAAAAOA/hBBCCCGE4D8AAAAAAADgPwgffPDBB98/AAAAAAAA4D9QB3VQB3XgPwAAAAAAAOA/6wZT5LNu4D8AAAAAAADgP5AGaZAGaeA/AAAAAAAA4D9kcD4G52PgPwAAAAAAAOA/0Bf0BX1B3z8AAAAAAADgP7AFW7AFW+A/AAAAAAAA4D99Z6O+s1HfPwAAAAAAAOA/1ofG+tBY3z8=",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_04_february_submission4<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_04_february_submission4",
-         "line": {
-          "color": "#19d3f3",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_04_february_submission4",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAECAwQF",
-          "dtype": "i1"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjPwAAAAAAAOA/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "According to the World Bank, which countries had g"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_04_february_submission5<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_04_february_submission5",
-         "line": {
-          "color": "#FF6692",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_04_february_submission5",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADgP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjP1VVVVVVVeU/t23btm3b5j8AAAAAAADoPzmO4ziO4+g/mpmZmZmZ6T8vuuiiiy7qP6uqqqqqquo/O7ETO7ET6z9JkiRJkiTpP5qZmZmZmek/AAAAAAAA6j94eHh4eHjoP8dxHMdxHOc/Q3kN5TWU5z8AAAAAAADoPxiGYRiGYeg/RhdddNFF5z9kIQtZyELmP1VVVVVVVeU/exSuR+F65D8UO7ETO7HjP2gvob2E9uI/27Zt27Zt4z8Jyz2NsNzjPzMzMzMzM+M/lVJKKaWU4j8AAAAAAADjP22yySabbOI/09LS0tLS4j+SJEmSJEniP6uqqqqqquI/bzBFPusG4z/lNZTXUF7jPxQ7sRM7seM/AAAAAAAA5D9L1K5E7UrkP/Q8z/M8z+M/Bn1BX9AX5D+jiy666KLjPzMzMzMzM+M/OL3pTW964z9MriAmVxDjP1VVVVVVVeM/5hS8nIKX4z/Xo3A9CtfjPxQUFBQUFOQ/7MRO7MRO5D9NMN7KPofkP9pLaC+hveQ/XXTRRRdd5D8lSZIkSZLkP15DeQ3lNeQ/5p5GWO5p5D9fHlsRNJzkP0REREREROQ/JkOwjjbv4z+dc84555zjP/Q8z/M8z+M/AAAAAACA4z8zMzMzMzPjP+miiy666OI/oHJ2C78a4z9LS0tLS0vjPzDXDsy1A+M/4yu+4iu+4j9TT8Zvl3riP47jOI7jOOI/kB8/fvz44T+YIp91gyniP1nyiyW/WOI/bCivobyG4j8hzspPiLPiP/Mt3/It3+I/E+Z3tbgJ4z/NzMzMzMziP8HTrflhkeI/V6J2JWpX4j+/9pDLioHiP5IkSZIkSeI/EhISEhIS4j+PuCPuiDviP7xAJsULZOI/L7rooosu4j8g/ehHP/rhPyIiIiIiIuI/kiRJkiRJ4j+nN73pTW/iP5VSSimllOI/yhXE5Api4j9sKK+hvIbiP1VVVVVVVeI/EpmG7WZ54j+n4OUUvJziP2r9SoFav+I/j8L1KFyP4j/zIHf9bLHiP9PS0tLS0uI/cl4W8Qmk4j9iJ3ZiJ3biP5IkSZIkSeI/GG9ln0Nq4j/2C7GiND7iP+0ltJfQXuI/OyMVc6sz4j8J8pQgTwniP5gin3WDKeI/kiRJkiRJ4j94+yGBtx/iP3AfwX0E9+E/HYGirQbP4T+E5Z5GWO7hP9IgDdIgDeI/4qTuy2Mr4j9yTQRyTQTiPyIiIiIiIuI/y6BUmHg/4j+wjjbvU1ziPzbSYSMdNuI/U0oppZRS4j+TGARWDi3iP4IgCIIgCOI/iUQikUgk4j8AAAAAAADiP3fEHXFH3OE/gh/4gR/44T9q7oK/ihPiPy+66KKLLuI/kiRJkiRJ4j/l7BZ+NSbiPz0gWefKA+I/Hh4eHh4e4j/8cevyDjjiPyV+RomfUeI/oBvz9NFq4j87qIM6qIPiP8oVxOQKYuI/HUi0wuZA4j++CmZJOSDiP47jOI7jOOI/eoshnbcY4j8SI0aMGDHiP5IkSZIkSeI/DqbIZ91g4j+imo65RHjiP1nyiyW/WOI/kuZIc6Q54j8N5TWU11DiPxK9ZxK9Z+I/kiRJkiRJ4j8rEq8i8SriP9IgDdIgDeI/kROEu7Hv4T8NRKUjewbiP/P32oh16eE/zczMzMzM4T+w8Wj+YOPhP0bKwNOt+eE/yYB6pnLd4T/C+Ricj8HhP6YxYBoDpuE/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_22-01_managedagent-summary_planning<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_22-01_managedagent-summary_planning",
-         "line": {
-          "color": "#B6E880",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_22-01_managedagent-summary_planning",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQg==",
-          "dtype": "i1"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA6D8zMzMzMzPjP1VVVVVVVeU/t23btm3b5j8AAAAAAADkP3Icx3Ecx+E/AAAAAAAA4D8XXXTRRRfdPwAAAAAAAOA/sRM7sRM74T8AAAAAAADgPxEREREREeE/AAAAAAAA4D8eHh4eHh7ePxzHcRzHcdw/KK+hvIby2j+amZmZmZnZPxiGYRiGYdg/RhdddNFF1z+RhSxkIQvZPwAAAAAAANg/mpmZmZmZ2T/ZiZ3YiZ3YP0J7Ce0ltNc/t23btm3b1j98GmG5pxHWP1VVVVVVVdU/pZRSSiml1D8AAAAAAADUP2WTTTbZZNM/tbS0tLS01D8WX/EVX/HVP1VVVVVVVdU/yWfdYIp81j9DeQ3lNZTXP9mJndiJndg/mpmZmZmZ2T/6GJyPwfnYP3qe53me59k/s6asKWvK2j8vuuiiiy7aP5qZmZmZmdk/pze96U1v2j9t1Hc26jvbPwAAAAAAANw/27Zt27Zt2z/hehSuR+HaP1paWlpaWto/O7ETO7ET2z+WfQ6pCcbbPxzHcRzHcdw/F1100UUX3T8lSZIkSZLcPxbTWUxnMd0/jbDc0wjL3T/msRVBw0ndP83MzMzMzNw/Q7CONu9T3D/fe++9997bP9u2bdu2bds/AAAAAAAA2z9bqZVaqZXaPyebbLLJJts/eqBydgu/2j8=",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_25-01_visioon<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_25-01_visioon",
-         "line": {
-          "color": "#FF97FF",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_25-01_visioon",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ=",
-          "dtype": "i1"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAA8D8AAAAAAADgP1VVVVVVVdU/AAAAAAAA0D+amZmZmZnJP1VVVVVVVdU/27Zt27Zt2z8AAAAAAADYP1VVVVVVVdU/MzMzMzMz0z900UUXXXTRP1VVVVVVVdU/2Ymd2Imd2D+3bdu2bdvWP5qZmZmZmdk/AAAAAAAA2D+XlpaWlpbWPzmO4ziO49g/Q3kN5TWU1z9mZmZmZmbWP1VVVVVVVdU/XXTRRRdd1D9kIQtZyELWP1VVVVVVVdU/exSuR+F61D92Yid2YifWP1VVVVVVVdU/JUmSJEmS1D8Jyz2NsNzTPzMzMzMzM9M/lVJKKaWU0j8AAAAAAADSP3TRRRdddNE/09LS0tLS0j/UQR3UQR3UP+Q4juM4jtM/HEyRz7rB1D9RXkN5DeXVP1VVVVVVVdU/ZmZmZmZm1j/blahdidrVP1VVVVVVVdU/lTVlTVlT1j/RRRdddNHVP1VVVVVVVdU/ZCELWchC1j9dQUyuICbXP6uqqqqqqtY/jfWhsT401j/D9Shcj8LVP1VVVVVVVdU/xU7sxE7s1D/Z55CaYLzVPw==",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "According to the World Bank, which countries had g"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o1_29-01_text<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o1_29-01_text",
-         "line": {
-          "color": "#FECB52",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o1_29-01_text",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdo",
-          "dtype": "i1"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA4D+amZmZmZnZPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADgPxzHcRzHcdw/mpmZmZmZ2T9GF1100UXXP1VVVVVVVdU/2Ymd2Imd2D+3bdu2bdvWP5qZmZmZmdk/AAAAAAAA3D9aWlpaWlraPzmO4ziO49g/Q3kN5TWU1z9mZmZmZmbWP1VVVVVVVdU/XXTRRRdd1D9kIQtZyELWP1VVVVVVVdU/exSuR+F61D8UO7ETO7HTP2gvob2E9tI/kiRJkiRJ0j+WexphuafRPxEREREREdE/hBBCCCGE0D8AAAAAAADQPwgffPDBB88/8fDw8PDw0D+SJEmSJEnSP+Q4juM4jtM/HEyRz7rB1D9RXkN5DeXVP5dv+ZZv+dY/AAAAAAAA2D9qV6J2JWrXPxiGYRiGYdg/9AV9QV/Q1z9GF1100UXXPxdswRZswdY/etOb3vSm1z9icgUxuYLYPwAAAAAAANg/4eUUvJyC1z8K16NwPQrXP5eWlpaWltY/dmIndmIn1j9ln0NqgvHWP0J7Ce0ltNc/cFj7hrVv2D9JkiRJkiTZPzGdxXQW09k/YbmnEZZ72j+Uui+PrQjaP5qZmZmZmdk/WEeb9yku2T/GGGOMMcbYPxiGYRiGYdg/AAAAAAAA2D8YeqEXeqHXPz744IMPPtg/SQ9Uzm7h1z+Ih4eHh4fXP4K5dmCuHdg/+Yqv+Iqv2D/RCpsDiVbYPwAAAAAAANg/vXr16tWr1z+fdYMp8lnXP+UXS36x5Nc/Q3kN5TWU1z9kamDvmBrYP9mJndiJndg/OrJnICod2T/NzMzMzMzYP5Ey8HRrftg/Mjgfg/Mx2D+q82sPuazYPxiGYRiGYdg/GBgYGBgY2D8k7og74o7YP+5phOWeRtg/AAAAAAAA2D983ete97rXP9iCLdiCLdg/2Ymd2Imd2D+GLGQhC1nYP8YYY4wxxtg/YnIFMbmC2D8LhJF2rEDYPwAAAAAAANg/2G6WJ5Fp2D801ofG+tDYPzbZZJNNNtk/mpmZmZmZ2T96kLt+tljZP7q5ubm5udk/i/gEUsl52T+xEzuxEzvZP9mP/diP/dg/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "What's the last line of the rhyme under the flavor"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "In Valentina Re’s contribution to the 2017 book “W"
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "Compute the check digit the Tropicos ID for the Or"
-          ],
-          [
-           "Could you help me out with this assignment? Our pr"
-          ],
-          [
-           "Given this table defining * on the set S = {a, b, "
-          ],
-          [
-           "What time was the Tri-Rail train that carried the "
-          ],
-          [
-           "In the fictional language of Tizin, basic sentence"
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "I was trying to remember how well the Cheater Beat"
-          ],
-          [
-           "The attached file contains a list of vendors in th"
-          ],
-          [
-           "Review the chess position provided in the image. I"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "Who nominated the only Featured Article on English"
-          ],
-          [
-           "The Latin root of the Yola word \"gimlie\" shares a "
-          ],
-          [
-           "The attached file shows a list of books in the col"
-          ],
-          [
-           "According to Google Finance, when was the first ye"
-          ],
-          [
-           "Using bass clef notes, what is the age of someone "
-          ],
-          [
-           "On a leap day before the year 2008, a joke was rem"
-          ],
-          [
-           "On July 15, 2008, Phys.org published an article ab"
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "If there is anything that doesn't make sense in th"
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "The following numbers function similarly to ISBN 1"
-          ],
-          [
-           "In the year 2022, and before December, what does \""
-          ],
-          [
-           "What is the volume in milliliters of a system comp"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "The attached file lists accommodations in the reso"
-          ],
-          [
-           "In the NIH translation of the original 1913 Michae"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ],
-          [
-           "You are Van Helsing, a renowned vampire hunter. A "
-          ],
-          [
-           "Find the value of x to the nearest tenth: Lx = (d/"
-          ],
-          [
-           "You are a telecommunications engineer who wants to"
-          ],
-          [
-           "According to Box Office Mojo's 2020 Worldwide Box "
-          ],
-          [
-           "How many applicants for the job in the PDF are onl"
-          ],
-          [
-           "As of the 2020 census, what was the population dif"
-          ],
-          [
-           "The Metropolitan Museum of Art has a portrait in i"
-          ],
-          [
-           "How many slides in this PowerPoint presentation me"
-          ],
-          [
-           "This is a secret message my friend gave me. It say"
-          ],
-          [
-           "According to wikipedia, how many Asian countries s"
-          ],
-          [
-           "The work referenced in footnote 397 of Federico La"
-          ],
-          [
-           "I was referencing each of the tables in the file f"
-          ],
-          [
-           "In Nature journal's Scientific Reports conference "
-          ],
-          [
-           "The attached file shows the locomotives in the col"
-          ],
-          [
-           "How many nonindigenous crocodiles were found in Fl"
-          ],
-          [
-           "As a comma separated list with no whitespace, usin"
-          ],
-          [
-           "According to the World Bank, which countries had g"
-          ],
-          [
-           "The attached spreadsheet contains the sales of men"
-          ],
-          [
-           "Who composed the song that was performed by a roos"
-          ],
-          [
-           "I'm making a grocery list for my mom, but she's a "
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "In the 2018 VSCode blog post on replit.com, what w"
-          ],
-          [
-           "Look at the attached image. The quiz is scored as "
-          ],
-          [
-           "What writer is quoted by Merriam-Webster for the W"
-          ],
-          [
-           "Examine the video at https://www.youtube.com/watch"
-          ],
-          [
-           "Hi, I'm making a pie but I could use some help wit"
-          ],
-          [
-           "In the Scikit-Learn July 2017 changelog, what othe"
-          ],
-          [
-           "You are given this Excel file as a map. You start "
-          ],
-          [
-           "How many images are there in the latest 2022 Lego "
-          ],
-          [
-           "The attached image contains a Python script. Run t"
-          ],
-          [
-           "I thought we could try a fun word puzzle together "
-          ],
-          [
-           "On ScienceDirect, what is the difference to 3 deci"
-          ],
-          [
-           "What is the final numeric output from the attached"
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "How many more blocks (also denoted as layers) in B"
-          ],
-          [
-           "The longest-lived vertebrate is named after an isl"
-          ],
-          [
-           "On the DeepFruits fruit detection graph on Connect"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "The attached PDF lists accommodations in the resor"
-          ],
-          [
-           "This spreadsheet contains a list of clients for a "
-          ],
-          [
-           "How many times was a Twitter/X post cited as a ref"
-          ],
-          [
-           "During the first week of August 2015, one of the N"
-          ],
-          [
-           "What is the surname of the equine veterinarian men"
-          ],
-          [
-           "The YouTube channel Game Grumps began a Let’s Play"
-          ],
-          [
-           "What is the last word before the second chorus of "
-          ],
-          [
-           "Who did the actor who played Ray in the Polish-lan"
-          ],
-          [
-           "I have the Standard plan in the image below, and I"
-          ],
-          [
-           "In the endnote found in the second-to-last paragra"
-          ],
-          [
-           "The book with the doi 10.1353/book.24372 concerns "
-          ],
-          [
-           "Pull out the sentence in the following 5x7 block o"
-          ],
-          [
-           "What is the latest chronological year date written"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           "Eva Draconis has a personal website which can be a"
-          ],
-          [
-           "How many at bats did the Yankee with the most walk"
-          ],
-          [
-           "According to Girls Who Code, how long did it take "
-          ],
-          [
-           "The attached spreadsheet contains a list of books "
-          ],
-          [
-           "How many pages if the 2023 IPCC report (85 pages v"
-          ],
-          [
-           "It's May 2023, and I'm about to drive across the U"
-          ],
-          [
-           "In Audre Lorde’s poem “Father Son and Holy Ghost”,"
-          ],
-          [
-           "On Cornell Law School website's legal information "
-          ],
-          [
-           "How many edits were made to the Wikipedia page on "
-          ],
-          [
-           "Consider the following symbols: 𒐜  𒐐𒐚\n\nThis is a n"
-          ],
-          [
-           "On the BBC Earth YouTube video of the Top 5 Sillie"
-          ],
-          [
-           "What is the absolute difference in tens of thousan"
-          ],
-          [
-           "The attached spreadsheet lists the locomotives own"
-          ],
-          [
-           "The attached file lists the locomotives owned by a"
-          ],
-          [
-           "I’m thinking about selling my home, so I want to l"
-          ],
-          [
-           "When was a picture of St. Thomas Aquinas first add"
-          ],
-          [
-           "As of August 2023, who is the only winner of the U"
-          ],
-          [
-           "Take the gender split from the 2011 Bulgarian cens"
-          ],
-          [
-           "All of the individuals who formally held the posit"
-          ],
-          [
-           "Hi, I was out sick from my classes on Friday, so I"
-          ],
-          [
-           "If this whole pint is made up of ice cream, how ma"
-          ],
-          [
-           "Which of the fruits shown in the 2008 painting \"Em"
-          ],
-          [
-           "What country had the least number of athletes at t"
-          ],
-          [
-           "In the YouTube 360 VR video from March 2018 narrat"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "Where were the Vietnamese specimens described by K"
-          ],
-          [
-           "The cover of the August 2021 issue of Vogue shows "
-          ],
-          [
-           "I'd like to learn more about some popular reality "
-          ],
-          [
-           "I read a paper about multiwavelength observations "
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$,"
-          ],
-          [
-           "A standard Rubik’s cube has been broken into cubes"
-          ],
-          [
-           "According to the USGS, in what year was the Americ"
-          ],
-          [
-           "The attached Excel file contains the sales of menu"
-          ],
-          [
-           "I'm curious about how much information is availabl"
-          ],
-          [
-           "What percentage of the total penguin population ac"
-          ],
-          [
-           "As of May 2023, how many stops are between South S"
-          ],
-          [
-           "According to Openreview.net, at the NeurIPS 2022 C"
-          ],
-          [
-           "Of the cities within the United States where U.S. "
-          ],
-          [
-           "Who are the pitchers with the number before and af"
-          ],
-          [
-           "In the 2015 Metropolitan Museum of Art exhibition "
-          ],
-          [
-           "On June 6, 2023, an article by Carolyn Collins Pet"
-          ],
-          [
-           "What is the area of the green polygon in the attac"
-          ],
-          [
-           "What is the first name of the only Malko Competiti"
-          ],
-          [
-           "The brand that makes these harnesses the dogs are "
-          ],
-          [
-           "The year is 2022. I am at the National Air and Spa"
-          ],
-          [
-           "What was the actual enrollment count of the clinic"
-          ],
-          [
-           "What was the complete title of the book in which t"
-          ],
-          [
-           "Bob was invited to participate in a game show, and"
-          ],
-          [
-           "In NASA's Astronomy Picture of the Day on 2006 Jan"
-          ],
-          [
-           "At the two-minute mark in the YouTube video upload"
-          ],
-          [
-           "In the film Goldfinger, what color was the object "
-          ],
-          [
-           "A 5-man group made up of one tank, one healer, and"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_o3-mini_03_february_remove-navigational<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_o3-mini_03_february_remove-navigational",
-         "line": {
-          "color": "#636efa",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_o3-mini_03_february_remove-navigational",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA",
-          "dtype": "i2"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVdU/AAAAAAAA0D+amZmZmZnJP1VVVVVVVcU/kiRJkiRJwj8AAAAAAADAPxzHcRzHccw/mpmZmZmZyT9GF1100UXHPwAAAAAAANA/FDuxEzux0z+SJEmSJEnSP1VVVVVVVdU/AAAAAAAA1D/T0tLS0tLSP3Icx3Ecx9E/XkN5DeU11D9mZmZmZmbWP1VVVVVVVdU/RhdddNFF1z9kIQtZyELWP1VVVVVVVdU/exSuR+F61D8UO7ETO7HTP2gvob2E9tI/kiRJkiRJ0j+WexphuafRPzMzMzMzM9M/lVJKKaWU0j8AAAAAAADSP2WTTTbZZNM/09LS0tLS0j+SJEmSJEnSP3Icx3Ecx9E/whT5rBtM0T9sKK+hvIbSP9IgDdIgDdI/mpmZmZmZ0T+7ErUrUbvSP5IkSZIkSdI/1pQ1ZU1Z0z9ddNFFF13UP5Q+6ZM+6dM/OL3pTW960z9MriAmVxDTP6uqqqqqqtI/kiRJkiRJ0j8zMzMzMzPTP9PS0tLS0tI/FDuxEzux0z/BeCv7HFLTP19CewntJdQ/yFOCPCXI0z/btm3btm3TP2cxncV0FtM/Ccs9jbDc0z/vy2MrgobTPzMzMzMzM9M/JkOwjjbv0z+llFJKKaXUP1VVVVVVVdU/AAAAAAAA1T+WWqmVWqnVP1VVVVVVVdU/F341JtID1T+mpaWlpaXVP1VVVVVVVdU/Fl/xFV/x1T9ItMLmQKLVP1VVVVVVVdU/r169evXq1T/yWTeYIp/VP1VVVVVVVdU/2FBeQ3kN1T/sHVMDe8fUP1VVVVVVVdU/ICod2TMQ1T/NzMzMzMzUPwaebs0Pi9Q/S9SuRO1K1D/6tYdcVgzUPyVJkiRJktQ/VFRUVFRU1D8GfUFf0BfUPwnLPY2w3NM/o4suuuii0z83talNbWrTP5Q+6ZM+6dM/VEZlVEZl1D9DFrKQhSzUP6WUUkoppdQ/Ut/ZqO9s1D8mTv2eW+LUP1VVVVVVVdU/Ffji6gcd1T85BS+n4OXUP1VVVVVVVdU/H4XrUbge1T/L8I0oMOnUP7W0tLS0tNQ/k/OyiE8g1T/FTuzETuzUP5VLuZRLudQ/E4y3ss8h1T9RGh+ZQO/UP1VVVVVVVdU/NFIxtzoj1T+HtW9Y+4bVP1VVVVVVVdU/SZIkSZIk1T/DSk8trPTUP1pMZzGdxdQ/SeXDuF+X1D/mnkZY7mnUP9RDPdRDPdQ/J3VfHlsR1D+U3W+U3W/UP0RERERERNQ/69khcGMZ1D8mQ7CONu/TP0vUrkTtStQ/IYQQQggh1D97FK5H4XrUPxRFURRFUdQ/CoVCoVAo1D8AAAAAAADUP/aEPWFP2NM/FDuxEzux0z+Hae6Cv4rTP+GDDz744NM/qzut7rS60z9+NSbSA5XTP/42xajhb9M/S0tLS0tL0z8xNguqPSfTPzDXDsy1A9M/UfxFzrDg0j8zMzMzMzPTP0yuICZXENM/K2wOJFph0z8UO7ETO7HTPwAAAAAAANQ/Ccs9jbDc0z+hQoUKFSrUPwJl4kr3BtQ/RT7rBlPk0z8M1XTMJcLTP6DTBjptoNM/n65P16fr0z+ivIbyGsrTP1T+qFP+qNM/zspPiLPy0z/SExw9wdHTPxQ7sRM7sdM/Qbgb+x6R0z/jJszvanHTP8F4K/scUtM/MzMzMzMz0z9Wigm6qxTTP2gvob2E9tI/n6lcd7zY0j+7ErUrUbvSP54S5ClBntI/",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "customdata": [
-          [
-           "A paper about AI regulation that was originally su"
-          ],
-          [
-           "If we assume all articles published by Nature in 2"
-          ],
-          [
-           "In Unlambda, what exact charcter or text needs to "
-          ],
-          [
-           "I’m researching species that became invasive after"
-          ],
-          [
-           "The attached spreadsheet shows the inventory for a"
-          ],
-          [
-           "How many studio albums were published by Mercedes "
-          ],
-          [
-           "If Eliud Kipchoge could maintain his record-making"
-          ],
-          [
-           "The object in the British Museum's collection with"
-          ],
-          [
-           "According to github, when was Regression added to "
-          ],
-          [
-           "Here's a fun riddle that I think you'll enjoy.\n\nYo"
-          ],
-          [
-           "Using the Biopython library in Python, parse the P"
-          ],
-          [
-           "What are the EC numbers of the two most commonly u"
-          ],
-          [
-           "In July 2, 1959 United States standards for grades"
-          ],
-          [
-           "In April of 1977, who was the Prime Minister of th"
-          ],
-          [
-           "Use density measures from the chemistry materials "
-          ],
-          [
-           "What was the volume in m^3 of the fish bag that wa"
-          ],
-          [
-           "What is the average number of pre-2020 works on th"
-          ],
-          [
-           "In the video https://www.youtube.com/watch?v=L1vXC"
-          ],
-          [
-           "Of the authors (First M. Last) that worked on the "
-          ],
-          [
-           "When you take the average of the standard populati"
-          ],
-          [
-           "Assuming scientists in the famous youtube video Th"
-          ],
-          [
-           "In Series 9, Episode 11 of Doctor Who, the Doctor "
-          ],
-          [
-           "In terms of geographical distance between capital "
-          ],
-          [
-           "In the NCATS PubChem compound database for Food Ad"
-          ],
-          [
-           "I need to fact-check a citation. This is the citat"
-          ],
-          [
-           "Which contributor to the version of OpenCV where s"
-          ],
-          [
-           "What integer-rounded percentage of the total lengt"
-          ],
-          [
-           "An office held a Secret Santa gift exchange where "
-          ],
-          [
-           "What is the maximum length in meters of #9 in the "
-          ],
-          [
-           "What two-word type of model did Manash Pratim Kash"
-          ],
-          [
-           "What animals that were mentioned in both Ilias Lag"
-          ],
-          [
-           "How many High Energy Physics - Lattice articles li"
-          ],
-          [
-           "The photograph in the Whitney Museum of American A"
-          ],
-          [
-           ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti"
-          ],
-          [
-           "What is the minimum number of page links a person "
-          ],
-          [
-           "Each cell in the attached spreadsheet represents a"
-          ],
-          [
-           "Which of the text elements under CATEGORIES in the"
-          ],
-          [
-           "I went to Virtue restaurant & bar in Chicago for m"
-          ],
-          [
-           "¬(A ∧ B) ↔ (¬A ∨ ¬B)\n¬(A ∨ B) ↔ (¬A ∧ ¬B)\n(A → B) "
-          ],
-          [
-           "My family reunion is this week, and I was assigned"
-          ],
-          [
-           "In Emily Midkiff's June 2014 article in a journal "
-          ],
-          [
-           "It is 1999. Before you party like it is 1999, plea"
-          ],
-          [
-           "Under DDC 633 on Bielefeld University Library's BA"
-          ]
-         ],
-         "hovertemplate": "agent_name=code_qwen-coder-32B_03_february_text<br>index=%{x}<br>is_correct=%{y}<br>question=%{customdata[0]}<extra></extra>",
-         "legendgroup": "code_qwen-coder-32B_03_february_text",
-         "line": {
-          "color": "#EF553B",
-          "dash": "solid"
-         },
-         "marker": {
-          "symbol": "circle"
-         },
-         "mode": "lines",
-         "name": "code_qwen-coder-32B_03_february_text",
-         "showlegend": true,
-         "type": "scattergl",
-         "x": {
-          "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKg==",
-          "dtype": "i1"
-         },
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVdU/AAAAAAAA0D+amZmZmZnZP1VVVVVVVdU/kiRJkiRJ0j8AAAAAAADQPxzHcRzHccw/mpmZmZmZyT9GF1100UXHP1VVVVVVVcU/FDuxEzuxwz+SJEmSJEnCP5qZmZmZmck/AAAAAAAA0D8eHh4eHh7OPxzHcRzHccw/KK+hvIbyyj+amZmZmZnJP57neZ7nec4/F1100UUXzT+96U1vetPLP6uqqqqqqso/mpmZmZmZyT/ZiZ3YiZ3IP0J7Ce0ltMc/t23btm3bxj98GmG5pxHGP1VVVVVVVcU/pZRSSimlxD8AAAAAAADEP2WTTTbZZMM/l5aWlpaWxj8WX/EVX/HFPzmO4ziO48g/doMp8lk3yD9DeQ3lNZTHPxqkQRqkQco/zczMzMzMzD8ZnI/B+RjMP9u2bdu2bcs/s6asKWvKyj8=",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        }
-       ],
-       "layout": {
-        "legend": {
-         "title": {
-          "text": "agent_name"
-         },
-         "tracegroupgap": 0
-        },
-        "margin": {
-         "t": 60
-        },
-        "template": {
-         "data": {
-          "bar": [
-           {
-            "error_x": {
-             "color": "#2a3f5f"
-            },
-            "error_y": {
-             "color": "#2a3f5f"
-            },
-            "marker": {
-             "line": {
-              "color": "#E5ECF6",
-              "width": 0.5
-             },
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "bar"
-           }
-          ],
-          "barpolar": [
-           {
-            "marker": {
-             "line": {
-              "color": "#E5ECF6",
-              "width": 0.5
-             },
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "barpolar"
-           }
-          ],
-          "carpet": [
-           {
-            "aaxis": {
-             "endlinecolor": "#2a3f5f",
-             "gridcolor": "white",
-             "linecolor": "white",
-             "minorgridcolor": "white",
-             "startlinecolor": "#2a3f5f"
-            },
-            "baxis": {
-             "endlinecolor": "#2a3f5f",
-             "gridcolor": "white",
-             "linecolor": "white",
-             "minorgridcolor": "white",
-             "startlinecolor": "#2a3f5f"
-            },
-            "type": "carpet"
-           }
-          ],
-          "choropleth": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "choropleth"
-           }
-          ],
-          "contour": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "contour"
-           }
-          ],
-          "contourcarpet": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "contourcarpet"
-           }
-          ],
-          "heatmap": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "heatmap"
-           }
-          ],
-          "histogram": [
-           {
-            "marker": {
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "histogram"
-           }
-          ],
-          "histogram2d": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "histogram2d"
-           }
-          ],
-          "histogram2dcontour": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "histogram2dcontour"
-           }
-          ],
-          "mesh3d": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "mesh3d"
-           }
-          ],
-          "parcoords": [
-           {
-            "line": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "parcoords"
-           }
-          ],
-          "pie": [
-           {
-            "automargin": true,
-            "type": "pie"
-           }
-          ],
-          "scatter": [
-           {
-            "fillpattern": {
-             "fillmode": "overlay",
-             "size": 10,
-             "solidity": 0.2
-            },
-            "type": "scatter"
-           }
-          ],
-          "scatter3d": [
-           {
-            "line": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatter3d"
-           }
-          ],
-          "scattercarpet": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattercarpet"
-           }
-          ],
-          "scattergeo": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattergeo"
-           }
-          ],
-          "scattergl": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattergl"
-           }
-          ],
-          "scattermap": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattermap"
-           }
-          ],
-          "scattermapbox": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattermapbox"
-           }
-          ],
-          "scatterpolar": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterpolar"
-           }
-          ],
-          "scatterpolargl": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterpolargl"
-           }
-          ],
-          "scatterternary": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterternary"
-           }
-          ],
-          "surface": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "surface"
-           }
-          ],
-          "table": [
-           {
-            "cells": {
-             "fill": {
-              "color": "#EBF0F8"
-             },
-             "line": {
-              "color": "white"
-             }
-            },
-            "header": {
-             "fill": {
-              "color": "#C8D4E3"
-             },
-             "line": {
-              "color": "white"
-             }
-            },
-            "type": "table"
-           }
-          ]
-         },
-         "layout": {
-          "annotationdefaults": {
-           "arrowcolor": "#2a3f5f",
-           "arrowhead": 0,
-           "arrowwidth": 1
-          },
-          "autotypenumbers": "strict",
-          "coloraxis": {
-           "colorbar": {
-            "outlinewidth": 0,
-            "ticks": ""
-           }
-          },
-          "colorscale": {
-           "diverging": [
-            [
-             0,
-             "#8e0152"
-            ],
-            [
-             0.1,
-             "#c51b7d"
-            ],
-            [
-             0.2,
-             "#de77ae"
-            ],
-            [
-             0.3,
-             "#f1b6da"
-            ],
-            [
-             0.4,
-             "#fde0ef"
-            ],
-            [
-             0.5,
-             "#f7f7f7"
-            ],
-            [
-             0.6,
-             "#e6f5d0"
-            ],
-            [
-             0.7,
-             "#b8e186"
-            ],
-            [
-             0.8,
-             "#7fbc41"
-            ],
-            [
-             0.9,
-             "#4d9221"
-            ],
-            [
-             1,
-             "#276419"
-            ]
-           ],
-           "sequential": [
-            [
-             0,
-             "#0d0887"
-            ],
-            [
-             0.1111111111111111,
-             "#46039f"
-            ],
-            [
-             0.2222222222222222,
-             "#7201a8"
-            ],
-            [
-             0.3333333333333333,
-             "#9c179e"
-            ],
-            [
-             0.4444444444444444,
-             "#bd3786"
-            ],
-            [
-             0.5555555555555556,
-             "#d8576b"
-            ],
-            [
-             0.6666666666666666,
-             "#ed7953"
-            ],
-            [
-             0.7777777777777778,
-             "#fb9f3a"
-            ],
-            [
-             0.8888888888888888,
-             "#fdca26"
-            ],
-            [
-             1,
-             "#f0f921"
-            ]
-           ],
-           "sequentialminus": [
-            [
-             0,
-             "#0d0887"
-            ],
-            [
-             0.1111111111111111,
-             "#46039f"
-            ],
-            [
-             0.2222222222222222,
-             "#7201a8"
-            ],
-            [
-             0.3333333333333333,
-             "#9c179e"
-            ],
-            [
-             0.4444444444444444,
-             "#bd3786"
-            ],
-            [
-             0.5555555555555556,
-             "#d8576b"
-            ],
-            [
-             0.6666666666666666,
-             "#ed7953"
-            ],
-            [
-             0.7777777777777778,
-             "#fb9f3a"
-            ],
-            [
-             0.8888888888888888,
-             "#fdca26"
-            ],
-            [
-             1,
-             "#f0f921"
-            ]
-           ]
-          },
-          "colorway": [
-           "#636efa",
-           "#EF553B",
-           "#00cc96",
-           "#ab63fa",
-           "#FFA15A",
-           "#19d3f3",
-           "#FF6692",
-           "#B6E880",
-           "#FF97FF",
-           "#FECB52"
-          ],
-          "font": {
-           "color": "#2a3f5f"
-          },
-          "geo": {
-           "bgcolor": "white",
-           "lakecolor": "white",
-           "landcolor": "#E5ECF6",
-           "showlakes": true,
-           "showland": true,
-           "subunitcolor": "white"
-          },
-          "hoverlabel": {
-           "align": "left"
-          },
-          "hovermode": "closest",
-          "mapbox": {
-           "style": "light"
-          },
-          "paper_bgcolor": "white",
-          "plot_bgcolor": "#E5ECF6",
-          "polar": {
-           "angularaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "bgcolor": "#E5ECF6",
-           "radialaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           }
-          },
-          "scene": {
-           "xaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           },
-           "yaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           },
-           "zaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           }
-          },
-          "shapedefaults": {
-           "line": {
-            "color": "#2a3f5f"
-           }
-          },
-          "ternary": {
-           "aaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "baxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "bgcolor": "#E5ECF6",
-           "caxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           }
-          },
-          "title": {
-           "x": 0.05
-          },
-          "xaxis": {
-           "automargin": true,
-           "gridcolor": "white",
-           "linecolor": "white",
-           "ticks": "",
-           "title": {
-            "standoff": 15
-           },
-           "zerolinecolor": "white",
-           "zerolinewidth": 2
-          },
-          "yaxis": {
-           "automargin": true,
-           "gridcolor": "white",
-           "linecolor": "white",
-           "ticks": "",
-           "title": {
-            "standoff": 15
-           },
-           "zerolinecolor": "white",
-           "zerolinewidth": 2
-          }
-         }
-        },
-        "xaxis": {
-         "anchor": "y",
-         "domain": [
-          0,
-          1
-         ],
-         "title": {
-          "text": "index"
-         }
-        },
-        "yaxis": {
-         "anchor": "x",
-         "domain": [
-          0,
-          1
-         ],
-         "title": {
-          "text": "is_correct"
-         }
-        }
-       }
-      }
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "import plotly.express as px\n",
     "\n",
@@ -10788,9 +245,6 @@
     "\n",
     "\n",
     "cumulative_df[\"question\"] = cumulative_df.apply(find_question, axis=1)\n",
-    "# cumulative_df[\"question\"] = [el[:50] for el in sel_df[\"question\"].values]\n",
-    "\n",
-    "# cumulative_df[\"is_correct\"] = cumulative_df[\"is_correct\"] * (165 - 68) / 165\n",
     "\n",
     "px.line(\n",
     "    cumulative_df,\n",
@@ -10810,19 +264,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "165\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "sel_df = result_df.loc[result_df[\"agent_name\"] == o1]\n",
+    "sel_df = result_df.loc[result_df[\"agent_name\"] == \"o1\"]\n",
     "print(len(sel_df))"
    ]
   },
@@ -10835,56 +281,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:10: SettingWithCopyWarning:\n",
-      "\n",
-      "\n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "\n",
-      "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:10: SettingWithCopyWarning:\n",
-      "\n",
-      "\n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "\n",
-      "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:10: SettingWithCopyWarning:\n",
-      "\n",
-      "\n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "\n",
-      "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:10: SettingWithCopyWarning:\n",
-      "\n",
-      "\n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "\n",
-      "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:11: SettingWithCopyWarning:\n",
-      "\n",
-      "\n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import numpy as np\n",
     "\n",
@@ -10916,890 +315,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.plotly.v1+json": {
-       "config": {
-        "plotlyServerURL": "https://plot.ly"
-       },
-       "data": [
-        {
-         "hovertemplate": "is_correct=False<br>variable=%{x}<br><b>Average count</b>=%{y}<extra></extra>",
-         "legendgroup": "False",
-         "marker": {
-          "color": "#636efa",
-          "pattern": {
-           "shape": ""
-          }
-         },
-         "name": "False",
-         "orientation": "v",
-         "showlegend": true,
-         "textposition": "outside",
-         "type": "bar",
-         "x": [
-          "AgentParsingError",
-          "AgentExecutionError",
-          "AgentMaxIterationsError",
-          "AgentGenerationError",
-          "Count steps"
-         ],
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACSJEmSJEkMQA==",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        },
-        {
-         "hovertemplate": "is_correct=True<br>variable=%{x}<br><b>Average count</b>=%{y}<extra></extra>",
-         "legendgroup": "True",
-         "marker": {
-          "color": "#EF553B",
-          "pattern": {
-           "shape": ""
-          }
-         },
-         "name": "True",
-         "orientation": "v",
-         "showlegend": true,
-         "textposition": "outside",
-         "type": "bar",
-         "x": [
-          "AgentParsingError",
-          "AgentExecutionError",
-          "AgentMaxIterationsError",
-          "AgentGenerationError",
-          "Count steps"
-         ],
-         "xaxis": "x",
-         "y": {
-          "bdata": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABPt+aHRcoIQA==",
-          "dtype": "f8"
-         },
-         "yaxis": "y"
-        }
-       ],
-       "layout": {
-        "bargroupgap": 0,
-        "barmode": "group",
-        "height": 500,
-        "legend": {
-         "title": {
-          "text": "is_correct"
-         },
-         "tracegroupgap": 0
-        },
-        "margin": {
-         "t": 60
-        },
-        "template": {
-         "data": {
-          "bar": [
-           {
-            "error_x": {
-             "color": "#2a3f5f"
-            },
-            "error_y": {
-             "color": "#2a3f5f"
-            },
-            "marker": {
-             "line": {
-              "color": "#E5ECF6",
-              "width": 0.5
-             },
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "bar"
-           }
-          ],
-          "barpolar": [
-           {
-            "marker": {
-             "line": {
-              "color": "#E5ECF6",
-              "width": 0.5
-             },
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "barpolar"
-           }
-          ],
-          "carpet": [
-           {
-            "aaxis": {
-             "endlinecolor": "#2a3f5f",
-             "gridcolor": "white",
-             "linecolor": "white",
-             "minorgridcolor": "white",
-             "startlinecolor": "#2a3f5f"
-            },
-            "baxis": {
-             "endlinecolor": "#2a3f5f",
-             "gridcolor": "white",
-             "linecolor": "white",
-             "minorgridcolor": "white",
-             "startlinecolor": "#2a3f5f"
-            },
-            "type": "carpet"
-           }
-          ],
-          "choropleth": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "choropleth"
-           }
-          ],
-          "contour": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "contour"
-           }
-          ],
-          "contourcarpet": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "contourcarpet"
-           }
-          ],
-          "heatmap": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "heatmap"
-           }
-          ],
-          "histogram": [
-           {
-            "marker": {
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "histogram"
-           }
-          ],
-          "histogram2d": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "histogram2d"
-           }
-          ],
-          "histogram2dcontour": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "histogram2dcontour"
-           }
-          ],
-          "mesh3d": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "mesh3d"
-           }
-          ],
-          "parcoords": [
-           {
-            "line": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "parcoords"
-           }
-          ],
-          "pie": [
-           {
-            "automargin": true,
-            "type": "pie"
-           }
-          ],
-          "scatter": [
-           {
-            "fillpattern": {
-             "fillmode": "overlay",
-             "size": 10,
-             "solidity": 0.2
-            },
-            "type": "scatter"
-           }
-          ],
-          "scatter3d": [
-           {
-            "line": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatter3d"
-           }
-          ],
-          "scattercarpet": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattercarpet"
-           }
-          ],
-          "scattergeo": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattergeo"
-           }
-          ],
-          "scattergl": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattergl"
-           }
-          ],
-          "scattermap": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattermap"
-           }
-          ],
-          "scattermapbox": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattermapbox"
-           }
-          ],
-          "scatterpolar": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterpolar"
-           }
-          ],
-          "scatterpolargl": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterpolargl"
-           }
-          ],
-          "scatterternary": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterternary"
-           }
-          ],
-          "surface": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "surface"
-           }
-          ],
-          "table": [
-           {
-            "cells": {
-             "fill": {
-              "color": "#EBF0F8"
-             },
-             "line": {
-              "color": "white"
-             }
-            },
-            "header": {
-             "fill": {
-              "color": "#C8D4E3"
-             },
-             "line": {
-              "color": "white"
-             }
-            },
-            "type": "table"
-           }
-          ]
-         },
-         "layout": {
-          "annotationdefaults": {
-           "arrowcolor": "#2a3f5f",
-           "arrowhead": 0,
-           "arrowwidth": 1
-          },
-          "autotypenumbers": "strict",
-          "coloraxis": {
-           "colorbar": {
-            "outlinewidth": 0,
-            "ticks": ""
-           }
-          },
-          "colorscale": {
-           "diverging": [
-            [
-             0,
-             "#8e0152"
-            ],
-            [
-             0.1,
-             "#c51b7d"
-            ],
-            [
-             0.2,
-             "#de77ae"
-            ],
-            [
-             0.3,
-             "#f1b6da"
-            ],
-            [
-             0.4,
-             "#fde0ef"
-            ],
-            [
-             0.5,
-             "#f7f7f7"
-            ],
-            [
-             0.6,
-             "#e6f5d0"
-            ],
-            [
-             0.7,
-             "#b8e186"
-            ],
-            [
-             0.8,
-             "#7fbc41"
-            ],
-            [
-             0.9,
-             "#4d9221"
-            ],
-            [
-             1,
-             "#276419"
-            ]
-           ],
-           "sequential": [
-            [
-             0,
-             "#0d0887"
-            ],
-            [
-             0.1111111111111111,
-             "#46039f"
-            ],
-            [
-             0.2222222222222222,
-             "#7201a8"
-            ],
-            [
-             0.3333333333333333,
-             "#9c179e"
-            ],
-            [
-             0.4444444444444444,
-             "#bd3786"
-            ],
-            [
-             0.5555555555555556,
-             "#d8576b"
-            ],
-            [
-             0.6666666666666666,
-             "#ed7953"
-            ],
-            [
-             0.7777777777777778,
-             "#fb9f3a"
-            ],
-            [
-             0.8888888888888888,
-             "#fdca26"
-            ],
-            [
-             1,
-             "#f0f921"
-            ]
-           ],
-           "sequentialminus": [
-            [
-             0,
-             "#0d0887"
-            ],
-            [
-             0.1111111111111111,
-             "#46039f"
-            ],
-            [
-             0.2222222222222222,
-             "#7201a8"
-            ],
-            [
-             0.3333333333333333,
-             "#9c179e"
-            ],
-            [
-             0.4444444444444444,
-             "#bd3786"
-            ],
-            [
-             0.5555555555555556,
-             "#d8576b"
-            ],
-            [
-             0.6666666666666666,
-             "#ed7953"
-            ],
-            [
-             0.7777777777777778,
-             "#fb9f3a"
-            ],
-            [
-             0.8888888888888888,
-             "#fdca26"
-            ],
-            [
-             1,
-             "#f0f921"
-            ]
-           ]
-          },
-          "colorway": [
-           "#636efa",
-           "#EF553B",
-           "#00cc96",
-           "#ab63fa",
-           "#FFA15A",
-           "#19d3f3",
-           "#FF6692",
-           "#B6E880",
-           "#FF97FF",
-           "#FECB52"
-          ],
-          "font": {
-           "color": "#2a3f5f"
-          },
-          "geo": {
-           "bgcolor": "white",
-           "lakecolor": "white",
-           "landcolor": "#E5ECF6",
-           "showlakes": true,
-           "showland": true,
-           "subunitcolor": "white"
-          },
-          "hoverlabel": {
-           "align": "left"
-          },
-          "hovermode": "closest",
-          "mapbox": {
-           "style": "light"
-          },
-          "paper_bgcolor": "white",
-          "plot_bgcolor": "#E5ECF6",
-          "polar": {
-           "angularaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "bgcolor": "#E5ECF6",
-           "radialaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           }
-          },
-          "scene": {
-           "xaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           },
-           "yaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           },
-           "zaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           }
-          },
-          "shapedefaults": {
-           "line": {
-            "color": "#2a3f5f"
-           }
-          },
-          "ternary": {
-           "aaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "baxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "bgcolor": "#E5ECF6",
-           "caxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           }
-          },
-          "title": {
-           "x": 0.05
-          },
-          "xaxis": {
-           "automargin": true,
-           "gridcolor": "white",
-           "linecolor": "white",
-           "ticks": "",
-           "title": {
-            "standoff": 15
-           },
-           "zerolinecolor": "white",
-           "zerolinewidth": 2
-          },
-          "yaxis": {
-           "automargin": true,
-           "gridcolor": "white",
-           "linecolor": "white",
-           "ticks": "",
-           "title": {
-            "standoff": 15
-           },
-           "zerolinecolor": "white",
-           "zerolinewidth": 2
-          }
-         }
-        },
-        "width": 800,
-        "xaxis": {
-         "anchor": "y",
-         "domain": [
-          0,
-          1
-         ],
-         "title": {
-          "text": "variable"
-         }
-        },
-        "yaxis": {
-         "anchor": "x",
-         "domain": [
-          0,
-          1
-         ],
-         "title": {
-          "text": "<b>Average count</b>"
-         }
-        }
-       }
-      }
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "import plotly.express as px\n",
     "\n",
@@ -11841,153 +359,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>is_correct</th>\n",
-       "      <th>count_steps</th>\n",
-       "      <th>question</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>attachment_type</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>None</th>\n",
-       "      <td>0.423799</td>\n",
-       "      <td>4.959725</td>\n",
-       "      <td>2185</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>csv</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>7.750000</td>\n",
-       "      <td>16</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>docx</th>\n",
-       "      <td>0.571429</td>\n",
-       "      <td>4.904762</td>\n",
-       "      <td>21</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>jpg</th>\n",
-       "      <td>0.142857</td>\n",
-       "      <td>5.750000</td>\n",
-       "      <td>28</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>jsonld</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>6.600000</td>\n",
-       "      <td>15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mp3</th>\n",
-       "      <td>0.480000</td>\n",
-       "      <td>4.500000</td>\n",
-       "      <td>50</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>pdb</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>4.444444</td>\n",
-       "      <td>18</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>pdf</th>\n",
-       "      <td>0.588235</td>\n",
-       "      <td>4.137255</td>\n",
-       "      <td>51</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>png</th>\n",
-       "      <td>0.216783</td>\n",
-       "      <td>4.412587</td>\n",
-       "      <td>143</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>pptx</th>\n",
-       "      <td>0.882353</td>\n",
-       "      <td>4.058824</td>\n",
-       "      <td>17</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>py</th>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>4.266667</td>\n",
-       "      <td>15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>txt</th>\n",
-       "      <td>0.705882</td>\n",
-       "      <td>4.764706</td>\n",
-       "      <td>17</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>xlsx</th>\n",
-       "      <td>0.612745</td>\n",
-       "      <td>4.823529</td>\n",
-       "      <td>204</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>zip</th>\n",
-       "      <td>0.448276</td>\n",
-       "      <td>5.344828</td>\n",
-       "      <td>29</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                 is_correct  count_steps  question\n",
-       "attachment_type                                   \n",
-       "None               0.423799     4.959725      2185\n",
-       "csv                0.000000     7.750000        16\n",
-       "docx               0.571429     4.904762        21\n",
-       "jpg                0.142857     5.750000        28\n",
-       "jsonld             0.000000     6.600000        15\n",
-       "mp3                0.480000     4.500000        50\n",
-       "pdb                0.000000     4.444444        18\n",
-       "pdf                0.588235     4.137255        51\n",
-       "png                0.216783     4.412587       143\n",
-       "pptx               0.882353     4.058824        17\n",
-       "py                 1.000000     4.266667        15\n",
-       "txt                0.705882     4.764706        17\n",
-       "xlsx               0.612745     4.823529       204\n",
-       "zip                0.448276     5.344828        29"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "display(\n",
     "    result_df.groupby([\"attachment_type\"])[[\"is_correct\", \"count_steps\", \"question\"]].agg(\n",
@@ -12005,7 +379,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -12015,52 +389,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "agent_name\n",
-       "code_gpt4o_03_february_goodoldtext-unbroken         38.36\n",
-       "code_gpt4o_03_february_magenticbrowser              35.22\n",
-       "code_gpt4o_03_february_magenticbrowser2             36.54\n",
-       "code_gpt4o_03_february_text                         37.58\n",
-       "code_o1_01_february_text                            49.09\n",
-       "code_o1_03_february_ablation-toolcalling-manager    32.73\n",
-       "code_o1_03_february_fix-print-outputs               51.83\n",
-       "code_o1_03_february_fix-print-outputs2              55.77\n",
-       "code_o1_03_february_goodoldtext-unbroken            53.42\n",
-       "code_o1_03_february_remove-navigational             53.66\n",
-       "code_o1_03_february_text_high-reasoning-effort      48.48\n",
-       "code_o1_04_february_submission                      49.38\n",
-       "code_o1_04_february_submission5                     55.15\n",
-       "code_o3-mini_03_february_remove-navigational        29.09\n",
-       "Name: is_correct, dtype: float64"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Majority score: 58.18\n",
-      "Oracle score: 72.73\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/3287428472.py:20: DeprecationWarning:\n",
-      "\n",
-      "DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "def majority_vote(df):\n",
     "    df = df[(df[\"prediction\"] != \"Unable to determine\") & (~df[\"prediction\"].isna()) & (df[\"prediction\"] != \"None\")]\n",
@@ -12100,7 +431,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -12112,7 +443,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -12129,9 +460,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "gaia",
+   "display_name": "test",
    "language": "python",
-   "name": "python3"
+   "name": "test"
   },
   "language_info": {
    "codemirror_mode": {
diff --git a/examples/open_deep_research/app.py b/examples/open_deep_research/app.py
new file mode 100644
index 000000000..a7f884faa
--- /dev/null
+++ b/examples/open_deep_research/app.py
@@ -0,0 +1,11 @@
+from run import create_agent  # sibling script run.py in this example directory
+
+from smolagents.gradio_ui import GradioUI
+
+
+agent = create_agent()  # no model_id given, so create_agent's default ("o1") is used
+
+demo = GradioUI(agent)  # Gradio front-end for the agent; only launched when run as a script
+
+if __name__ == "__main__":
+    demo.launch()
diff --git a/examples/open_deep_research/requirements.txt b/examples/open_deep_research/requirements.txt
index a18936ae4..4fe0e0e2a 100644
--- a/examples/open_deep_research/requirements.txt
+++ b/examples/open_deep_research/requirements.txt
@@ -1,4 +1,5 @@
 anthropic>=0.37.1
+audioop-lts<1.0; python_version >= "3.13" # required to use pydub in Python >=3.13; LTS port of the removed Python builtin module audioop
 beautifulsoup4>=4.12.3
 datasets>=2.21.0
 google_search_results>=2.4.2
diff --git a/examples/open_deep_research/run.py b/examples/open_deep_research/run.py
index 2dcddab4f..be1ad38a5 100644
--- a/examples/open_deep_research/run.py
+++ b/examples/open_deep_research/run.py
@@ -11,7 +11,6 @@
     FindNextTool,
     PageDownTool,
     PageUpTool,
-    SearchInformationTool,
     SimpleTextBrowser,
     VisitTool,
 )
@@ -19,38 +18,13 @@
 
 from smolagents import (
     CodeAgent,
-    # HfApiModel,
+    GoogleSearchTool,
+    # InferenceClientModel,
     LiteLLMModel,
     ToolCallingAgent,
 )
 
 
-AUTHORIZED_IMPORTS = [
-    "requests",
-    "zipfile",
-    "os",
-    "pandas",
-    "numpy",
-    "sympy",
-    "json",
-    "bs4",
-    "pubchempy",
-    "xml",
-    "yahoo_finance",
-    "Bio",
-    "sklearn",
-    "scipy",
-    "pydub",
-    "io",
-    "PIL",
-    "chess",
-    "PyPDF2",
-    "pptx",
-    "torch",
-    "datetime",
-    "fractions",
-    "csv",
-]
 load_dotenv(override=True)
 login(os.getenv("HF_TOKEN"))
 
@@ -83,22 +57,20 @@ def parse_args():
 os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
 
 
-def main():
-    args = parse_args()
-    text_limit = 100000
-
-    model = LiteLLMModel(
-        args.model_id,
-        custom_role_conversions=custom_role_conversions,
-        max_completion_tokens=8192,
-        reasoning_effort="high",
-    )
-    document_inspection_tool = TextInspectorTool(model, text_limit)
+def create_agent(model_id="o1"):
+    model_params = {
+        "model_id": model_id,
+        "custom_role_conversions": custom_role_conversions,
+        "max_completion_tokens": 8192,
+    }
+    if model_id == "o1":
+        model_params["reasoning_effort"] = "high"
+    model = LiteLLMModel(**model_params)
 
+    text_limit = 100000
     browser = SimpleTextBrowser(**BROWSER_CONFIG)
-
     WEB_TOOLS = [
-        SearchInformationTool(browser),
+        GoogleSearchTool(provider="serper"),
         VisitTool(browser),
         PageUpTool(browser),
         PageDownTool(browser),
@@ -107,7 +79,6 @@ def main():
         ArchiveSearchTool(browser),
         TextInspectorTool(model, text_limit),
     ]
-
     text_webbrowser_agent = ToolCallingAgent(
         model=model,
         tools=WEB_TOOLS,
@@ -129,15 +100,23 @@ def main():
 
     manager_agent = CodeAgent(
         model=model,
-        tools=[visualizer, document_inspection_tool],
+        tools=[visualizer, TextInspectorTool(model, text_limit)],
         max_steps=12,
         verbosity_level=2,
-        additional_authorized_imports=AUTHORIZED_IMPORTS,
+        additional_authorized_imports=["*"],
         planning_interval=4,
         managed_agents=[text_webbrowser_agent],
     )
 
-    answer = manager_agent.run(args.question)
+    return manager_agent
+
+
+def main():
+    args = parse_args()
+
+    agent = create_agent(model_id=args.model_id)
+
+    answer = agent.run(args.question)
 
     print(f"Got this answer: {answer}")
 
diff --git a/examples/open_deep_research/run_gaia.py b/examples/open_deep_research/run_gaia.py
index fa59fc03e..192081787 100644
--- a/examples/open_deep_research/run_gaia.py
+++ b/examples/open_deep_research/run_gaia.py
@@ -1,3 +1,4 @@
+# EXAMPLE COMMAND: python examples/open_deep_research/run_gaia.py --concurrency 32 --run-name generate-traces-03-apr-noplanning --model-id gpt-4o
 import argparse
 import json
 import os
@@ -5,7 +6,6 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 from pathlib import Path
-from typing import List
 
 import datasets
 import pandas as pd
@@ -23,7 +23,6 @@
     FindNextTool,
     PageDownTool,
     PageUpTool,
-    SearchInformationTool,
     SimpleTextBrowser,
     VisitTool,
 )
@@ -32,39 +31,13 @@
 
 from smolagents import (
     CodeAgent,
-    # HfApiModel,
+    GoogleSearchTool,
     LiteLLMModel,
     Model,
     ToolCallingAgent,
 )
 
 
-AUTHORIZED_IMPORTS = [
-    "requests",
-    "zipfile",
-    "os",
-    "pandas",
-    "numpy",
-    "sympy",
-    "json",
-    "bs4",
-    "pubchempy",
-    "xml",
-    "yahoo_finance",
-    "Bio",
-    "sklearn",
-    "scipy",
-    "pydub",
-    "io",
-    "PIL",
-    "chess",
-    "PyPDF2",
-    "pptx",
-    "torch",
-    "datetime",
-    "fractions",
-    "csv",
-]
 load_dotenv(override=True)
 login(os.getenv("HF_TOKEN"))
 
@@ -121,14 +94,14 @@ def preprocess_file_paths(row):
 os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
 
 
-def create_agent_hierarchy(model: Model):
+def create_agent_team(model: Model):
     text_limit = 100000
     ti_tool = TextInspectorTool(model, text_limit)
 
     browser = SimpleTextBrowser(**BROWSER_CONFIG)
 
     WEB_TOOLS = [
-        SearchInformationTool(browser),
+        GoogleSearchTool(provider="serper"),
         VisitTool(browser),
         PageUpTool(browser),
         PageDownTool(browser),
@@ -137,6 +110,7 @@ def create_agent_hierarchy(model: Model):
         ArchiveSearchTool(browser),
         TextInspectorTool(model, text_limit),
     ]
+
     text_webbrowser_agent = ToolCallingAgent(
         model=model,
         tools=WEB_TOOLS,
@@ -161,7 +135,7 @@ def create_agent_hierarchy(model: Model):
         tools=[visualizer, ti_tool],
         max_steps=12,
         verbosity_level=2,
-        additional_authorized_imports=AUTHORIZED_IMPORTS,
+        additional_authorized_imports=["*"],
         planning_interval=4,
         managed_agents=[text_webbrowser_agent],
     )
@@ -178,21 +152,20 @@ def append_answer(entry: dict, jsonl_file: str) -> None:
 
 
 def answer_single_question(example, model_id, answers_file, visual_inspection_tool):
-    model = LiteLLMModel(
-        model_id,
-        custom_role_conversions=custom_role_conversions,
-        max_completion_tokens=8192,
-        reasoning_effort="high",
-    )
-    # model = HfApiModel("Qwen/Qwen2.5-72B-Instruct", provider="together")
-    #     "https://lnxyuvj02bpe6mam.us-east-1.aws.endpoints.huggingface.cloud",
-    #     custom_role_conversions=custom_role_conversions,
-    #     # provider="sambanova",
-    #     max_tokens=8096,
-    # )
+    model_params = {
+        "model_id": model_id,
+        "custom_role_conversions": custom_role_conversions,
+    }
+    if model_id == "o1":
+        model_params["reasoning_effort"] = "high"
+        model_params["max_completion_tokens"] = 8192
+    else:
+        model_params["max_tokens"] = 4096
+    model = LiteLLMModel(**model_params)
+    # model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=4096)
     document_inspection_tool = TextInspectorTool(model, 100000)
 
-    agent = create_agent_hierarchy(model)
+    agent = create_agent_team(model)
 
     augmented_question = """You have one question to answer. It is paramount that you provide a correct answer.
 Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find the correct answer (the answer does exist). Failure or 'I cannot answer' or 'None found' will not be tolerated, success will be rewarded.
@@ -218,14 +191,14 @@ def answer_single_question(example, model_id, answers_file, visual_inspection_to
         # Run agent 🚀
         final_result = agent.run(augmented_question)
 
-        agent_memory = agent.write_memory_to_messages(summary_mode=True)
+        agent_memory = agent.write_memory_to_messages()
 
         final_result = prepare_response(augmented_question, agent_memory, reformulation_model=model)
 
         output = str(final_result)
         for memory_step in agent.memory.steps:
             memory_step.model_input_messages = None
-        intermediate_steps = [str(step) for step in agent.memory.steps]
+        intermediate_steps = agent_memory
 
         # Check for parsing errors which indicate the LLM failed to follow the required format
         parsing_error = True if any(["AgentParsingError" in step for step in intermediate_steps]) else False
@@ -243,6 +216,12 @@ def answer_single_question(example, model_id, answers_file, visual_inspection_to
         exception = e
         raised_exception = True
     end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    token_counts_manager = agent.monitor.get_total_token_counts()
+    token_counts_web = list(agent.managed_agents.values())[0].monitor.get_total_token_counts()
+    total_token_counts = {
+        "input": token_counts_manager["input"] + token_counts_web["input"],
+        "output": token_counts_manager["output"] + token_counts_web["output"],
+    }
     annotated_example = {
         "agent_name": model.model_id,
         "question": example["question"],
@@ -252,16 +231,17 @@ def answer_single_question(example, model_id, answers_file, visual_inspection_to
         "parsing_error": parsing_error,
         "iteration_limit_exceeded": iteration_limit_exceeded,
         "agent_error": str(exception) if raised_exception else None,
-        "start_time": start_time,
-        "end_time": end_time,
         "task": example["task"],
         "task_id": example["task_id"],
         "true_answer": example["true_answer"],
+        "start_time": start_time,
+        "end_time": end_time,
+        "token_counts": total_token_counts,
     }
     append_answer(annotated_example, answers_file)
 
 
-def get_examples_to_answer(answers_file, eval_ds) -> List[dict]:
+def get_examples_to_answer(answers_file, eval_ds) -> list[dict]:
     print(f"Loading answers from {answers_file}...")
     try:
         done_questions = pd.read_json(answers_file, lines=True)["question"].tolist()
diff --git a/examples/open_deep_research/scripts/mdconvert.py b/examples/open_deep_research/scripts/mdconvert.py
index 68f13a28b..939cd121a 100644
--- a/examples/open_deep_research/scripts/mdconvert.py
+++ b/examples/open_deep_research/scripts/mdconvert.py
@@ -14,7 +14,7 @@
 import tempfile
 import traceback
 import zipfile
-from typing import Any, Dict, List, Optional, Union
+from typing import Any
 from urllib.parse import parse_qs, quote, unquote, urlparse, urlunparse
 
 import mammoth
@@ -112,22 +112,22 @@ def convert_soup(self, soup: Any) -> str:
 class DocumentConverterResult:
     """The result of converting a document to text."""
 
-    def __init__(self, title: Union[str, None] = None, text_content: str = ""):
-        self.title: Union[str, None] = title
+    def __init__(self, title: str | None = None, text_content: str = ""):
+        self.title: str | None = title
         self.text_content: str = text_content
 
 
 class DocumentConverter:
     """Abstract superclass of all DocumentConverters."""
 
-    def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult:
         raise NotImplementedError()
 
 
 class PlainTextConverter(DocumentConverter):
     """Anything with content type text/plain"""
 
-    def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult:
         # Guess the content type from any file extension that might be around
         content_type, _ = mimetypes.guess_type("__placeholder" + kwargs.get("file_extension", ""))
 
@@ -149,7 +149,7 @@ def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert
 class HtmlConverter(DocumentConverter):
     """Anything with content type text/html"""
 
-    def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult:
         # Bail if not html
         extension = kwargs.get("file_extension", "")
         if extension.lower() not in [".html", ".htm"]:
@@ -161,7 +161,7 @@ def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert
 
         return result
 
-    def _convert(self, html_content: str) -> Union[None, DocumentConverterResult]:
+    def _convert(self, html_content: str) -> None | DocumentConverterResult:
         """Helper function that converts and HTML string."""
 
         # Parse the string
@@ -189,7 +189,7 @@ def _convert(self, html_content: str) -> Union[None, DocumentConverterResult]:
 class WikipediaConverter(DocumentConverter):
     """Handle Wikipedia pages separately, focusing only on the main document content."""
 
-    def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult:
         # Bail if not Wikipedia
         extension = kwargs.get("file_extension", "")
         if extension.lower() not in [".html", ".htm"]:
@@ -234,7 +234,7 @@ def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert
 class YouTubeConverter(DocumentConverter):
     """Handle YouTube specially, focusing on the video title, description, and transcript."""
 
-    def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult:
         # Bail if not YouTube
         extension = kwargs.get("file_extension", "")
         if extension.lower() not in [".html", ".htm"]:
@@ -250,7 +250,7 @@ def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert
 
         # Read the meta tags
         assert soup.title is not None and soup.title.string is not None
-        metadata: Dict[str, str] = {"title": soup.title.string}
+        metadata: dict[str, str] = {"title": soup.title.string}
         for meta in soup(["meta"]):
             for a in meta.attrs:
                 if a in ["itemprop", "property", "name"]:
@@ -328,13 +328,13 @@ def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert
             text_content=webpage_text,
         )
 
-    def _get(self, metadata: Dict[str, str], keys: List[str], default: Union[str, None] = None) -> Union[str, None]:
+    def _get(self, metadata: dict[str, str], keys: list[str], default: str | None = None) -> str | None:
         for k in keys:
             if k in metadata:
                 return metadata[k]
         return default
 
-    def _findKey(self, json: Any, key: str) -> Union[str, None]:  # TODO: Fix json type
+    def _findKey(self, json: Any, key: str) -> str | None:  # TODO: Fix json type
         if isinstance(json, list):
             for elm in json:
                 ret = self._findKey(elm, key)
@@ -356,7 +356,7 @@ class PdfConverter(DocumentConverter):
     Converts PDFs to Markdown. Most style information is ignored, so the results are essentially plain-text.
     """
 
-    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path, **kwargs) -> None | DocumentConverterResult:
         # Bail if not a PDF
         extension = kwargs.get("file_extension", "")
         if extension.lower() != ".pdf":
@@ -373,7 +373,7 @@ class DocxConverter(HtmlConverter):
     Converts DOCX files to Markdown. Style information (e.g.m headings) and tables are preserved where possible.
     """
 
-    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path, **kwargs) -> None | DocumentConverterResult:
         # Bail if not a DOCX
         extension = kwargs.get("file_extension", "")
         if extension.lower() != ".docx":
@@ -393,7 +393,7 @@ class XlsxConverter(HtmlConverter):
     Converts XLSX files to Markdown, with each sheet presented as a separate Markdown table.
     """
 
-    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path, **kwargs) -> None | DocumentConverterResult:
         # Bail if not a XLSX
         extension = kwargs.get("file_extension", "")
         if extension.lower() not in [".xlsx", ".xls"]:
@@ -417,7 +417,7 @@ class PptxConverter(HtmlConverter):
     Converts PPTX files to Markdown. Supports heading, tables and images with alt text.
     """
 
-    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path, **kwargs) -> None | DocumentConverterResult:
         # Bail if not a PPTX
         extension = kwargs.get("file_extension", "")
         if extension.lower() != ".pptx":
@@ -520,7 +520,7 @@ class WavConverter(MediaConverter):
     Converts WAV files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` is installed).
     """
 
-    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path, **kwargs) -> None | DocumentConverterResult:
         # Bail if not a XLSX
         extension = kwargs.get("file_extension", "")
         if extension.lower() != ".wav":
@@ -570,7 +570,7 @@ class Mp3Converter(WavConverter):
     Converts MP3 and M4A files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` AND `pydub` are installed).
     """
 
-    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path, **kwargs) -> None | DocumentConverterResult:
         # Bail if not a MP3
         extension = kwargs.get("file_extension", "")
         if extension.lower() not in [".mp3", ".m4a"]:
@@ -644,7 +644,7 @@ def __init__(self, extract_dir: str = "downloads"):
         # Create the extraction directory if it doesn't exist
         os.makedirs(self.extract_dir, exist_ok=True)
 
-    def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult:
         # Bail if not a ZIP file
         extension = kwargs.get("file_extension", "")
         if extension.lower() != ".zip":
@@ -681,7 +681,7 @@ class ImageConverter(MediaConverter):
     Converts images to markdown via extraction of metadata (if `exiftool` is installed), OCR (if `easyocr` is installed), and description via a multimodal LLM (if an mlm_client is configured).
     """
 
-    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
+    def convert(self, local_path, **kwargs) -> None | DocumentConverterResult:
         # Bail if not a XLSX
         extension = kwargs.get("file_extension", "")
         if extension.lower() not in [".jpg", ".jpeg", ".png"]:
@@ -771,9 +771,9 @@ class MarkdownConverter:
 
     def __init__(
         self,
-        requests_session: Optional[requests.Session] = None,
-        mlm_client: Optional[Any] = None,
-        mlm_model: Optional[Any] = None,
+        requests_session: requests.Session | None = None,
+        mlm_client: Any | None = None,
+        mlm_model: Any | None = None,
     ):
         if requests_session is None:
             self._requests_session = requests.Session()
@@ -783,7 +783,7 @@ def __init__(
         self._mlm_client = mlm_client
         self._mlm_model = mlm_model
 
-        self._page_converters: List[DocumentConverter] = []
+        self._page_converters: list[DocumentConverter] = []
 
         # Register converters for successful browsing operations
         # Later registrations are tried first / take higher priority than earlier registrations
@@ -802,7 +802,7 @@ def __init__(
         self.register_page_converter(PdfConverter())
 
     def convert(
-        self, source: Union[str, requests.Response], **kwargs: Any
+        self, source: str | requests.Response, **kwargs: Any
     ) -> DocumentConverterResult:  # TODO: deal with kwargs
         """
         Args:
@@ -924,7 +924,7 @@ def convert_response(
 
         return result
 
-    def _convert(self, local_path: str, extensions: List[Union[str, None]], **kwargs) -> DocumentConverterResult:
+    def _convert(self, local_path: str, extensions: list[str | None], **kwargs) -> DocumentConverterResult:
         error_trace = ""
         for ext in extensions + [None]:  # Try last with no extension
             for converter in self._page_converters:
diff --git a/examples/open_deep_research/scripts/text_inspector_tool.py b/examples/open_deep_research/scripts/text_inspector_tool.py
index 056168cee..2b1e18ae5 100644
--- a/examples/open_deep_research/scripts/text_inspector_tool.py
+++ b/examples/open_deep_research/scripts/text_inspector_tool.py
@@ -1,9 +1,5 @@
-from typing import Optional
-
 from smolagents import Tool
-from smolagents.models import MessageRole, Model
-
-from .mdconvert import MarkdownConverter
+from smolagents.models import Model
 
 
 class TextInspectorTool(Tool):
@@ -24,14 +20,18 @@ class TextInspectorTool(Tool):
         },
     }
     output_type = "string"
-    md_converter = MarkdownConverter()
 
-    def __init__(self, model: Model, text_limit: int):
+    def __init__(self, model: Model | None = None, text_limit: int = 100000):
         super().__init__()
         self.model = model
         self.text_limit = text_limit
+        from .mdconvert import MarkdownConverter
+
+        self.md_converter = MarkdownConverter()
 
     def forward_initial_exam_mode(self, file_path, question):
+        from smolagents.models import MessageRole
+
         result = self.md_converter.convert(file_path)
 
         if file_path[-4:] in [".png", ".jpg"]:
@@ -73,7 +73,9 @@ def forward_initial_exam_mode(self, file_path, question):
         ]
         return self.model(messages).content
 
-    def forward(self, file_path, question: Optional[str] = None) -> str:
+    def forward(self, file_path, question: str | None = None) -> str:
+        from smolagents.models import MessageRole
+
         result = self.md_converter.convert(file_path)
 
         if file_path[-4:] in [".png", ".jpg"]:
diff --git a/examples/open_deep_research/scripts/text_web_browser.py b/examples/open_deep_research/scripts/text_web_browser.py
index ef40f8551..044128edb 100644
--- a/examples/open_deep_research/scripts/text_web_browser.py
+++ b/examples/open_deep_research/scripts/text_web_browser.py
@@ -6,7 +6,7 @@
 import re
 import time
 import uuid
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any
 from urllib.parse import unquote, urljoin, urlparse
 
 import pathvalidate
@@ -24,19 +24,19 @@ class SimpleTextBrowser:
 
     def __init__(
         self,
-        start_page: Optional[str] = None,
-        viewport_size: Optional[int] = 1024 * 8,
-        downloads_folder: Optional[Union[str, None]] = None,
-        serpapi_key: Optional[Union[str, None]] = None,
-        request_kwargs: Optional[Union[Dict[str, Any], None]] = None,
+        start_page: str | None = None,
+        viewport_size: int | None = 1024 * 8,
+        downloads_folder: str | None = None,
+        serpapi_key: str | None = None,
+        request_kwargs: dict[str, Any] | None = None,
     ):
         self.start_page: str = start_page if start_page else "about:blank"
         self.viewport_size = viewport_size  # Applies only to the standard uri types
         self.downloads_folder = downloads_folder
-        self.history: List[Tuple[str, float]] = list()
-        self.page_title: Optional[str] = None
+        self.history: list[tuple[str, float]] = list()
+        self.page_title: str | None = None
         self.viewport_current_page = 0
-        self.viewport_pages: List[Tuple[int, int]] = list()
+        self.viewport_pages: list[tuple[int, int]] = list()
         self.set_address(self.start_page)
         self.serpapi_key = serpapi_key
         self.request_kwargs = request_kwargs
@@ -44,15 +44,15 @@ def __init__(
         self._mdconvert = MarkdownConverter()
         self._page_content: str = ""
 
-        self._find_on_page_query: Union[str, None] = None
-        self._find_on_page_last_result: Union[int, None] = None  # Location of the last result
+        self._find_on_page_query: str | None = None
+        self._find_on_page_last_result: int | None = None  # Location of the last result
 
     @property
     def address(self) -> str:
         """Return the address of the current page."""
         return self.history[-1][0]
 
-    def set_address(self, uri_or_path: str, filter_year: Optional[int] = None) -> None:
+    def set_address(self, uri_or_path: str, filter_year: int | None = None) -> None:
         # TODO: Handle anchors
         self.history.append((uri_or_path, time.time()))
 
@@ -102,7 +102,7 @@ def page_down(self) -> None:
     def page_up(self) -> None:
         self.viewport_current_page = max(self.viewport_current_page - 1, 0)
 
-    def find_on_page(self, query: str) -> Union[str, None]:
+    def find_on_page(self, query: str) -> str | None:
         """Searches for the query from the current viewport forward, looping back to the start if necessary."""
 
         # Did we get here via a previous find_on_page search with the same query?
@@ -121,7 +121,7 @@ def find_on_page(self, query: str) -> Union[str, None]:
             self._find_on_page_last_result = viewport_match
             return self.viewport
 
-    def find_next(self) -> Union[str, None]:
+    def find_next(self) -> str | None:
         """Scroll to the next viewport that matches the query"""
 
         if self._find_on_page_query is None:
@@ -144,7 +144,7 @@ def find_next(self) -> Union[str, None]:
             self._find_on_page_last_result = viewport_match
             return self.viewport
 
-    def _find_next_viewport(self, query: str, starting_viewport: int) -> Union[int, None]:
+    def _find_next_viewport(self, query: str, starting_viewport: int) -> int | None:
         """Search for matches between the starting viewport looping when reaching the end."""
 
         if query is None:
@@ -174,7 +174,7 @@ def _find_next_viewport(self, query: str, starting_viewport: int) -> Union[int,
 
         return None
 
-    def visit_page(self, path_or_uri: str, filter_year: Optional[int] = None) -> str:
+    def visit_page(self, path_or_uri: str, filter_year: int | None = None) -> str:
         """Update the address, visit the page, and return the content of the viewport."""
         self.set_address(path_or_uri, filter_year=filter_year)
         return self.viewport
@@ -201,7 +201,7 @@ def _split_pages(self) -> None:
             self.viewport_pages.append((start_idx, end_idx))
             start_idx = end_idx
 
-    def _serpapi_search(self, query: str, filter_year: Optional[int] = None) -> None:
+    def _serpapi_search(self, query: str, filter_year: int | None = None) -> None:
         if self.serpapi_key is None:
             raise ValueError("Missing SerpAPI key.")
 
@@ -231,7 +231,7 @@ def _prev_visit(url):
                     return f"You previously visited this page {round(time.time() - self.history[i][1])} seconds ago.\n"
             return ""
 
-        web_snippets: List[str] = list()
+        web_snippets: list[str] = list()
         idx = 0
         if "organic_results" in results:
             for page in results["organic_results"]:
@@ -352,7 +352,7 @@ def _fetch_page(self, url: str) -> None:
                 self.page_title = "Error"
                 self._set_page_content(f"## Error\n\n{str(request_exception)}")
 
-    def _state(self) -> Tuple[str, str]:
+    def _state(self) -> tuple[str, str]:
         header = f"Address: {self.address}\n"
         if self.page_title is not None:
             header += f"Title: {self.page_title}\n"
@@ -385,7 +385,7 @@ def __init__(self, browser):
         super().__init__()
         self.browser = browser
 
-    def forward(self, query: str, filter_year: Optional[int] = None) -> str:
+    def forward(self, query: str, filter_year: int | None = None) -> str:
         self.browser.visit_page(f"google: {query}", filter_year=filter_year)
         header, content = self.browser._state()
         return header.strip() + "\n=======================\n" + content
@@ -397,7 +397,7 @@ class VisitTool(Tool):
     inputs = {"url": {"type": "string", "description": "The relative or absolute url of the webpage to visit."}}
     output_type = "string"
 
-    def __init__(self, browser):
+    def __init__(self, browser=None):
         super().__init__()
         self.browser = browser
 
@@ -421,6 +421,8 @@ def __init__(self, browser):
         self.browser = browser
 
     def forward(self, url: str) -> str:
+        import requests
+
         if "arxiv" in url:
             url = url.replace("abs", "pdf")
         response = requests.get(url)
@@ -452,11 +454,13 @@ class ArchiveSearchTool(Tool):
     }
     output_type = "string"
 
-    def __init__(self, browser):
+    def __init__(self, browser=None):
         super().__init__()
         self.browser = browser
 
     def forward(self, url, date) -> str:
+        import requests
+
         no_timestamp_url = f"https://archive.org/wayback/available?url={url}"
         archive_url = no_timestamp_url + f"&timestamp={date}"
         response = requests.get(archive_url).json()
@@ -487,7 +491,7 @@ class PageUpTool(Tool):
     inputs = {}
     output_type = "string"
 
-    def __init__(self, browser):
+    def __init__(self, browser=None):
         super().__init__()
         self.browser = browser
 
@@ -505,7 +509,7 @@ class PageDownTool(Tool):
     inputs = {}
     output_type = "string"
 
-    def __init__(self, browser):
+    def __init__(self, browser=None):
         super().__init__()
         self.browser = browser
 
@@ -526,7 +530,7 @@ class FinderTool(Tool):
     }
     output_type = "string"
 
-    def __init__(self, browser):
+    def __init__(self, browser=None):
         super().__init__()
         self.browser = browser
 
@@ -549,7 +553,7 @@ class FindNextTool(Tool):
     inputs = {}
     output_type = "string"
 
-    def __init__(self, browser):
+    def __init__(self, browser=None):
         super().__init__()
         self.browser = browser
 
diff --git a/examples/open_deep_research/scripts/visual_qa.py b/examples/open_deep_research/scripts/visual_qa.py
index 84d240b66..01d60b30a 100644
--- a/examples/open_deep_research/scripts/visual_qa.py
+++ b/examples/open_deep_research/scripts/visual_qa.py
@@ -4,23 +4,21 @@
 import os
 import uuid
 from io import BytesIO
-from typing import Optional
 
+import PIL.Image
 import requests
 from dotenv import load_dotenv
 from huggingface_hub import InferenceClient
-from PIL import Image
-from transformers import AutoProcessor
 
 from smolagents import Tool, tool
 
 
 load_dotenv(override=True)
 
-idefics_processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b-chatty")
-
 
 def process_images_and_text(image_path, query, client):
+    from transformers import AutoProcessor
+
     messages = [
         {
             "role": "user",
@@ -30,7 +28,7 @@ def process_images_and_text(image_path, query, client):
             ],
         },
     ]
-
+    idefics_processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b-chatty")
     prompt_with_template = idefics_processor.apply_chat_template(messages, add_generation_prompt=True)
 
     # load images from local directory
@@ -38,7 +36,7 @@ def process_images_and_text(image_path, query, client):
     # encode images to strings which can be sent to the endpoint
     def encode_local_image(image_path):
         # load image
-        image = Image.open(image_path).convert("RGB")
+        image = PIL.Image.open(image_path).convert("RGB")
 
         # Convert the image to a base64 string
         buffer = BytesIO()
@@ -95,11 +93,8 @@ def encode_image(image_path):
         return base64.b64encode(image_file.read()).decode("utf-8")
 
 
-headers = {"Content-Type": "application/json", "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"}
-
-
 def resize_image(image_path):
-    img = Image.open(image_path)
+    img = PIL.Image.open(image_path)
     width, height = img.size
     img = img.resize((int(width / 2), int(height / 2)))
     new_image_path = f"resized_{image_path}"
@@ -121,7 +116,7 @@ class VisualQATool(Tool):
 
     client = InferenceClient("HuggingFaceM4/idefics2-8b-chatty")
 
-    def forward(self, image_path: str, question: Optional[str] = None) -> str:
+    def forward(self, image_path: str, question: str | None = None) -> str:
         output = ""
         add_note = False
         if not question:
@@ -144,13 +139,19 @@ def forward(self, image_path: str, question: Optional[str] = None) -> str:
 
 
 @tool
-def visualizer(image_path: str, question: Optional[str] = None) -> str:
+def visualizer(image_path: str, question: str | None = None) -> str:
     """A tool that can answer questions about attached images.
 
     Args:
         image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
         question: The question to answer.
     """
+    import mimetypes
+    import os
+
+    import requests
+
+    from .visual_qa import encode_image
 
     add_note = False
     if not question:
@@ -175,6 +176,7 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
         ],
         "max_tokens": 1000,
     }
+    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"}
     response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
     try:
         output = response.json()["choices"][0]["message"]["content"]
diff --git a/examples/open_deep_research/visual_vs_text_browser.ipynb b/examples/open_deep_research/visual_vs_text_browser.ipynb
index 9bb4ee8dc..4a85a465a 100644
--- a/examples/open_deep_research/visual_vs_text_browser.ipynb
+++ b/examples/open_deep_research/visual_vs_text_browser.ipynb
@@ -102,7 +102,7 @@
     "from smolagents import CodeAgent, LiteLLMModel\n",
     "\n",
     "\n",
-    "proprietary_model = LiteLLMModel(\"gpt-4o\")"
+    "proprietary_model = LiteLLMModel(model_id=\"gpt-4o\")"
    ]
   },
   {
@@ -178,7 +178,7 @@
     ")\n",
     "\n",
     "\n",
-    "proprietary_model = LiteLLMModel(\"gpt-4o\")\n",
+    "proprietary_model = LiteLLMModel(model_id=\"gpt-4o\")\n",
     "vision_browser_agent = initialize_agent(proprietary_model)\n",
     "### BUILD AGENTS & TOOLS\n",
     "\n",
diff --git a/examples/rag.py b/examples/rag.py
index f5a2e2cb1..3ff572fb3 100644
--- a/examples/rag.py
+++ b/examples/rag.py
@@ -28,11 +28,11 @@
 
 class RetrieverTool(Tool):
     name = "retriever"
-    description = "Uses semantic search to retrieve the parts of transformers documentation that could be most relevant to answer your query."
+    description = "Uses lexical search to retrieve the parts of transformers documentation that could be most relevant to answer your query."
     inputs = {
         "query": {
             "type": "string",
-            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
+            "description": "The query to perform. This should be lexically close to your target documents. Use the affirmative form rather than a question.",
         }
     }
     output_type = "string"
@@ -52,13 +52,13 @@ def forward(self, query: str) -> str:
         )
 
 
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 
 retriever_tool = RetrieverTool(docs_processed)
 agent = CodeAgent(
     tools=[retriever_tool],
-    model=HfApiModel("meta-llama/Llama-3.3-70B-Instruct"),
+    model=InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct"),
     max_steps=4,
     verbosity_level=2,
 )
diff --git a/examples/rag_using_chromadb.py b/examples/rag_using_chromadb.py
index 864bfc848..fa2764355 100644
--- a/examples/rag_using_chromadb.py
+++ b/examples/rag_using_chromadb.py
@@ -97,8 +97,8 @@ def forward(self, query: str) -> str:
 
 # Choose which LLM engine to use!
 
-# from smolagents import HfApiModel
-# model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct")
+# from smolagents import InferenceClientModel
+# model = InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct")
 
 # from smolagents import TransformersModel
 # model = TransformersModel(model_id="meta-llama/Llama-3.2-2B-Instruct")
diff --git a/examples/sandboxed_execution.py b/examples/sandboxed_execution.py
new file mode 100644
index 000000000..25e4fb771
--- /dev/null
+++ b/examples/sandboxed_execution.py
@@ -0,0 +1,12 @@
+from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel
+
+
+# A single model instance is shared by both sandboxed agents.
+model = InferenceClientModel()
+question = "How many seconds would it take for a leopard at full speed to run through Pont des Arts?"
+
+# Run the same task once per sandboxed executor type.
+for executor_type, label in (("docker", "Docker"), ("e2b", "E2B")):
+    agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type=executor_type)
+    output = agent.run(question)
+    print(f"{label} executor result:", output)
diff --git a/examples/smolagents_benchmark/run.py b/examples/smolagents_benchmark/run.py
new file mode 100644
index 000000000..f2b60eb58
--- /dev/null
+++ b/examples/smolagents_benchmark/run.py
@@ -0,0 +1,254 @@
+import argparse
+import datetime
+import json
+import os
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+
+import datasets
+import pandas as pd
+from dotenv import load_dotenv
+from tqdm import tqdm
+
+from smolagents import (
+    AgentError,
+    CodeAgent,
+    GoogleSearchTool,
+    InferenceClientModel,
+    LiteLLMModel,
+    PythonInterpreterTool,
+    ToolCallingAgent,
+    VisitWebpageTool,
+)
+from smolagents.agents import ActionStep
+
+
+load_dotenv()
+os.makedirs("output", exist_ok=True)
+
+APPEND_ANSWER_LOCK = threading.Lock()
+
+
+def parse_arguments():
+    """Parse the command-line options of the benchmark runner and return the resulting namespace."""
+    parser = argparse.ArgumentParser(description="Runs an agent powered by the given model on smolagent benchmark.")
+    parser.add_argument(
+        "--date",
+        help="The date for the evaluation (defaults to today).",
+    )
+    # The eval dataset is gated, so you must first visit its page to request access: https://huggingface.co/datasets/smolagents-benchmark/benchmark-v1
+    parser.add_argument(
+        "--eval-dataset",
+        type=str,
+        default="smolagents/benchmark-v1",
+        help="The evaluation dataset to load (one config per task)",
+    )
+    parser.add_argument(
+        "--model-type",
+        type=str,
+        default="InferenceClientModel",
+        choices=["LiteLLMModel", "InferenceClientModel"],
+        help="The model type to use (LiteLLMModel or InferenceClientModel)",
+    )
+    parser.add_argument(
+        "--model-id",
+        type=str,
+        required=True,
+        help="The model ID to use for the specified model type",
+    )
+    parser.add_argument(
+        "--provider",
+        type=str,
+        default="hf-inference",
+        help="The provider for InferenceClientModel - will not be used for LiteLLMModel",
+    )
+    parser.add_argument(
+        "--agent-action-type",
+        type=str,
+        default="code",
+        choices=["code", "tool-calling", "vanilla"],
+        help="The agent action type: 'code', 'tool-calling', or 'vanilla' to use the vanilla llm",
+    )
+    parser.add_argument(
+        "--parallel-workers",
+        type=int,
+        default=8,
+        help="The number of worker threads to run in parallel",
+    )
+    parser.add_argument(
+        "--push-answers-to-hub",
+        action="store_true",
+        help="Push the answers to the hub",
+    )
+    parser.add_argument(
+        "--answers-dataset",
+        type=str,
+        default="smolagents/answers",
+        help="The dataset the answers are pushed to when --push-answers-to-hub is set",
+    )
+    return parser.parse_args()
+
+
+def load_eval_dataset(eval_dataset):
+    """Load the "test" split of every config (task) of ``eval_dataset`` and return them as {task: dataset}."""
+    # To evaluate on a subset only, replace `tasks` with an explicit list such as ["gaia"].
+    tasks = datasets.get_dataset_config_names(eval_dataset)
+    print(tasks)
+    eval_ds = {task: datasets.load_dataset(eval_dataset, task, split="test") for task in tasks}
+    # Preview one task; "simpleqa" is preferred but is not required to exist in the dataset.
+    preview_task = "simpleqa" if "simpleqa" in eval_ds else next(iter(eval_ds), None)
+    print(pd.DataFrame(eval_ds[preview_task]).head() if preview_task else "No tasks found.")
+    return eval_ds
+
+
+def serialize_agent_error(obj):
+    """Return a JSON-friendly value for ``obj``: a type/message dict for AgentError, else ``str(obj)``."""
+    if not isinstance(obj, AgentError):
+        return str(obj)
+    return {"error_type": obj.__class__.__name__, "message": obj.message}
+
+
+def append_answer(entry: dict, jsonl_file: str) -> None:
+    """Append ``entry`` as one JSON line to ``jsonl_file``; the write is guarded by APPEND_ANSWER_LOCK."""
+    Path(jsonl_file).parent.mkdir(parents=True, exist_ok=True)
+    with APPEND_ANSWER_LOCK, open(jsonl_file, "a", encoding="utf-8") as fp:
+        fp.write(json.dumps(entry) + "\n")
+    assert os.path.exists(jsonl_file), "File not found!"
+
+
+def answer_single_question(example, model, answers_file, action_type):
+    """Answer one example with the requested agent type and append the annotated result to ``answers_file``."""
+    if action_type == "vanilla":
+        agent = model
+    elif action_type == "code":
+        agent = CodeAgent(
+            tools=[GoogleSearchTool(provider="serper"), VisitWebpageTool()],
+            model=model,
+            additional_authorized_imports=["numpy", "sympy"],
+            max_steps=10,
+        )
+    elif action_type == "tool-calling":
+        agent = ToolCallingAgent(
+            tools=[GoogleSearchTool(provider="serper"), VisitWebpageTool(), PythonInterpreterTool()],
+            model=model,
+            additional_authorized_imports=["numpy", "sympy"],
+            max_steps=10,
+        )
+    else:
+        raise ValueError(f"Unknown action_type {action_type!r}: expected 'vanilla', 'code' or 'tool-calling'")
+
+    augmented_question = example["question"]
+    if example["source"] == "SimpleQA":
+        augmented_question += " Answer with only the final number."
+    if example["source"] == "MATH":
+        augmented_question += " Write code, not latex."
+
+    start_time = time.time()
+    try:
+        if action_type == "vanilla":
+            answer = agent([{"role": "user", "content": augmented_question}]).content
+            token_counts = agent.monitor.get_total_token_counts()
+            intermediate_steps = answer
+        else:
+            answer = str(agent.run(augmented_question))
+            token_counts = agent.monitor.get_total_token_counts()
+            # Remove memory from logs to make them more compact.
+            for step in agent.memory.steps:
+                if isinstance(step, ActionStep):
+                    step.agent_memory = None
+            intermediate_steps = str(agent.memory.steps)
+    except Exception as e:
+        print("Error on ", augmented_question, e)
+        intermediate_steps = []
+        token_counts = {"input": 0, "output": 0}
+        answer = str(e)
+    # Taken once for both success and failure, in the same epoch-seconds unit as start_time.
+    end_time = time.time()
+    annotated_example = {
+        "model_id": model.model_id,
+        "agent_action_type": action_type,
+        "question": augmented_question,
+        "original_question": example["question"],
+        "answer": answer,
+        "true_answer": example["true_answer"],
+        "source": example["source"],
+        "intermediate_steps": intermediate_steps,
+        "start_time": start_time,
+        "end_time": end_time,
+        "token_counts": token_counts,
+    }
+    append_answer(annotated_example, answers_file)
+
+
+def answer_questions(
+    eval_ds,
+    model,
+    date,
+    action_type: str = "code",
+    output_dir: str = "output",
+    answers_dataset: str | None = None,
+    push_answers_to_hub: bool = False,
+    parallel_workers: int = 32,
+):
+    """Answer the not-yet-answered questions of each task in a thread pool, resuming from existing output files."""
+    date = date or datetime.date.today().isoformat()
+    model_id = model.model_id
+
+    for task in eval_ds:
+        file_name = f"{output_dir}/{model_id.replace('/', '__')}__{action_type}__{task}__{date}.jsonl"
+        print(f"Starting processing and writing output to '{file_name}'")
+        answered_questions = set()  # set: O(1) membership tests when filtering the examples below
+        if os.path.exists(file_name):
+            with open(file_name, "r", encoding="utf-8") as f:  # same encoding the answers are written with
+                answered_questions = {json.loads(line)["original_question"] for line in f if line.strip()}
+
+        examples_todo = [example for example in eval_ds[task] if example["question"] not in answered_questions]
+        print(f"Launching {parallel_workers} parallel workers.")
+
+        with ThreadPoolExecutor(max_workers=parallel_workers) as exe:
+            futures = [
+                exe.submit(answer_single_question, example, model, file_name, action_type) for example in examples_todo
+            ]
+            for f in tqdm(as_completed(futures), total=len(examples_todo), desc="Processing tasks"):
+                f.result()
+
+        print("All tasks processed.")
+
+        if push_answers_to_hub and answers_dataset:
+            print("Pushing answers to hub...")
+            ds = datasets.Dataset.from_pandas(pd.read_json(file_name, lines=True), split="test", preserve_index=False)
+            config = f"{model_id.replace('/', '__')}__{action_type}__{task}"
+            data_dir = f"{model_id}/{action_type}/{task}/{date}"
+            ds.push_to_hub(
+                answers_dataset,
+                config_name=config,
+                data_dir=data_dir,
+                split="test",
+                commit_message=f"Upload {config}",
+            )
+
+
+if __name__ == "__main__":  # command-line entry point of the benchmark runner
+    args = parse_arguments()
+
+    eval_ds = load_eval_dataset(args.eval_dataset)  # dict: task name -> "test" split
+
+    if args.model_type == "LiteLLMModel":
+        model = LiteLLMModel(
+            model_id=args.model_id,
+            max_completion_tokens=8192,
+        )
+    else:  # argparse `choices` leaves "InferenceClientModel" as the only other value
+        model = InferenceClientModel(model_id=args.model_id, provider=args.provider, max_tokens=8192)
+
+    answer_questions(
+        eval_ds,
+        model,
+        args.date,
+        action_type=args.agent_action_type,
+        answers_dataset=args.answers_dataset,
+        push_answers_to_hub=args.push_answers_to_hub,
+        parallel_workers=args.parallel_workers,
+    )
diff --git a/examples/smolagents_benchmark/score.ipynb b/examples/smolagents_benchmark/score.ipynb
new file mode 100644
index 000000000..b624d802c
--- /dev/null
+++ b/examples/smolagents_benchmark/score.ipynb
@@ -0,0 +1,392 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -e .. datasets sympy numpy matplotlib seaborn -q  # Install dev version of smolagents + some packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Benchmark date\n",
+    "# - set a concrete date:\n",
+    "DATE = \"2024-12-26\"\n",
+    "# - or use default: today\n",
+    "# DATE = None\n",
+    "\n",
+    "# Evaluation dataset\n",
+    "# - the dataset is gated, so you must first visit its page to request access: https://huggingface.co/datasets/smolagents-benchmark/benchmark-v1\n",
+    "EVAL_DATASET = \"smolagents-benchmark/benchmark-v1\"\n",
+    "\n",
+    "# Answers dataset: it must be a gated dataset; required to score the answers\n",
+    "ANSWERS_DATASET = \"smolagents-benchmark/answers\"\n",
+    "# Whether to push the answers dataset to the Hub\n",
+    "PUSH_ANSWERS_DATASET_TO_HUB = True\n",
+    "\n",
+    "# Results dataset\n",
+    "RESULTS_DATASET = \"smolagents-benchmark/results\"\n",
+    "# Whether to push the results dataset to the Hub\n",
+    "PUSH_RESULTS_DATASET_TO_HUB = True"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Constants and utilities/tools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "import string\n",
+    "import warnings\n",
+    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
+    "from datetime import datetime\n",
+    "\n",
+    "import numpy as np\n",
+    "from tqdm import tqdm\n",
+    "\n",
+    "\n",
+    "def normalize_number_str(number_str: str) -> float:\n",
+    "    # we replace these common units and commas to allow\n",
+    "    # conversion to float\n",
+    "    for char in [\"$\", \"%\", \",\"]:\n",
+    "        number_str = number_str.replace(char, \"\")\n",
+    "    try:\n",
+    "        return float(number_str)\n",
+    "    except ValueError:\n",
+    "        return float(\"inf\")\n",
+    "\n",
+    "\n",
+    "def split_string(\n",
+    "    s: str,\n",
+    "    char_list: list[str] = [\",\", \";\"],\n",
+    ") -> list[str]:\n",
+    "    pattern = f\"[{''.join(char_list)}]\"\n",
+    "    return re.split(pattern, s)\n",
+    "\n",
+    "\n",
+    "def is_float(element: any) -> bool:\n",
+    "    try:\n",
+    "        float(element)\n",
+    "        return True\n",
+    "    except ValueError:\n",
+    "        return False\n",
+    "\n",
+    "\n",
+    "def normalize_str(input_str, remove_punct=True) -> str:\n",
+    "    \"\"\"\n",
+    "    Normalize a string by:\n",
+    "    - Removing all white spaces\n",
+    "    - Optionally removing punctuation (if remove_punct is True)\n",
+    "    - Converting to lowercase\n",
+    "    Parameters:\n",
+    "    - input_str: str, the string to normalize\n",
+    "    - remove_punct: bool, whether to remove punctuation (default: True)\n",
+    "    Returns:\n",
+    "    - str, the normalized string\n",
+    "    \"\"\"\n",
+    "    # Remove all white spaces. Required e.g for seagull vs. sea gull\n",
+    "    no_spaces = re.sub(r\"\\s\", \"\", input_str)\n",
+    "\n",
+    "    # Remove punctuation, if specified.\n",
+    "    if remove_punct:\n",
+    "        translator = str.maketrans(\"\", \"\", string.punctuation)\n",
+    "        return no_spaces.lower().translate(translator)\n",
+    "    else:\n",
+    "        return no_spaces.lower()\n",
+    "\n",
+    "\n",
+    "def extract_numbers(text: str) -> list[str]:\n",
+    "    \"\"\"This pattern matches:\n",
+    "    - Optional negative sign\n",
+    "    - Numbers with optional comma thousand separators\n",
+    "    - Optional decimal points with decimal numbers\n",
+    "    \"\"\"\n",
+    "    pattern = r\"-?(?:\\d{1,3}(?:,\\d{3})+|\\d+)(?:\\.\\d+)?\"\n",
+    "\n",
+    "    return [el.replace(\",\", \"\") for el in re.findall(pattern, text)]\n",
+    "\n",
+    "\n",
+    "def get_question_score_gaia(\n",
+    "    model_answer: str,\n",
+    "    ground_truth: str,\n",
+    ") -> bool:\n",
+    "    \"\"\"Scoring function used to score functions from the GAIA benchmark\"\"\"\n",
+    "    if is_float(ground_truth):\n",
+    "        normalized_answer = normalize_number_str(str(model_answer))\n",
+    "        return normalized_answer == float(ground_truth)\n",
+    "\n",
+    "    elif any(char in ground_truth for char in [\",\", \";\"]):  # if gt is a list\n",
+    "        # question with the fish: normalization removes punct\n",
+    "        gt_elems = split_string(ground_truth)\n",
+    "        ma_elems = split_string(model_answer)\n",
+    "\n",
+    "        if len(gt_elems) != len(ma_elems):  # check length is the same\n",
+    "            warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n",
+    "            return False\n",
+    "\n",
+    "        comparisons = []\n",
+    "        for ma_elem, gt_elem in zip(ma_elems, gt_elems):  # compare each element as float or str\n",
+    "            if is_float(gt_elem):\n",
+    "                normalized_ma_elem = normalize_number_str(ma_elem)\n",
+    "                comparisons.append(normalized_ma_elem == float(gt_elem))\n",
+    "            else:\n",
+    "                # we do not remove punct since comparisons can include punct\n",
+    "                comparisons.append(\n",
+    "                    normalize_str(ma_elem, remove_punct=False) == normalize_str(gt_elem, remove_punct=False)\n",
+    "                )\n",
+    "        return all(comparisons)\n",
+    "\n",
+    "    else:  # if gt is a str\n",
+    "        return normalize_str(model_answer) == normalize_str(ground_truth)\n",
+    "\n",
+    "\n",
+    "def get_correct(row):\n",
+    "    if row[\"source\"] == \"MATH\":  # Checks the last number in answer\n",
+    "        numbers_answer = extract_numbers(str(row[\"answer\"]))\n",
+    "        if len(numbers_answer) == 0:\n",
+    "            return False\n",
+    "        return np.isclose(float(numbers_answer[-1]), float(row[\"true_answer\"]), rtol=1e-5, atol=1e-7)\n",
+    "    else:\n",
+    "        return get_question_score_gaia(str(row[\"answer\"]), str(row[\"true_answer\"]))\n",
+    "\n",
+    "\n",
+    "def score_answers_subset(answers_dataset, answers_subset):\n",
+    "    try:\n",
+    "        print(answers_dataset, answers_subset)\n",
+    "        *model_id, action_type, task = answers_subset.split(\"__\")\n",
+    "        model_id = \"/\".join(model_id)\n",
+    "        ds = datasets.load_dataset(answers_dataset, answers_subset, split=\"test\")\n",
+    "        df = ds.to_pandas()\n",
+    "        df[\"correct\"] = df.apply(get_correct, axis=1)\n",
+    "        assert df[\"correct\"].notnull().sum() > 30, \"Missing answers\"\n",
+    "        acc = df[\"correct\"].mean().item()\n",
+    "        result = df.loc[0, [\"model_id\", \"agent_action_type\", \"source\"]].to_dict()\n",
+    "        result[\"acc\"] = acc\n",
+    "        return result\n",
+    "    except Exception as e:\n",
+    "        print(f\"Error with {answers_subset}: {e}\")\n",
+    "        return None\n",
+    "\n",
+    "\n",
+    "def score_answers(\n",
+    "    answers_subsets,\n",
+    "    answers_dataset=ANSWERS_DATASET,\n",
+    "    date=DATE,\n",
+    "    push_to_hub_dataset=RESULTS_DATASET if PUSH_RESULTS_DATASET_TO_HUB else None,\n",
+    "    set_default=True,\n",
+    "):\n",
+    "    if not answers_dataset:\n",
+    "        raise ValueError(\"Pass 'answers_dataset' to load the answers from it\")\n",
+    "    date = date or datetime.now().date().isoformat()\n",
+    "    results = []\n",
+    "    with ThreadPoolExecutor(max_workers=16) as exe:\n",
+    "        futures = [\n",
+    "            exe.submit(score_answers_subset, answers_dataset, answers_subset) for answers_subset in answers_subsets\n",
+    "        ]\n",
+    "        for f in tqdm(as_completed(futures), total=len(answers_subsets), desc=\"Processing tasks\"):\n",
+    "            result = f.result()\n",
+    "            if result:\n",
+    "                results.append(result)\n",
+    "    df = pd.DataFrame(results)\n",
+    "\n",
+    "    if push_to_hub_dataset:\n",
+    "        ds = datasets.Dataset.from_pandas(df)\n",
+    "        # The results of each scoring date are pushed as their own config.\n",
+    "        config = date\n",
+    "        ds.push_to_hub(\n",
+    "            push_to_hub_dataset, config_name=config, set_default=set_default, commit_message=f\"Upload {config} results\"\n",
+    "        )\n",
+    "    return df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Score answers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datasets\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "# Choose the answers subsets to score:\n",
+    "# answers_subsets = [\"meta-llama__Llama-3.1-8B-Instruct__code__gaia\"]\n",
+    "# or get all the answers subsets present in the ANSWERS_DATASET\n",
+    "answers_subsets = datasets.get_dataset_config_names(ANSWERS_DATASET)\n",
+    "print(\"Number of answers_subsets\", len(answers_subsets))\n",
+    "print(\"Example of answers_subset\", answers_subsets[0])\n",
+    "\n",
+    "result_df = score_answers(answers_subsets)\n",
+    "result_df[\"acc\"] = (result_df[\"acc\"] * 100).round(2)\n",
+    "result_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pivot_df = result_df.pivot_table(\n",
+    "    index=[\"model_id\", \"source\"],\n",
+    "    columns=[\"agent_action_type\"],\n",
+    "    values=\"acc\",\n",
+    "    fill_value=float(\"nan\"),\n",
+    ").reset_index()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Display results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "display(pivot_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.legend_handler import HandlerTuple  # Added import\n",
+    "\n",
+    "\n",
+    "# Assuming pivot_df is your original dataframe\n",
+    "models = pivot_df[\"model_id\"].unique()\n",
+    "sources = pivot_df[\"source\"].unique()\n",
+    "\n",
+    "# Create figure and axis\n",
+    "plt.style.use(\"seaborn-v0_8-white\")\n",
+    "fig, ax = plt.subplots(figsize=(15, 6))\n",
+    "\n",
+    "# Set the width of each bar group and positions of the bars\n",
+    "width = 0.15  # width of each bar\n",
+    "spacing = 0.02  # space between bars within a group\n",
+    "group_spacing = 0.2  # space between model groups\n",
+    "\n",
+    "# Calculate positions for the bars\n",
+    "num_sources = len(sources)\n",
+    "total_width_per_group = (width + spacing) * num_sources * 2  # *2 for agent and vanilla\n",
+    "x = np.arange(len(models)) * (total_width_per_group + group_spacing)\n",
+    "\n",
+    "# Plot bars for each source\n",
+    "for i, source in enumerate(sources):\n",
+    "    source_data = pivot_df[pivot_df[\"source\"] == source]\n",
+    "    agent_scores = [\n",
+    "        source_data[source_data[\"model_id\"] == model][\"code\"].values[0]\n",
+    "        if len(source_data[source_data[\"model_id\"] == model]) > 0\n",
+    "        else np.nan\n",
+    "        for model in models\n",
+    "    ]\n",
+    "    vanilla_scores = [\n",
+    "        source_data[source_data[\"model_id\"] == model][\"vanilla\"].values[0]\n",
+    "        if len(source_data[source_data[\"model_id\"] == model]) > 0\n",
+    "        else np.nan\n",
+    "        for model in models\n",
+    "    ]\n",
+    "\n",
+    "    # Position calculation for each pair of bars\n",
+    "    pos = x + i * (width * 2 + spacing)\n",
+    "\n",
+    "    agent_bars = ax.bar(pos, agent_scores, width, label=f\"{source} (Agent)\", alpha=0.8)\n",
+    "    vanilla_bars = ax.bar(\n",
+    "        pos + width * 0.6,\n",
+    "        vanilla_scores,\n",
+    "        width,\n",
+    "        hatch=\"////\",\n",
+    "        alpha=0.5,\n",
+    "        hatch_linewidth=2,\n",
+    "        label=f\"{source} (Vanilla)\",\n",
+    "        color=\"white\",\n",
+    "        edgecolor=agent_bars[0].get_facecolor(),\n",
+    "    )\n",
+    "\n",
+    "# Customize the plot\n",
+    "ax.set_ylabel(\"Score\")\n",
+    "ax.set_title(\"Model Performance Comparison\")\n",
+    "\n",
+    "# Set x-axis ticks in the middle of each group\n",
+    "group_centers = x + (total_width_per_group - spacing) / 2\n",
+    "ax.set_xticks(group_centers)\n",
+    "\n",
+    "# Wrap long model names to prevent overlap\n",
+    "wrapped_labels = [\"\\n\".join(model.split(\"/\")) for model in models]\n",
+    "ax.set_xticklabels(wrapped_labels, rotation=0, ha=\"center\")\n",
+    "\n",
+    "# Modify legend to combine agent and vanilla entries\n",
+    "handles, labels = ax.get_legend_handles_labels()\n",
+    "unique_sources = sources\n",
+    "legend_elements = [\n",
+    "    (handles[i * 2], handles[i * 2 + 1], labels[i * 2].replace(\" (Agent)\", \"\")) for i in range(len(unique_sources))\n",
+    "]\n",
+    "custom_legend = ax.legend(\n",
+    "    [(agent_handle, vanilla_handle) for agent_handle, vanilla_handle, _ in legend_elements],\n",
+    "    [label for _, _, label in legend_elements],\n",
+    "    handler_map={tuple: HandlerTuple(ndivide=None)},\n",
+    "    bbox_to_anchor=(1.05, 1),\n",
+    "    loc=\"upper left\",\n",
+    ")\n",
+    "\n",
+    "ax.yaxis.grid(True, linestyle=\"--\", alpha=0.3)\n",
+    "ax.set_ylim(bottom=0)\n",
+    "plt.tight_layout()\n",
+    "ax.spines[\"top\"].set_visible(False)\n",
+    "ax.spines[\"right\"].set_visible(False)\n",
+    "\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "test",
+   "language": "python",
+   "name": "test"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/text_to_sql.py b/examples/text_to_sql.py
index c25f0caa0..1b5bd3d6d 100644
--- a/examples/text_to_sql.py
+++ b/examples/text_to_sql.py
@@ -69,11 +69,11 @@ def sql_engine(query: str) -> str:
     return output
 
 
-from smolagents import CodeAgent, HfApiModel
+from smolagents import CodeAgent, InferenceClientModel
 
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"),
+    model=InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"),
 )
 agent.run("Can you give me the name of the client who got the most expensive receipt?")
diff --git a/pyproject.toml b/pyproject.toml
index ab323f8a1..0db6ab2b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,10 +4,10 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "smolagents"
-version = "1.10.0.dev0"
+version = "1.15.0.dev0"
 description = "🤗 smolagents: a barebones library for agents. Agents write python code to call tools or orchestrate other agents."
 authors = [
-  { name="Aymeric Roucher", email="aymeric@hf.co" }, { name="Thomas Wolf"},
+  { name="Aymeric Roucher", email="aymeric@hf.co" },
 ]
 readme = "README.md"
 requires-python = ">=3.10"
@@ -15,7 +15,6 @@ dependencies = [
   "huggingface-hub>=0.28.0",
   "requests>=2.32.3",
   "rich>=13.9.4",
-  "pandas>=2.2.3",
   "jinja2>=3.1.4",
   "pillow>=11.0.0",
   "markdownify>=0.14.1",
@@ -24,14 +23,22 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+bedrock = [
+  "boto3>=1.36.18"
+]
 torch = [
   "torch",
   "torchvision",
+  "numpy>=1.21.2",
 ]
 audio = [
   "soundfile",
   "smolagents[torch]",
 ]
+docker = [
+  "docker>=7.1.0",
+  "websocket-client",
+]
 e2b = [
   "e2b-code-interpreter>=1.0.3",
   "python-dotenv>=1.0.1",
@@ -43,7 +50,7 @@ litellm = [
   "litellm>=1.60.2",
 ]
 mcp = [
-  "mcpadapt>=0.0.6",
+  "mcpadapt>=0.0.19",  # Security fix
   "mcp",
 ]
 mlx-lm = [
@@ -60,21 +67,32 @@ telemetry = [
 ]
 transformers = [
   "accelerate",
-  "transformers>=4.0.0,<4.49.0",
+  "transformers>=4.0.0",
   "smolagents[torch]",
 ]
+vision = [
+  "helium",
+  "selenium",
+]
+vllm = [
+  "vllm",
+  "torch"
+]
 all = [
-  "smolagents[audio,e2b,gradio,litellm,mcp,openai,telemetry,transformers]",
+  "smolagents[audio,docker,e2b,gradio,litellm,mcp,mlx-lm,openai,telemetry,transformers,vision,bedrock]",
 ]
 quality = [
   "ruff>=0.9.0",
 ]
 test = [
   "ipython>=8.31.0", # for interactive environment tests
+  "pandas>=2.2.3",
   "pytest>=8.1.0",
+  "pytest-datadir",
   "python-dotenv>=1.0.1", # For test_all_docs
   "smolagents[all]",
   "rank-bm25", # For test_all_docs
+  "Wikipedia-API>=0.8.1",
 ]
 dev = [
   "smolagents[quality,test]",
@@ -107,4 +125,4 @@ lines-after-imports = 2
 
 [project.scripts]
 smolagent = "smolagents.cli:main"
-webagent = "smolagents.vision_web_browser:main"
\ No newline at end of file
+webagent = "smolagents.vision_web_browser:main"
diff --git a/src/smolagents/__init__.py b/src/smolagents/__init__.py
index a1321eb1b..be4c3c19e 100644
--- a/src/smolagents/__init__.py
+++ b/src/smolagents/__init__.py
@@ -14,17 +14,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.10.0.dev0"
+__version__ = "1.15.0.dev0"
 
 from .agent_types import *  # noqa: I001
 from .agents import *  # Above noqa avoids a circular dependency due to cli.py
 from .default_tools import *
-from .e2b_executor import *
 from .gradio_ui import *
 from .local_python_executor import *
+from .mcp_client import *
 from .memory import *
 from .models import *
 from .monitoring import *
+from .remote_executors import *
 from .tools import *
 from .utils import *
 from .cli import *
diff --git a/src/smolagents/_function_type_hints_utils.py b/src/smolagents/_function_type_hints_utils.py
index dddd90d0c..e5a367c08 100644
--- a/src/smolagents/_function_type_hints_utils.py
+++ b/src/smolagents/_function_type_hints_utils.py
@@ -26,26 +26,19 @@
 import json
 import re
 import types
+from collections.abc import Callable
 from copy import copy
 from typing import (
     Any,
-    Callable,
-    Dict,
-    List,
-    Optional,
-    Tuple,
+    Literal,
     Union,
     get_args,
     get_origin,
     get_type_hints,
 )
 
-from huggingface_hub.utils import is_torch_available
 
-from .utils import _is_pillow_available
-
-
-def get_imports(code: str) -> List[str]:
+def get_imports(code: str) -> list[str]:
     """
     Extracts all the libraries (not relative imports) that are imported in a code.
 
@@ -83,7 +76,7 @@ class DocstringParsingException(Exception):
     """Exception raised for errors in parsing docstrings to generate JSON schemas"""
 
 
-def get_json_schema(func: Callable) -> Dict:
+def get_json_schema(func: Callable) -> dict:
     """
     This function generates a JSON schema for a given function, based on its docstring and type hints. This is
     mostly used for passing lists of tools to a chat template. The JSON schema contains the name and description of
@@ -221,26 +214,30 @@ def get_json_schema(func: Callable) -> Dict:
 
 
 # Extracts the initial segment of the docstring, containing the function description
-description_re = re.compile(r"^(.*?)[\n\s]*(Args:|Returns:|Raises:|\Z)", re.DOTALL)
+description_re = re.compile(r"^(.*?)(?=\n\s*(Args:|Returns:|Raises:)|\Z)", re.DOTALL)
 # Extracts the Args: block from the docstring
 args_re = re.compile(r"\n\s*Args:\n\s*(.*?)[\n\s]*(Returns:|Raises:|\Z)", re.DOTALL)
 # Splits the Args: block into individual arguments
 args_split_re = re.compile(
-    r"""
-(?:^|\n)  # Match the start of the args block, or a newline
-\s*(\w+)\s*(?:\([^)]*\))?:\s*  # Capture the argument name (ignore the type) and strip spacing
-(.*?)\s*  # Capture the argument description, which can span multiple lines, and strip trailing spacing
-(?=\n\s*\w+:|\Z)  # Stop when you hit the next argument or the end of the block
-""",
+    r"(?:^|\n)"  # Match the start of the args block, or a newline
+    r"\s*(\w+)\s*(?:\([^)]*?\))?:\s*"  # Capture the argument name (ignore the type) and strip spacing
+    r"(.*?)\s*"  # Capture the argument description, which can span multiple lines, and strip trailing spacing
+    r"(?=\n\s*\w+\s*(?:\([^)]*?\))?:|\Z)",  # Stop when you hit the next argument (with or without type) or the end of the block
     re.DOTALL | re.VERBOSE,
 )
 # Extracts the Returns: block from the docstring, if present. Note that most chat templates ignore the return type/doc!
-returns_re = re.compile(r"\n\s*Returns:\n\s*(.*?)[\n\s]*(Raises:|\Z)", re.DOTALL)
+returns_re = re.compile(
+    r"\n\s*Returns:\n\s*"
+    r"(?:[^)]*?:\s*)?"  # Ignore the return type if present
+    r"(.*?)"  # Capture the return description
+    r"[\n\s]*(Raises:|\Z)",
+    re.DOTALL,
+)
 
 
 def _parse_google_format_docstring(
     docstring: str,
-) -> Tuple[Optional[str], Optional[Dict], Optional[str]]:
+) -> tuple[str | None, dict | None, str | None]:
     """
     Parses a Google-style docstring to extract the function description,
     argument descriptions, and return description.
@@ -273,7 +270,7 @@ def _parse_google_format_docstring(
     return description, args_dict, returns
 
 
-def _convert_type_hints_to_json_schema(func: Callable, error_on_missing_type_hints: bool = True) -> Dict:
+def _convert_type_hints_to_json_schema(func: Callable, error_on_missing_type_hints: bool = True) -> dict:
     type_hints = get_type_hints(func)
     signature = inspect.signature(func)
 
@@ -300,7 +297,7 @@ def _convert_type_hints_to_json_schema(func: Callable, error_on_missing_type_hin
     return schema
 
 
-def _parse_type_hint(hint: str) -> Dict:
+def _parse_type_hint(hint: str) -> dict:
     origin = get_origin(hint)
     args = get_args(hint)
 
@@ -314,20 +311,7 @@ def _parse_type_hint(hint: str) -> Dict:
             )
 
     elif origin is Union or (hasattr(types, "UnionType") and origin is types.UnionType):
-        # Recurse into each of the subtypes in the Union, except None, which is handled separately at the end
-        subtypes = [_parse_type_hint(t) for t in args if t is not type(None)]
-        if len(subtypes) == 1:
-            # A single non-null type can be expressed directly
-            return_dict = subtypes[0]
-        elif all(isinstance(subtype["type"], str) for subtype in subtypes):
-            # A union of basic types can be expressed as a list in the schema
-            return_dict = {"type": sorted([subtype["type"] for subtype in subtypes])}
-        else:
-            # A union of more complex types requires "anyOf"
-            return_dict = {"anyOf": subtypes}
-        if type(None) in args:
-            return_dict["nullable"] = True
-        return return_dict
+        return _parse_union_type(args)
 
     elif origin is list:
         if not args:
@@ -363,9 +347,33 @@ def _parse_type_hint(hint: str) -> Dict:
             out["additionalProperties"] = _parse_type_hint(args[1])
         return out
 
+    elif origin is Literal:
+        literal_types = set(type(arg) for arg in args)
+        final_type = _parse_union_type(literal_types)
+
+        # None literal value is represented by 'nullable' field set by _parse_union_type
+        final_type.update({"enum": [arg for arg in args if arg is not None]})
+        return final_type
+
     raise TypeHintParsingException("Couldn't parse this type hint, likely due to a custom class or object: ", hint)
 
 
+def _parse_union_type(args: tuple[Any, ...]) -> dict:
+    subtypes = [_parse_type_hint(t) for t in args if t is not type(None)]
+    if len(subtypes) == 1:
+        # A single non-null type can be expressed directly
+        return_dict = subtypes[0]
+    elif all(isinstance(subtype["type"], str) for subtype in subtypes):
+        # A union of basic types can be expressed as a list in the schema
+        return_dict = {"type": sorted([subtype["type"] for subtype in subtypes])}
+    else:
+        # A union of more complex types requires "anyOf"
+        return_dict = {"anyOf": subtypes}
+    if type(None) in args:
+        return_dict["nullable"] = True
+    return return_dict
+
+
 _BASE_TYPE_MAPPING = {
     int: {"type": "integer"},
     float: {"type": "number"},
@@ -376,17 +384,20 @@ def _parse_type_hint(hint: str) -> Dict:
 }
 
 
-def _get_json_schema_type(param_type: str) -> Dict[str, str]:
+def _get_json_schema_type(param_type: str) -> dict[str, str]:
     if param_type in _BASE_TYPE_MAPPING:
         return copy(_BASE_TYPE_MAPPING[param_type])
-    if str(param_type) == "Image" and _is_pillow_available():
+    if str(param_type) == "Image":
         from PIL.Image import Image
 
         if param_type == Image:
             return {"type": "image"}
-    if str(param_type) == "Tensor" and is_torch_available():
-        from torch import Tensor
+    if str(param_type) == "Tensor":
+        try:
+            from torch import Tensor
 
-        if param_type == Tensor:
-            return {"type": "audio"}
+            if param_type == Tensor:
+                return {"type": "audio"}
+        except ModuleNotFoundError:
+            pass
     return {"type": "object"}
diff --git a/src/smolagents/agent_types.py b/src/smolagents/agent_types.py
index b0d4ee1d1..73772292e 100644
--- a/src/smolagents/agent_types.py
+++ b/src/smolagents/agent_types.py
@@ -19,11 +19,8 @@
 import uuid
 from io import BytesIO
 
-import numpy as np
+import PIL.Image
 import requests
-from huggingface_hub.utils import is_torch_available
-from PIL import Image
-from PIL.Image import Image as ImageType
 
 from .utils import _is_package_available
 
@@ -37,7 +34,7 @@ class AgentType:
 
     These objects serve three purposes:
 
-    - They behave as they were the type they're meant to be, e.g., a string for text, a PIL.Image for images
+    - They behave as if they were the type they're meant to be, e.g., a string for text, a PIL.Image.Image for images
     - They can be stringified: str(object) in order to return a string defining the object
     - They should be displayed correctly in ipython notebooks/colab/jupyter
     """
@@ -73,14 +70,14 @@ def to_string(self):
         return str(self._value)
 
 
-class AgentImage(AgentType, ImageType):
+class AgentImage(AgentType, PIL.Image.Image):
     """
-    Image type returned by the agent. Behaves as a PIL.Image.
+    Image type returned by the agent. Behaves as a PIL.Image.Image.
     """
 
     def __init__(self, value):
         AgentType.__init__(self, value)
-        ImageType.__init__(self)
+        PIL.Image.Image.__init__(self)
 
         self._path = None
         self._raw = None
@@ -88,19 +85,24 @@ def __init__(self, value):
 
         if isinstance(value, AgentImage):
             self._raw, self._path, self._tensor = value._raw, value._path, value._tensor
-        elif isinstance(value, ImageType):
+        elif isinstance(value, PIL.Image.Image):
             self._raw = value
         elif isinstance(value, bytes):
-            self._raw = Image.open(BytesIO(value))
+            self._raw = PIL.Image.open(BytesIO(value))
         elif isinstance(value, (str, pathlib.Path)):
             self._path = value
-        elif is_torch_available():
-            import torch
+        else:
+            try:
+                import torch
+
+                if isinstance(value, torch.Tensor):
+                    self._tensor = value
+                import numpy as np
 
-            if isinstance(value, torch.Tensor):
-                self._tensor = value
-            if isinstance(value, np.ndarray):
-                self._tensor = torch.from_numpy(value)
+                if isinstance(value, np.ndarray):
+                    self._tensor = torch.from_numpy(value)
+            except ModuleNotFoundError:
+                pass
 
         if self._path is None and self._raw is None and self._tensor is None:
             raise TypeError(f"Unsupported type for {self.__class__.__name__}: {type(value)}")
@@ -115,18 +117,20 @@ def _ipython_display_(self, include=None, exclude=None):
 
     def to_raw(self):
         """
-        Returns the "raw" version of that object. In the case of an AgentImage, it is a PIL.Image.
+        Returns the "raw" version of that object. In the case of an AgentImage, it is a PIL.Image.Image.
         """
         if self._raw is not None:
             return self._raw
 
         if self._path is not None:
-            self._raw = Image.open(self._path)
+            self._raw = PIL.Image.open(self._path)
             return self._raw
 
         if self._tensor is not None:
+            import numpy as np
+
             array = self._tensor.cpu().detach().numpy()
-            return Image.fromarray((255 - array * 255).astype(np.uint8))
+            return PIL.Image.fromarray((255 - array * 255).astype(np.uint8))
 
     def to_string(self):
         """
@@ -143,10 +147,12 @@ def to_string(self):
             return self._path
 
         if self._tensor is not None:
+            import numpy as np
+
             array = self._tensor.cpu().detach().numpy()
 
             # There is likely simpler than load into image into save
-            img = Image.fromarray((255 - array * 255).astype(np.uint8))
+            img = PIL.Image.fromarray((255 - array * 255).astype(np.uint8))
 
             directory = tempfile.mkdtemp()
             self._path = os.path.join(directory, str(uuid.uuid4()) + ".png")
@@ -172,10 +178,11 @@ class AgentAudio(AgentType, str):
     """
 
     def __init__(self, value, samplerate=16_000):
-        if not _is_package_available("soundfile") or not is_torch_available():
+        if not _is_package_available("soundfile") or not _is_package_available("torch"):
             raise ModuleNotFoundError(
                 "Please install 'audio' extra to use AgentAudio: `pip install 'smolagents[audio]'`"
             )
+        import numpy as np
         import torch
 
         super().__init__(value)
@@ -186,7 +193,7 @@ def __init__(self, value, samplerate=16_000):
         self.samplerate = samplerate
         if isinstance(value, (str, pathlib.Path)):
             self._path = value
-        elif is_torch_available() and isinstance(value, torch.Tensor):
+        elif isinstance(value, torch.Tensor):
             self._tensor = value
         elif isinstance(value, tuple):
             self.samplerate = value[0]
@@ -261,13 +268,15 @@ def handle_agent_output_types(output, output_type=None):
     # If the class does not have defined output, then we map according to the type
     if isinstance(output, str):
         return AgentText(output)
-    if isinstance(output, ImageType):
+    if isinstance(output, PIL.Image.Image):
         return AgentImage(output)
-    if is_torch_available():
+    try:
         import torch
 
         if isinstance(output, torch.Tensor):
             return AgentAudio(output)
+    except ModuleNotFoundError:
+        pass
     return output
 
 
diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
index a4d1b08f8..8ff8eb230 100644
--- a/src/smolagents/agents.py
+++ b/src/smolagents/agents.py
@@ -22,40 +22,49 @@
 import tempfile
 import textwrap
 import time
+from abc import ABC, abstractmethod
 from collections import deque
+from collections.abc import Callable, Generator
 from logging import getLogger
 from pathlib import Path
-from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, TypedDict, Union
+from typing import TYPE_CHECKING, Any, TypedDict
 
 import jinja2
 import yaml
 from huggingface_hub import create_repo, metadata_update, snapshot_download, upload_folder
 from jinja2 import StrictUndefined, Template
 from rich.console import Group
+from rich.live import Live
+from rich.markdown import Markdown
 from rich.panel import Panel
 from rich.rule import Rule
 from rich.text import Text
 
-from .agent_types import AgentAudio, AgentImage, AgentType, handle_agent_output_types
+
+if TYPE_CHECKING:
+    import PIL.Image
+
+from .agent_types import AgentAudio, AgentImage, handle_agent_output_types
 from .default_tools import TOOL_MAPPING, FinalAnswerTool
-from .e2b_executor import E2BExecutor
-from .local_python_executor import (
-    BASE_BUILTIN_MODULES,
-    LocalPythonInterpreter,
-    fix_final_answer_code,
-)
-from .memory import ActionStep, AgentMemory, PlanningStep, SystemPromptStep, TaskStep, ToolCall
-from .models import (
-    ChatMessage,
-    MessageRole,
-    Model,
+from .local_python_executor import BASE_BUILTIN_MODULES, LocalPythonExecutor, PythonExecutor, fix_final_answer_code
+from .memory import (
+    ActionStep,
+    AgentMemory,
+    FinalAnswerStep,
+    Message,
+    PlanningStep,
+    SystemPromptStep,
+    TaskStep,
+    ToolCall,
 )
+from .models import ChatMessage, MessageRole, Model, parse_json_if_needed
 from .monitoring import (
     YELLOW_HEX,
     AgentLogger,
     LogLevel,
     Monitor,
 )
+from .remote_executors import DockerExecutor, E2BExecutor
 from .tools import Tool
 from .utils import (
     AgentError,
@@ -63,9 +72,11 @@
     AgentGenerationError,
     AgentMaxStepsError,
     AgentParsingError,
+    AgentToolCallError,
+    AgentToolExecutionError,
+    is_valid_name,
     make_init_file,
     parse_code_blobs,
-    parse_json_tool_call,
     truncate_content,
 )
 
@@ -73,12 +84,12 @@
 logger = getLogger(__name__)
 
 
-def get_variable_names(self, template: str) -> Set[str]:
+def get_variable_names(self, template: str) -> set[str]:
     pattern = re.compile(r"\{\{([^{}]+)\}\}")
     return {match.group(1).strip() for match in pattern.finditer(template)}
 
 
-def populate_template(template: str, variables: Dict[str, Any]) -> str:
+def populate_template(template: str, variables: dict[str, Any]) -> str:
     compiled_template = Template(template, undefined=StrictUndefined)
     try:
         return compiled_template.render(**variables)
@@ -91,18 +102,12 @@ class PlanningPromptTemplate(TypedDict):
     Prompt templates for the planning step.
 
     Args:
-        initial_facts (`str`): Initial facts prompt.
-        initial_plan (`str`): Initial plan prompt.
-        update_facts_pre_messages (`str`): Update facts pre-messages prompt.
-        update_facts_post_messages (`str`): Update facts post-messages prompt.
+        initial_plan (`str`): Initial plan prompt.
         update_plan_pre_messages (`str`): Update plan pre-messages prompt.
         update_plan_post_messages (`str`): Update plan post-messages prompt.
     """
 
-    initial_facts: str
     initial_plan: str
-    update_facts_pre_messages: str
-    update_facts_post_messages: str
     update_plan_pre_messages: str
     update_plan_post_messages: str
 
@@ -153,10 +158,7 @@ class PromptTemplates(TypedDict):
 EMPTY_PROMPT_TEMPLATES = PromptTemplates(
     system_prompt="",
     planning=PlanningPromptTemplate(
-        initial_facts="",
         initial_plan="",
-        update_facts_pre_messages="",
-        update_facts_post_messages="",
         update_plan_pre_messages="",
         update_plan_post_messages="",
     ),
@@ -165,7 +167,7 @@ class PromptTemplates(TypedDict):
 )
 
 
-class MultiStepAgent:
+class MultiStepAgent(ABC):
     """
     Agent class that solves the given task step by step, using the ReAct framework:
     While the objective is not reached, the agent will perform a cycle of action (given by the LLM) and observation (obtained from the environment).
@@ -174,7 +176,7 @@ class MultiStepAgent:
         tools (`list[Tool]`): [`Tool`]s that the agent can use.
         model (`Callable[[list[dict[str, str]]], ChatMessage]`): Model that will generate the agent's actions.
         prompt_templates ([`~agents.PromptTemplates`], *optional*): Prompt templates.
-        max_steps (`int`, default `6`): Maximum number of steps the agent can take to solve the task.
+        max_steps (`int`, default `20`): Maximum number of steps the agent can take to solve the task.
         tool_parser (`Callable`, *optional*): Function used to parse the tool calls from the LLM output.
         add_base_tools (`bool`, default `False`): Whether to add the base tools to the agent's tools.
         verbosity_level (`LogLevel`, default `LogLevel.INFO`): Level of verbosity of the agent's logs.
@@ -190,32 +192,43 @@ class MultiStepAgent:
 
     def __init__(
         self,
-        tools: List[Tool],
-        model: Callable[[List[Dict[str, str]]], ChatMessage],
-        prompt_templates: Optional[PromptTemplates] = None,
-        max_steps: int = 6,
-        tool_parser: Optional[Callable] = None,
+        tools: list[Tool],
+        model: Model,
+        prompt_templates: PromptTemplates | None = None,
+        max_steps: int = 20,
         add_base_tools: bool = False,
         verbosity_level: LogLevel = LogLevel.INFO,
-        grammar: Optional[Dict[str, str]] = None,
-        managed_agents: Optional[List] = None,
-        step_callbacks: Optional[List[Callable]] = None,
-        planning_interval: Optional[int] = None,
-        name: Optional[str] = None,
-        description: Optional[str] = None,
+        grammar: dict[str, str] | None = None,
+        managed_agents: list | None = None,
+        step_callbacks: list[Callable] | None = None,
+        planning_interval: int | None = None,
+        name: str | None = None,
+        description: str | None = None,
         provide_run_summary: bool = False,
-        final_answer_checks: Optional[List[Callable]] = None,
+        final_answer_checks: list[Callable] | None = None,
+        logger: AgentLogger | None = None,
     ):
         self.agent_name = self.__class__.__name__
         self.model = model
         self.prompt_templates = prompt_templates or EMPTY_PROMPT_TEMPLATES
+        if prompt_templates is not None:
+            missing_keys = set(EMPTY_PROMPT_TEMPLATES.keys()) - set(prompt_templates.keys())
+            assert not missing_keys, (
+                f"Some prompt templates are missing from your custom `prompt_templates`: {missing_keys}"
+            )
+            for key, value in EMPTY_PROMPT_TEMPLATES.items():
+                if isinstance(value, dict):
+                    for subkey in value.keys():
+                        assert key in prompt_templates.keys() and (subkey in prompt_templates[key].keys()), (
+                            f"Some prompt templates are missing from your custom `prompt_templates`: {subkey} under {key}"
+                        )
+
         self.max_steps = max_steps
         self.step_number = 0
-        self.tool_parser = tool_parser or parse_json_tool_call
         self.grammar = grammar
         self.planning_interval = planning_interval
-        self.state = {}
-        self.name = name
+        self.state: dict[str, Any] = {}
+        self.name = self._validate_name(name)
         self.description = description
         self.provide_run_summary = provide_run_summary
         self.final_answer_checks = final_answer_checks
@@ -225,15 +238,25 @@ def __init__(
         self._validate_tools_and_managed_agents(tools, managed_agents)
 
         self.system_prompt = self.initialize_system_prompt()
-        self.input_messages = None
-        self.task = None
+        self.task: str | None = None
         self.memory = AgentMemory(self.system_prompt)
-        self.logger = AgentLogger(level=verbosity_level)
+
+        if logger is None:
+            self.logger = AgentLogger(level=verbosity_level)
+        else:
+            self.logger = logger
+
         self.monitor = Monitor(self.model, self.logger)
         self.step_callbacks = step_callbacks if step_callbacks is not None else []
         self.step_callbacks.append(self.monitor.update_metrics)
 
-    def _setup_managed_agents(self, managed_agents):
+    def _validate_name(self, name: str | None) -> str | None:
+        if name is not None and not is_valid_name(name):
+            raise ValueError(f"Agent name '{name}' must be a valid Python identifier and not a reserved keyword.")
+        return name
+
+    def _setup_managed_agents(self, managed_agents: list | None = None) -> None:
+        """Setup managed agents with proper logging."""
         self.managed_agents = {}
         if managed_agents:
             assert all(agent.name and agent.description for agent in managed_agents), (
@@ -252,16 +275,14 @@ def _setup_tools(self, tools, add_base_tools):
                     if name != "python_interpreter" or self.__class__.__name__ == "ToolCallingAgent"
                 }
             )
-        self.tools["final_answer"] = FinalAnswerTool()
+        self.tools.setdefault("final_answer", FinalAnswerTool())
 
     def _validate_tools_and_managed_agents(self, tools, managed_agents):
         tool_and_managed_agent_names = [tool.name for tool in tools]
         if managed_agents is not None:
-            for agent in managed_agents:
-                tool_and_managed_agent_names.append(agent.name)
-                for tool in agent.tools.values():
-                    if tool.name != "final_answer":
-                        tool_and_managed_agent_names.append(tool.name)
+            tool_and_managed_agent_names += [agent.name for agent in managed_agents]
+        if self.name:
+            tool_and_managed_agent_names.append(self.name)
         if len(tool_and_managed_agent_names) != len(set(tool_and_managed_agent_names)):
             raise ValueError(
                 "Each tool or managed_agent should have a unique name! You passed these duplicate names: "
@@ -273,18 +294,22 @@ def run(
         task: str,
         stream: bool = False,
         reset: bool = True,
-        images: Optional[List[str]] = None,
-        additional_args: Optional[Dict] = None,
+        images: list["PIL.Image.Image"] | None = None,
+        additional_args: dict | None = None,
+        max_steps: int | None = None,
     ):
         """
         Run the agent for the given task.
 
         Args:
             task (`str`): Task to perform.
-            stream (`bool`): Whether to run in a streaming way.
+            stream (`bool`): Whether to run in streaming mode.
+                If `True`, returns a generator that yields each step as it is executed. You must iterate over this generator to process the individual steps (e.g., using a for loop or `next()`).
+                If `False`, executes all steps internally and returns only the final answer after completion.
             reset (`bool`): Whether to reset the conversation or keep it going from previous run.
-            images (`list[str]`, *optional*): Paths to image(s).
-            additional_args (`dict`): Any other variables that you want to pass to the agent run, for instance images or dataframes. Give them clear names!
+            images (`list[PIL.Image.Image]`, *optional*): Image(s) objects.
+            additional_args (`dict`, *optional*): Any other variables that you want to pass to the agent run, for instance images or dataframes. Give them clear names!
+            max_steps (`int`, *optional*): Maximum number of steps the agent can take to solve the task. If not provided, will use the agent's default value.
 
         Example:
         ```py
@@ -293,8 +318,9 @@ def run(
         agent.run("What is the result of 2 power 3.7384?")
         ```
         """
-
+        max_steps = max_steps or self.max_steps
         self.task = task
+        self.interrupt_switch = False
         if additional_args is not None:
             self.state.update(additional_args)
             self.task += f"""
@@ -313,41 +339,58 @@ def run(
             level=LogLevel.INFO,
             title=self.name if hasattr(self, "name") else None,
         )
-
         self.memory.steps.append(TaskStep(task=self.task, task_images=images))
 
+        if getattr(self, "python_executor", None):
+            self.python_executor.send_variables(variables=self.state)
+            self.python_executor.send_tools({**self.tools, **self.managed_agents})
+
         if stream:
             # The steps are returned as they are executed through a generator to iterate on.
-            return self._run(task=self.task, images=images)
-        # Outputs are returned only at the end as a string. We only look at the last step
-        return deque(self._run(task=self.task, images=images), maxlen=1)[0]
+            return self._run(task=self.task, max_steps=max_steps, images=images)
+        # Outputs are returned only at the end. We only look at the last step.
+        return deque(self._run(task=self.task, max_steps=max_steps, images=images), maxlen=1)[0].final_answer
 
-    def _run(self, task: str, images: List[str] | None = None) -> Generator[ActionStep | AgentType, None, None]:
+    def _run(
+        self, task: str, max_steps: int, images: list["PIL.Image.Image"] | None = None
+    ) -> Generator[ActionStep | PlanningStep | FinalAnswerStep]:
         final_answer = None
         self.step_number = 1
-        while final_answer is None and self.step_number <= self.max_steps:
+        while final_answer is None and self.step_number <= max_steps:
+            if self.interrupt_switch:
+                raise AgentError("Agent interrupted.", self.logger)
             step_start_time = time.time()
-            memory_step = self._create_memory_step(step_start_time, images)
+            if self.planning_interval is not None and (
+                self.step_number == 1 or (self.step_number - 1) % self.planning_interval == 0
+            ):
+                planning_step = self._generate_planning_step(
+                    task, is_first_step=(self.step_number == 1), step=self.step_number
+                )
+                self.memory.steps.append(planning_step)
+                yield planning_step
+            action_step = ActionStep(
+                step_number=self.step_number, start_time=step_start_time, observations_images=images
+            )
             try:
-                final_answer = self._execute_step(task, memory_step)
+                final_answer = self._execute_step(task, action_step)
+            except AgentGenerationError as e:
+                # Agent generation errors are caused by an implementation error, not a Model error, so we re-raise them and exit.
+                raise e
             except AgentError as e:
-                memory_step.error = e
+                # Other AgentError types are caused by the Model, so we should log them and iterate.
+                action_step.error = e
             finally:
-                self._finalize_step(memory_step, step_start_time)
-                yield memory_step
+                self._finalize_step(action_step, step_start_time)
+                self.memory.steps.append(action_step)
+                yield action_step
                 self.step_number += 1
 
-        if final_answer is None and self.step_number == self.max_steps + 1:
+        if final_answer is None and self.step_number == max_steps + 1:
             final_answer = self._handle_max_steps_reached(task, images, step_start_time)
-            yield memory_step
-        yield handle_agent_output_types(final_answer)
-
-    def _create_memory_step(self, step_start_time: float, images: List[str] | None) -> ActionStep:
-        return ActionStep(step_number=self.step_number, start_time=step_start_time, observations_images=images)
+            yield action_step
+        yield FinalAnswerStep(handle_agent_output_types(final_answer))
 
-    def _execute_step(self, task: str, memory_step: ActionStep) -> Union[None, Any]:
-        if self.planning_interval is not None and self.step_number % self.planning_interval == 1:
-            self.planning_step(task, is_first_step=(self.step_number == 1), step=self.step_number)
+    def _execute_step(self, task: str, memory_step: ActionStep) -> None | Any:
         self.logger.log_rule(f"Step {self.step_number}", level=LogLevel.INFO)
         final_answer = self.step(memory_step)
         if final_answer is not None and self.final_answer_checks:
@@ -364,14 +407,13 @@ def _validate_final_answer(self, final_answer: Any):
     def _finalize_step(self, memory_step: ActionStep, step_start_time: float):
         memory_step.end_time = time.time()
         memory_step.duration = memory_step.end_time - step_start_time
-        self.memory.steps.append(memory_step)
         for callback in self.step_callbacks:
             # For compatibility with old callbacks that don't take the agent as an argument
             callback(memory_step) if len(inspect.signature(callback).parameters) == 1 else callback(
                 memory_step, agent=self
             )
 
-    def _handle_max_steps_reached(self, task: str, images: List[str], step_start_time: float) -> Any:
+    def _handle_max_steps_reached(self, task: str, images: list["PIL.Image.Image"], step_start_time: float) -> Any:
         final_answer = self.provide_final_answer(task, images)
         final_memory_step = ActionStep(
             step_number=self.step_number, error=AgentMaxStepsError("Reached max steps.", self.logger)
@@ -386,124 +428,70 @@ def _handle_max_steps_reached(self, task: str, images: List[str], step_start_tim
             )
         return final_answer
 
-    def planning_step(self, task, is_first_step: bool, step: int) -> None:
-        input_messages, facts_message, plan_message = (
-            self._generate_initial_plan(task) if is_first_step else self._generate_updated_plan(task, step)
-        )
-        self._record_planning_step(input_messages, facts_message, plan_message, is_first_step)
-
-    def _generate_initial_plan(self, task: str) -> Tuple[ChatMessage, ChatMessage]:
-        input_messages = [
-            {
+    def _generate_planning_step(self, task, is_first_step: bool, step: int) -> PlanningStep:
+        if is_first_step:
+            input_messages = [
+                {
+                    "role": MessageRole.USER,
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": populate_template(
+                                self.prompt_templates["planning"]["initial_plan"],
+                                variables={"task": task, "tools": self.tools, "managed_agents": self.managed_agents},
+                            ),
+                        }
+                    ],
+                }
+            ]
+            plan_message = self.model(input_messages, stop_sequences=["<end_plan>"])
+            plan = textwrap.dedent(
+                f"""Here are the facts I know and the plan of action that I will follow to solve the task:\n```\n{plan_message.content}\n```"""
+            )
+        else:
+            # Summary mode removes the system prompt and previous planning messages output by the model.
+            # Removing previous planning messages avoids influencing too much the new plan.
+            memory_messages = self.write_memory_to_messages(summary_mode=True)
+            plan_update_pre = {
+                "role": MessageRole.SYSTEM,
+                "content": [
+                    {
+                        "type": "text",
+                        "text": populate_template(
+                            self.prompt_templates["planning"]["update_plan_pre_messages"], variables={"task": task}
+                        ),
+                    }
+                ],
+            }
+            plan_update_post = {
                 "role": MessageRole.USER,
                 "content": [
                     {
                         "type": "text",
                         "text": populate_template(
-                            self.prompt_templates["planning"]["initial_facts"], variables={"task": task}
+                            self.prompt_templates["planning"]["update_plan_post_messages"],
+                            variables={
+                                "task": task,
+                                "tools": self.tools,
+                                "managed_agents": self.managed_agents,
+                                "remaining_steps": (self.max_steps - step),
+                            },
                         ),
                     }
                 ],
-            },
-        ]
-        facts_message = self.model(input_messages)
-
-        message_prompt_plan = {
-            "role": MessageRole.USER,
-            "content": [
-                {
-                    "type": "text",
-                    "text": populate_template(
-                        self.prompt_templates["planning"]["initial_plan"],
-                        variables={
-                            "task": task,
-                            "tools": self.tools,
-                            "managed_agents": self.managed_agents,
-                            "answer_facts": facts_message.content,
-                        },
-                    ),
-                }
-            ],
-        }
-        plan_message = self.model([message_prompt_plan], stop_sequences=["<end_plan>"])
-        return input_messages, facts_message, plan_message
-
-    def _generate_updated_plan(self, task: str, step: int) -> Tuple[ChatMessage, ChatMessage]:
-        # Do not take the system prompt message from the memory
-        # summary_mode=False: Do not take previous plan steps to avoid influencing the new plan
-        memory_messages = self.write_memory_to_messages()[1:]
-        facts_update_pre = {
-            "role": MessageRole.SYSTEM,
-            "content": [{"type": "text", "text": self.prompt_templates["planning"]["update_facts_pre_messages"]}],
-        }
-        facts_update_post = {
-            "role": MessageRole.USER,
-            "content": [{"type": "text", "text": self.prompt_templates["planning"]["update_facts_post_messages"]}],
-        }
-        input_messages = [facts_update_pre] + memory_messages + [facts_update_post]
-        facts_message = self.model(input_messages)
-
-        update_plan_pre = {
-            "role": MessageRole.SYSTEM,
-            "content": [
-                {
-                    "type": "text",
-                    "text": populate_template(
-                        self.prompt_templates["planning"]["update_plan_pre_messages"], variables={"task": task}
-                    ),
-                }
-            ],
-        }
-        update_plan_post = {
-            "role": MessageRole.USER,
-            "content": [
-                {
-                    "type": "text",
-                    "text": populate_template(
-                        self.prompt_templates["planning"]["update_plan_post_messages"],
-                        variables={
-                            "task": task,
-                            "tools": self.tools,
-                            "managed_agents": self.managed_agents,
-                            "facts_update": facts_message.content,
-                            "remaining_steps": (self.max_steps - step),
-                        },
-                    ),
-                }
-            ],
-        }
-        plan_message = self.model(
-            [update_plan_pre] + memory_messages + [update_plan_post], stop_sequences=["<end_plan>"]
-        )
-        return input_messages, facts_message, plan_message
-
-    def _record_planning_step(
-        self, input_messages: list, facts_message: ChatMessage, plan_message: ChatMessage, is_first_step: bool
-    ) -> None:
-        if is_first_step:
-            facts = textwrap.dedent(f"""Here are the facts that I know so far:\n```\n{facts_message.content}\n```""")
-            plan = textwrap.dedent(
-                f"""Here is the plan of action that I will follow to solve the task:\n```\n{plan_message.content}\n```"""
-            )
-            log_message = "Initial plan"
-        else:
-            facts = textwrap.dedent(
-                f"""Here is the updated list of the facts that I know:\n```\n{facts_message.content}\n```"""
-            )
+            }
+            input_messages = [plan_update_pre] + memory_messages + [plan_update_post]
+            plan_message = self.model(input_messages, stop_sequences=["<end_plan>"])
             plan = textwrap.dedent(
-                f"""I still need to solve the task I was given:\n```\n{self.task}\n```\n\nHere is my new/updated plan of action to solve the task:\n```\n{plan_message.content}\n```"""
-            )
-            log_message = "Updated plan"
-        self.memory.steps.append(
-            PlanningStep(
-                model_input_messages=input_messages,
-                facts=facts,
-                plan=plan,
-                model_output_message_plan=plan_message,
-                model_output_message_facts=facts_message,
+                f"""I still need to solve the task I was given:\n```\n{self.task}\n```\n\nHere are the facts I know and my new/updated plan of action to solve the task:\n```\n{plan_message.content}\n```"""
             )
+        log_headline = "Initial plan" if is_first_step else "Updated plan"
+        self.logger.log(Rule(f"[bold]{log_headline}", style="orange"), Text(plan), level=LogLevel.INFO)
+        return PlanningStep(
+            model_input_messages=input_messages,
+            plan=plan,
+            model_output_message=plan_message,
         )
-        self.logger.log(Rule(f"[bold]{log_message}", style="orange"), Text(plan), level=LogLevel.INFO)
 
     @property
     def logs(self):
@@ -512,14 +500,19 @@ def logs(self):
         )
         return [self.memory.system_prompt] + self.memory.steps
 
-    def initialize_system_prompt(self):
+    @abstractmethod
+    def initialize_system_prompt(self) -> str:
         """To be implemented in child classes"""
-        pass
+        ...
+
+    def interrupt(self):
+        """Interrupts the agent execution by setting the `interrupt_switch` flag to True."""
+        self.interrupt_switch = True
 
     def write_memory_to_messages(
         self,
-        summary_mode: Optional[bool] = False,
-    ) -> List[Dict[str, str]]:
+        summary_mode: bool | None = False,
+    ) -> list[Message]:
         """
         Reads past llm_outputs, actions, and observations or errors from the memory into a series of messages
         that can be used as input to the LLM. Adds a number of keywords (such as PLAN, error, etc) to help
@@ -534,7 +527,7 @@ def visualize(self):
         """Creates a rich tree visualization of the agent's structure."""
         self.logger.visualize_agent_tree(self)
 
-    def extract_action(self, model_output: str, split_token: str) -> Tuple[str, str]:
+    def extract_action(self, model_output: str, split_token: str) -> tuple[str, str]:
         """
         Parse action from the LLM output
 
@@ -555,13 +548,13 @@ def extract_action(self, model_output: str, split_token: str) -> Tuple[str, str]
             )
         return rationale.strip(), action.strip()
 
-    def provide_final_answer(self, task: str, images: Optional[list[str]]) -> str:
+    def provide_final_answer(self, task: str, images: list["PIL.Image.Image"] | None = None) -> str:
         """
         Provide the final answer to the task, based on the logs of the agent's interactions.
 
         Args:
             task (`str`): Task to perform.
-            images (`list[str]`, *optional*): Paths to image(s).
+            images (`list[PIL.Image.Image]`, *optional*): Image(s) objects.
 
         Returns:
             `str`: Final answer to the task.
@@ -599,54 +592,8 @@ def provide_final_answer(self, task: str, images: Optional[list[str]]) -> str:
         except Exception as e:
             return f"Error in generating final LLM output:\n{e}"
 
-    def execute_tool_call(self, tool_name: str, arguments: Union[Dict[str, str], str]) -> Any:
-        """
-        Execute tool with the provided input and returns the result.
-        This method replaces arguments with the actual values from the state if they refer to state variables.
-
-        Args:
-            tool_name (`str`): Name of the Tool to execute (should be one from self.tools).
-            arguments (Dict[str, str]): Arguments passed to the Tool.
-        """
-        available_tools = {**self.tools, **self.managed_agents}
-        if tool_name not in available_tools:
-            error_msg = f"Unknown tool {tool_name}, should be instead one of {list(available_tools.keys())}."
-            raise AgentExecutionError(error_msg, self.logger)
-
-        try:
-            if isinstance(arguments, str):
-                if tool_name in self.managed_agents:
-                    observation = available_tools[tool_name].__call__(arguments)
-                else:
-                    observation = available_tools[tool_name].__call__(arguments, sanitize_inputs_outputs=True)
-            elif isinstance(arguments, dict):
-                for key, value in arguments.items():
-                    if isinstance(value, str) and value in self.state:
-                        arguments[key] = self.state[value]
-                if tool_name in self.managed_agents:
-                    observation = available_tools[tool_name].__call__(**arguments)
-                else:
-                    observation = available_tools[tool_name].__call__(**arguments, sanitize_inputs_outputs=True)
-            else:
-                error_msg = f"Arguments passed to tool should be a dict or string: got a {type(arguments)}."
-                raise AgentExecutionError(error_msg, self.logger)
-            return observation
-        except Exception as e:
-            if tool_name in self.tools:
-                tool = self.tools[tool_name]
-                error_msg = (
-                    f"Error when executing tool {tool_name} with arguments {arguments}: {type(e).__name__}: {e}\nYou should only use this tool with a correct input.\n"
-                    f"As a reminder, this tool's description is the following: '{tool.description}'.\nIt takes inputs: {tool.inputs} and returns output type {tool.output_type}"
-                )
-                raise AgentExecutionError(error_msg, self.logger)
-            elif tool_name in self.managed_agents:
-                error_msg = (
-                    f"Error in calling team member: {e}\nYou should only ask this team member with a correct request.\n"
-                    f"As a reminder, this team member's description is the following:\n{available_tools[tool_name]}"
-                )
-                raise AgentExecutionError(error_msg, self.logger)
-
-    def step(self, memory_step: ActionStep) -> Union[None, Any]:
+    @abstractmethod
+    def step(self, memory_step: ActionStep) -> None | Any:
         """To be implemented in children classes. Should return either None if the step is not final."""
         pass
 
@@ -661,7 +608,6 @@ def replay(self, detailed: bool = False):
 
     def __call__(self, task: str, **kwargs):
         """Adds additional prompting for the managed agent, runs it, and wraps the output.
-
         This method is called only by a managed agent.
         """
         full_task = populate_template(
@@ -680,7 +626,7 @@ def __call__(self, task: str, **kwargs):
             answer += "\n</summary_of_work>"
         return answer
 
-    def save(self, output_dir: str, relative_path: Optional[str] = None):
+    def save(self, output_dir: str | Path, relative_path: str | None = None):
         """
         Saves the relevant code files for your agent. This will copy the code of your agent in `output_dir` as well as autogenerate:
 
@@ -693,7 +639,7 @@ def save(self, output_dir: str, relative_path: Optional[str] = None):
           code)
 
         Args:
-            output_dir (`str`): The folder in which you want to save your tool.
+            output_dir (`str` or `Path`): The folder in which you want to save your agent.
         """
         make_init_file(output_dir)
 
@@ -730,6 +676,7 @@ def save(self, output_dir: str, relative_path: Optional[str] = None):
         # Save agent dictionary to json
         agent_dict = self.to_dict()
         agent_dict["tools"] = [tool.name for tool in self.tools.values()]
+        agent_dict["managed_agents"] = {agent.name: agent.__class__.__name__ for agent in self.managed_agents.values()}
         with open(os.path.join(output_dir, "agent.json"), "w", encoding="utf-8") as f:
             json.dump(agent_dict, f, indent=4)
 
@@ -798,8 +745,12 @@ def save(self, output_dir: str, relative_path: Optional[str] = None):
         with open(os.path.join(output_dir, "app.py"), "w", encoding="utf-8") as f:
             f.write(app_text + "\n")  # Append newline at the end
 
-    def to_dict(self) -> Dict[str, Any]:
-        """Converts agent into a dictionary."""
+    def to_dict(self) -> dict[str, Any]:
+        """Convert the agent to a dictionary representation.
+
+        Returns:
+            `dict`: Dictionary representation of the agent.
+        """
         # TODO: handle serializing step_callbacks and final_answer_checks
         for attr in ["final_answer_checks", "step_callbacks"]:
             if getattr(self, attr, None):
@@ -817,14 +768,13 @@ def to_dict(self) -> Dict[str, Any]:
             )
 
         agent_dict = {
+            "class": self.__class__.__name__,
             "tools": tool_dicts,
             "model": {
                 "class": self.model.__class__.__name__,
                 "data": self.model.to_dict(),
             },
-            "managed_agents": {
-                managed_agent.name: managed_agent.__class__.__name__ for managed_agent in self.managed_agents.values()
-            },
+            "managed_agents": [managed_agent.to_dict() for managed_agent in self.managed_agents.values()],
             "prompt_templates": self.prompt_templates,
             "max_steps": self.max_steps,
             "verbosity_level": int(self.logger.level),
@@ -832,21 +782,58 @@ def to_dict(self) -> Dict[str, Any]:
             "planning_interval": self.planning_interval,
             "name": self.name,
             "description": self.description,
-            "requirements": list(requirements),
+            "requirements": sorted(requirements),
         }
-        if hasattr(self, "authorized_imports"):
-            agent_dict["authorized_imports"] = self.authorized_imports
-        if hasattr(self, "use_e2b_executor"):
-            agent_dict["use_e2b_executor"] = self.use_e2b_executor
-        if hasattr(self, "max_print_outputs_length"):
-            agent_dict["max_print_outputs_length"] = self.max_print_outputs_length
         return agent_dict
 
+    @classmethod
+    def from_dict(cls, agent_dict: dict[str, Any], **kwargs) -> "MultiStepAgent":
+        """Create agent from a dictionary representation.
+
+        Args:
+            agent_dict (`dict[str, Any]`): Dictionary representation of the agent (the format produced by `to_dict`).
+            **kwargs: Additional keyword arguments that will override agent_dict values.
+
+        Returns:
+            `MultiStepAgent`: Instance of the agent class.
+        """
+        # Load model
+        model_info = agent_dict["model"]
+        model_class = getattr(importlib.import_module("smolagents.models"), model_info["class"])
+        model = model_class.from_dict(model_info["data"])
+        # Load tools
+        tools = [Tool.from_code(tool_info["code"]) for tool_info in agent_dict["tools"]]
+        # Load managed agents: `to_dict` serializes them as a list of agent dictionaries, each carrying its
+        # own "class" name. `from_folder` passes an empty mapping here and supplies the instances via kwargs.
+        managed_agents = []
+        for managed_agent_dict in agent_dict.get("managed_agents") or []:
+            managed_agent_class = getattr(importlib.import_module("smolagents.agents"), managed_agent_dict["class"])
+            managed_agents.append(managed_agent_class.from_dict(managed_agent_dict))
+        # Extract base agent parameters
+        agent_args = {
+            "model": model,
+            "tools": tools,
+            "managed_agents": managed_agents or None,  # None is dropped below, keeping the __init__ default
+            "prompt_templates": agent_dict.get("prompt_templates"),
+            "max_steps": agent_dict.get("max_steps"),
+            "verbosity_level": agent_dict.get("verbosity_level"),
+            "grammar": agent_dict.get("grammar"),
+            "planning_interval": agent_dict.get("planning_interval"),
+            "name": agent_dict.get("name"),
+            "description": agent_dict.get("description"),
+        }
+        # Filter out None values to use defaults from __init__
+        agent_args = {k: v for k, v in agent_args.items() if v is not None}
+        # Update with any additional kwargs
+        agent_args.update(kwargs)
+        # Create agent instance
+        return cls(**agent_args)
+
     @classmethod
     def from_hub(
         cls,
         repo_id: str,
-        token: Optional[str] = None,
+        token: str | None = None,
         trust_remote_code: bool = False,
         **kwargs,
     ):
@@ -897,54 +884,43 @@ def from_hub(
         return cls.from_folder(download_folder, **kwargs)
 
     @classmethod
-    def from_folder(cls, folder: Union[str, Path], **kwargs):
+    def from_folder(cls, folder: str | Path, **kwargs):
         """Loads an agent from a local folder.
 
         Args:
             folder (`str` or `Path`): The folder where the agent is saved.
             **kwargs: Additional keyword arguments that will be passed to the agent's init.
         """
+        # Load agent.json
         folder = Path(folder)
         agent_dict = json.loads((folder / "agent.json").read_text())
 
-        # Recursively get managed agents
+        # Load managed agents from their respective folders, recursively
         managed_agents = []
-        for managed_agent_name, managed_agent_class in agent_dict["managed_agents"].items():
-            agent_cls = getattr(importlib.import_module("smolagents.agents"), managed_agent_class)
+        for managed_agent_name, managed_agent_class_name in agent_dict["managed_agents"].items():
+            agent_cls = getattr(importlib.import_module("smolagents.agents"), managed_agent_class_name)
             managed_agents.append(agent_cls.from_folder(folder / "managed_agents" / managed_agent_name))
+        agent_dict["managed_agents"] = {}
 
+        # Load tools
         tools = []
         for tool_name in agent_dict["tools"]:
             tool_code = (folder / "tools" / f"{tool_name}.py").read_text()
-            tools.append(Tool.from_code(tool_code))
+            tools.append({"name": tool_name, "code": tool_code})
+        agent_dict["tools"] = tools
 
-        model_class: Model = getattr(importlib.import_module("smolagents.models"), agent_dict["model"]["class"])
-        model = model_class.from_dict(agent_dict["model"]["data"])
+        # Add managed agents to kwargs to override the empty list in from_dict
+        if managed_agents:
+            kwargs["managed_agents"] = managed_agents
 
-        args = dict(
-            model=model,
-            tools=tools,
-            managed_agents=managed_agents,
-            name=agent_dict["name"],
-            description=agent_dict["description"],
-            max_steps=agent_dict["max_steps"],
-            planning_interval=agent_dict["planning_interval"],
-            grammar=agent_dict["grammar"],
-            verbosity_level=agent_dict["verbosity_level"],
-        )
-        if cls.__name__ == "CodeAgent":
-            args["additional_authorized_imports"] = agent_dict["authorized_imports"]
-            args["use_e2b_executor"] = agent_dict["use_e2b_executor"]
-            args["max_print_outputs_length"] = agent_dict["max_print_outputs_length"]
-        args.update(kwargs)
-        return cls(**args)
+        return cls.from_dict(agent_dict, **kwargs)
 
     def push_to_hub(
         self,
         repo_id: str,
         commit_message: str = "Upload agent",
-        private: Optional[bool] = None,
-        token: Optional[Union[bool, str]] = None,
+        private: bool | None = None,
+        token: bool | str | None = None,
         create_pr: bool = False,
     ) -> str:
         """
@@ -1008,10 +984,10 @@ class ToolCallingAgent(MultiStepAgent):
 
     def __init__(
         self,
-        tools: List[Tool],
-        model: Callable[[List[Dict[str, str]]], ChatMessage],
-        prompt_templates: Optional[PromptTemplates] = None,
-        planning_interval: Optional[int] = None,
+        tools: list[Tool],
+        model: Callable[[list[dict[str, str]]], ChatMessage],
+        prompt_templates: PromptTemplates | None = None,
+        planning_interval: int | None = None,
         **kwargs,
     ):
         prompt_templates = prompt_templates or yaml.safe_load(
@@ -1032,34 +1008,49 @@ def initialize_system_prompt(self) -> str:
         )
         return system_prompt
 
-    def step(self, memory_step: ActionStep) -> Union[None, Any]:
+    def step(self, memory_step: ActionStep) -> None | Any:
         """
         Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
         Returns None if the step is not final.
         """
         memory_messages = self.write_memory_to_messages()
 
-        self.input_messages = memory_messages
+        input_messages = memory_messages.copy()
 
         # Add new step in logs
-        memory_step.model_input_messages = memory_messages.copy()
+        memory_step.model_input_messages = input_messages
 
         try:
-            model_message: ChatMessage = self.model(
-                memory_messages,
+            chat_message: ChatMessage = self.model(
+                input_messages,
+                stop_sequences=["Observation:", "Calling tools:"],
                 tools_to_call_from=list(self.tools.values()),
-                stop_sequences=["Observation:"],
             )
-            memory_step.model_output_message = model_message
-            if model_message.tool_calls is None or len(model_message.tool_calls) == 0:
-                raise Exception("Model did not call any tools. Call `final_answer` tool to return a final answer.")
-            tool_call = model_message.tool_calls[0]
-            tool_name, tool_call_id = tool_call.function.name, tool_call.id
-            tool_arguments = tool_call.function.arguments
+            memory_step.model_output_message = chat_message
+            model_output = chat_message.content
+            self.logger.log_markdown(
+                content=model_output if model_output else str(chat_message.raw),
+                title="Output message of the LLM:",
+                level=LogLevel.DEBUG,
+            )
 
+            memory_step.model_output_message.content = model_output
+            memory_step.model_output = model_output
         except Exception as e:
-            raise AgentGenerationError(f"Error in generating tool call with model:\n{e}", self.logger) from e
+            raise AgentGenerationError(f"Error while generating output:\n{e}", self.logger) from e
 
+        if chat_message.tool_calls is None or len(chat_message.tool_calls) == 0:
+            try:
+                chat_message = self.model.parse_tool_calls(chat_message)
+            except Exception as e:
+                raise AgentParsingError(f"Error while parsing tool call from model output: {e}", self.logger)
+        else:
+            for tool_call in chat_message.tool_calls:
+                tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments)
+        tool_call = chat_message.tool_calls[0]  # type: ignore
+        tool_name, tool_call_id = tool_call.function.name, tool_call.id
+        tool_arguments = tool_call.function.arguments
+        memory_step.model_output = str(f"Called Tool: '{tool_name}' with arguments: {tool_arguments}")
         memory_step.tool_calls = [ToolCall(name=tool_name, arguments=tool_arguments, id=tool_call_id)]
 
         # Execute
@@ -1115,6 +1106,79 @@ def step(self, memory_step: ActionStep) -> Union[None, Any]:
             memory_step.observations = updated_information
             return None
 
+    def _substitute_state_variables(self, arguments: dict[str, str] | str) -> dict[str, Any] | str:
+        """Resolve state references: string values that are keys of `self.state` become the stored values."""
+        if not isinstance(arguments, dict):
+            return arguments  # Anything that is not a dict is handed back unchanged
+        resolved = {}
+        for name, raw in arguments.items():
+            resolved[name] = self.state.get(raw, raw) if isinstance(raw, str) else raw
+        return resolved
+
+    def execute_tool_call(self, tool_name: str, arguments: dict[str, str] | str) -> Any:
+        """
+        Execute a tool or managed agent with the provided arguments.
+
+        Args:
+            tool_name (`str`): Name of the tool or managed agent to execute.
+            arguments (dict[str, str] | str): Arguments passed to the tool call. String values that refer to
+                state variables are replaced with the actual values from the state before the call.
+
+        Raises:
+            AgentToolExecutionError: If `tool_name` is unknown, or the call raises anything but a `TypeError`.
+            AgentToolCallError: If the call raises a `TypeError` (invalid or unsupported arguments).
+        """
+        available_tools = {**self.tools, **self.managed_agents}
+        if tool_name not in available_tools:
+            raise AgentToolExecutionError(
+                f"Unknown tool {tool_name}, should be one of: {', '.join(available_tools)}.", self.logger
+            )
+
+        tool = available_tools[tool_name]
+        arguments = self._substitute_state_variables(arguments)
+        is_managed_agent = tool_name in self.managed_agents
+
+        try:
+            if isinstance(arguments, dict):
+                return tool(**arguments) if is_managed_agent else tool(**arguments, sanitize_inputs_outputs=True)
+            elif isinstance(arguments, str):
+                return tool(arguments) if is_managed_agent else tool(arguments, sanitize_inputs_outputs=True)
+            else:
+                raise TypeError(f"Unsupported arguments type: {type(arguments)}")
+
+        except TypeError as e:
+            # Handle invalid arguments; default=str keeps json.dumps from raising on non-JSON state values
+            description = getattr(tool, "description", "No description")
+            if is_managed_agent:
+                error_msg = (
+                    f"Invalid request to team member '{tool_name}' with arguments {json.dumps(arguments, default=str)}: {e}\n"
+                    "You should call this team member with a valid request.\n"
+                    f"Team member description: {description}"
+                )
+            else:
+                error_msg = (
+                    f"Invalid call to tool '{tool_name}' with arguments {json.dumps(arguments, default=str)}: {e}\n"
+                    "You should call this tool with correct input arguments.\n"
+                    f"Expected inputs: {json.dumps(tool.inputs)}\n"
+                    f"Returns output type: {tool.output_type}\n"
+                    f"Tool description: '{description}'"
+                )
+            raise AgentToolCallError(error_msg, self.logger) from e
+
+        except Exception as e:
+            # Handle execution errors; default=str again guards the message against non-JSON argument values
+            if is_managed_agent:
+                error_msg = (
+                    f"Error executing request to team member '{tool_name}' with arguments {json.dumps(arguments, default=str)}: {e}\n"
+                    "Please try again or request to another team member"
+                )
+            else:
+                error_msg = (
+                    f"Error executing tool '{tool_name}' with arguments {json.dumps(arguments, default=str)}: {type(e).__name__}: {e}\n"
+                    "Please try again or use another tool"
+                )
+            raise AgentToolExecutionError(error_msg, self.logger) from e
+
 
 class CodeAgent(MultiStepAgent):
     """
@@ -1122,32 +1186,34 @@ class CodeAgent(MultiStepAgent):
 
     Args:
         tools (`list[Tool]`): [`Tool`]s that the agent can use.
-        model (`Callable[[list[dict[str, str]]], ChatMessage]`): Model that will generate the agent's actions.
+        model (`Model`): Model that will generate the agent's actions.
         prompt_templates ([`~agents.PromptTemplates`], *optional*): Prompt templates.
         grammar (`dict[str, str]`, *optional*): Grammar used to parse the LLM output.
         additional_authorized_imports (`list[str]`, *optional*): Additional authorized imports for the agent.
         planning_interval (`int`, *optional*): Interval at which the agent will run a planning step.
-        use_e2b_executor (`bool`, default `False`): Whether to use the E2B executor for remote code execution.
+        executor_type (`str`, default `"local"`): Which executor type to use between `"local"`, `"e2b"`, or `"docker"`.
+        executor_kwargs (`dict`, *optional*): Additional arguments to pass to initialize the executor.
         max_print_outputs_length (`int`, *optional*): Maximum length of the print outputs.
+        stream_outputs (`bool`, *optional*, default `False`): Whether to stream outputs during execution.
         **kwargs: Additional keyword arguments.
-
     """
 
     def __init__(
         self,
-        tools: List[Tool],
-        model: Callable[[List[Dict[str, str]]], ChatMessage],
-        prompt_templates: Optional[PromptTemplates] = None,
-        grammar: Optional[Dict[str, str]] = None,
-        additional_authorized_imports: Optional[List[str]] = None,
-        planning_interval: Optional[int] = None,
-        use_e2b_executor: bool = False,
-        max_print_outputs_length: Optional[int] = None,
+        tools: list[Tool],
+        model: Model,
+        prompt_templates: PromptTemplates | None = None,
+        grammar: dict[str, str] | None = None,
+        additional_authorized_imports: list[str] | None = None,
+        planning_interval: int | None = None,
+        executor_type: str | None = "local",
+        executor_kwargs: dict[str, Any] | None = None,
+        max_print_outputs_length: int | None = None,
+        stream_outputs: bool = False,
         **kwargs,
     ):
         self.additional_authorized_imports = additional_authorized_imports if additional_authorized_imports else []
-        self.authorized_imports = list(set(BASE_BUILTIN_MODULES) | set(self.additional_authorized_imports))
-        self.use_e2b_executor = use_e2b_executor
+        self.authorized_imports = sorted(set(BASE_BUILTIN_MODULES) | set(self.additional_authorized_imports))
         self.max_print_outputs_length = max_print_outputs_length
         prompt_templates = prompt_templates or yaml.safe_load(
             importlib.resources.files("smolagents.prompts").joinpath("code_agent.yaml").read_text()
@@ -1160,30 +1226,36 @@ def __init__(
             planning_interval=planning_interval,
             **kwargs,
         )
+        self.stream_outputs = stream_outputs
+        if self.stream_outputs and not hasattr(self.model, "generate_stream"):
+            raise ValueError(
+                "`stream_outputs` is set to True, but the model class implements no `generate_stream` method."
+            )
         if "*" in self.additional_authorized_imports:
             self.logger.log(
                 "Caution: you set an authorization for all imports, meaning your agent can decide to import any package it deems necessary. This might raise issues if the package is not installed in your environment.",
-                0,
-            )
-
-        if use_e2b_executor and len(self.managed_agents) > 0:
-            raise Exception(
-                f"You passed both {use_e2b_executor=} and some managed agents. Managed agents is not yet supported with remote code execution."
-            )
-
-        all_tools = {**self.tools, **self.managed_agents}
-        if use_e2b_executor:
-            self.python_executor = E2BExecutor(
-                self.additional_authorized_imports,
-                list(all_tools.values()),
-                self.logger,
-            )
-        else:
-            self.python_executor = LocalPythonInterpreter(
-                self.additional_authorized_imports,
-                all_tools,
-                max_print_outputs_length=max_print_outputs_length,
+                level=LogLevel.INFO,
             )
+        self.executor_type = executor_type or "local"
+        self.executor_kwargs = executor_kwargs or {}
+        self.python_executor = self.create_python_executor()
+
+    def create_python_executor(self) -> PythonExecutor:
+        """Build the executor named by `self.executor_type`: "local", "e2b" or "docker"."""
+        executor_type = self.executor_type
+        if executor_type in ("e2b", "docker"):
+            # Remote executors do not support managed agents yet, so refuse that combination up front.
+            if self.managed_agents:
+                raise Exception("Managed agents are not yet supported with remote code execution.")
+            remote_executor_cls = E2BExecutor if executor_type == "e2b" else DockerExecutor
+            return remote_executor_cls(self.additional_authorized_imports, self.logger, **self.executor_kwargs)
+        if executor_type == "local":
+            return LocalPythonExecutor(
+                self.additional_authorized_imports,
+                max_print_outputs_length=self.max_print_outputs_length,
+            )
+        # Any other value is rejected.
+        raise ValueError(f"Unsupported executor type: {self.executor_type}")
 
     def initialize_system_prompt(self) -> str:
         system_prompt = populate_template(
@@ -1200,37 +1272,60 @@ def initialize_system_prompt(self) -> str:
         )
         return system_prompt
 
-    def step(self, memory_step: ActionStep) -> Union[None, Any]:
+    def step(self, memory_step: ActionStep) -> None | Any:
         """
         Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
         Returns None if the step is not final.
         """
         memory_messages = self.write_memory_to_messages()
 
-        self.input_messages = memory_messages.copy()
-
-        # Add new step in logs
-        memory_step.model_input_messages = memory_messages.copy()
+        input_messages = memory_messages.copy()
+        ### Generate model output ###
+        memory_step.model_input_messages = input_messages
         try:
             additional_args = {"grammar": self.grammar} if self.grammar is not None else {}
-            chat_message: ChatMessage = self.model(
-                self.input_messages,
-                stop_sequences=["<end_code>", "Observation:"],
-                **additional_args,
-            )
-            memory_step.model_output_message = chat_message
-            model_output = chat_message.content
+            if self.stream_outputs:
+                output_stream = self.model.generate_stream(
+                    input_messages,
+                    stop_sequences=["<end_code>", "Observation:", "Calling tools:"],
+                    **additional_args,
+                )
+                output_text = ""
+                with Live("", console=self.logger.console, vertical_overflow="visible") as live:
+                    for event in output_stream:
+                        if event.content is not None:
+                            output_text += event.content
+                            live.update(Markdown(output_text))
+
+                model_output = output_text
+                chat_message = ChatMessage(role="assistant", content=model_output)
+                memory_step.model_output_message = chat_message
+                model_output = chat_message.content
+            else:
+                chat_message: ChatMessage = self.model(
+                    input_messages,
+                    stop_sequences=["<end_code>", "Observation:", "Calling tools:"],
+                    **additional_args,
+                )
+                memory_step.model_output_message = chat_message
+                model_output = chat_message.content
+                self.logger.log_markdown(
+                    content=model_output,
+                    title="Output message of the LLM:",
+                    level=LogLevel.DEBUG,
+                )
+
+            # This adds <end_code> sequence to the history.
+            # This will nudge ulterior LLM calls to finish with <end_code>, thus efficiently stopping generation.
+            if model_output and model_output.strip().endswith("```"):
+                model_output += "<end_code>"
+                memory_step.model_output_message.content = model_output
+
             memory_step.model_output = model_output
         except Exception as e:
             raise AgentGenerationError(f"Error in generating model output:\n{e}", self.logger) from e
 
-        self.logger.log_markdown(
-            content=model_output,
-            title="Output message of the LLM:",
-            level=LogLevel.DEBUG,
-        )
-
-        # Parse
+        ### Parse output ###
         try:
             code_action = fix_final_answer_code(parse_code_blobs(model_output))
         except Exception as e:
@@ -1245,14 +1340,11 @@ def step(self, memory_step: ActionStep) -> Union[None, Any]:
             )
         ]
 
-        # Execute
+        ### Execute action ###
         self.logger.log_code(title="Executing parsed code:", content=code_action, level=LogLevel.INFO)
         is_final_answer = False
         try:
-            output, execution_logs, is_final_answer = self.python_executor(
-                code_action,
-                self.state,
-            )
+            output, execution_logs, is_final_answer = self.python_executor(code_action)
             execution_outputs_console = []
             if len(execution_logs) > 0:
                 execution_outputs_console += [
@@ -1291,3 +1383,41 @@ def step(self, memory_step: ActionStep) -> Union[None, Any]:
         self.logger.log(Group(*execution_outputs_console), level=LogLevel.INFO)
         memory_step.action_output = output
         return output if is_final_answer else None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the agent, extending the parent's dictionary with CodeAgent-specific settings.
+
+        Returns:
+            `dict`: Dictionary representation of the agent.
+        """
+        serialized = super().to_dict()
+        code_agent_fields = ("authorized_imports", "executor_type", "executor_kwargs", "max_print_outputs_length")
+        for field_name in code_agent_fields:
+            # Each serialized key mirrors the instance attribute of the same name.
+            serialized[field_name] = getattr(self, field_name)
+        return serialized
+
+    @classmethod
+    def from_dict(cls, agent_dict: dict[str, Any], **kwargs) -> "CodeAgent":
+        """Rebuild a CodeAgent from its dictionary representation.
+
+        Args:
+            agent_dict (`dict[str, Any]`): Dictionary representation of the agent.
+            **kwargs: Additional keyword arguments that will override agent_dict values.
+
+        Returns:
+            `CodeAgent`: Instance of the CodeAgent class.
+        """
+        serialized_keys = {  # constructor argument -> key it is read from in `agent_dict`
+            "additional_authorized_imports": "authorized_imports",
+            "executor_type": "executor_type",
+            "executor_kwargs": "executor_kwargs",
+            "max_print_outputs_length": "max_print_outputs_length",
+        }
+        code_agent_kwargs = {}
+        for argument_name, key in serialized_keys.items():
+            value = agent_dict.get(key)
+            if value is not None:  # missing or None entries are left out
+                code_agent_kwargs[argument_name] = value
+        # Explicit kwargs take precedence; the parent class's from_dict finishes the construction.
+        return super().from_dict(agent_dict, **{**code_agent_kwargs, **kwargs})
diff --git a/src/smolagents/cli.py b/src/smolagents/cli.py
index bcf984532..ccb8295ef 100644
--- a/src/smolagents/cli.py
+++ b/src/smolagents/cli.py
@@ -19,15 +19,15 @@
 
 from dotenv import load_dotenv
 
-from smolagents import CodeAgent, HfApiModel, LiteLLMModel, Model, OpenAIServerModel, Tool, TransformersModel
+from smolagents import CodeAgent, InferenceClientModel, LiteLLMModel, Model, OpenAIServerModel, Tool, TransformersModel
 from smolagents.default_tools import TOOL_MAPPING
 
 
 leopard_prompt = "How many seconds would it take for a leopard at full speed to run through Pont des Arts?"
 
 
-def parse_arguments(description):
-    parser = argparse.ArgumentParser(description=description)
+def parse_arguments():
+    parser = argparse.ArgumentParser(description="Run a CodeAgent with all specified parameters")
     parser.add_argument(
         "prompt",
         type=str,
@@ -38,8 +38,8 @@ def parse_arguments(description):
     parser.add_argument(
         "--model-type",
         type=str,
-        default="HfApiModel",
-        help="The model type to use (e.g., HfApiModel, OpenAIServerModel, LiteLLMModel, TransformersModel)",
+        default="InferenceClientModel",
+        help="The model type to use (e.g., InferenceClientModel, OpenAIServerModel, LiteLLMModel, TransformersModel)",
     )
     parser.add_argument(
         "--model-id",
@@ -66,6 +66,12 @@ def parse_arguments(description):
         help="The verbosity level, as an int in [0, 1, 2].",
     )
     group = parser.add_argument_group("api options", "Options for API-based model types")
+    group.add_argument(
+        "--provider",
+        type=str,
+        default=None,
+        help="The inference provider to use for the model",
+    )
     group.add_argument(
         "--api-base",
         type=str,
@@ -79,7 +85,13 @@ def parse_arguments(description):
     return parser.parse_args()
 
 
-def load_model(model_type: str, model_id: str, api_base: str | None, api_key: str | None) -> Model:
+def load_model(
+    model_type: str,
+    model_id: str,
+    api_base: str | None = None,
+    api_key: str | None = None,
+    provider: str | None = None,
+) -> Model:
     if model_type == "OpenAIServerModel":
         return OpenAIServerModel(
             api_key=api_key or os.getenv("FIREWORKS_API_KEY"),
@@ -89,29 +101,37 @@ def load_model(model_type: str, model_id: str, api_base: str | None, api_key: st
     elif model_type == "LiteLLMModel":
         return LiteLLMModel(
             model_id=model_id,
-            api_key=api_key or os.getenv("OPENAI_API_KEY"),
+            api_key=api_key,
             api_base=api_base,
         )
     elif model_type == "TransformersModel":
-        return TransformersModel(model_id=model_id, device_map="auto", flatten_messages_as_text=False)
-    elif model_type == "HfApiModel":
-        return HfApiModel(
-            token=api_key or os.getenv("HF_API_KEY"),
+        return TransformersModel(model_id=model_id, device_map="auto")
+    elif model_type == "InferenceClientModel":
+        return InferenceClientModel(
             model_id=model_id,
+            token=api_key or os.getenv("HF_API_KEY"),
+            provider=provider,
         )
     else:
         raise ValueError(f"Unsupported model type: {model_type}")
 
 
-def main():
+def run_smolagent(
+    prompt: str,
+    tools: list[str],
+    model_type: str,
+    model_id: str,
+    api_base: str | None = None,
+    api_key: str | None = None,
+    imports: list[str] | None = None,
+    provider: str | None = None,
+) -> None:
     load_dotenv()
 
-    args = parse_arguments(description="Run a CodeAgent with all specified parameters")
-
-    model = load_model(args.model_type, args.model_id, args.api_base, args.api_key)
+    model = load_model(model_type, model_id, api_base=api_base, api_key=api_key, provider=provider)
 
     available_tools = []
-    for tool_name in args.tools:
+    for tool_name in tools:
         if "/" in tool_name:
             available_tools.append(Tool.from_space(tool_name))
         else:
@@ -120,10 +140,24 @@ def main():
             else:
                 raise ValueError(f"Tool {tool_name} is not recognized either as a default tool or a Space.")
 
-    print(f"Running agent with these tools: {args.tools}")
-    agent = CodeAgent(tools=available_tools, model=model, additional_authorized_imports=args.imports)
+    print(f"Running agent with these tools: {tools}")
+    agent = CodeAgent(tools=available_tools, model=model, additional_authorized_imports=imports)
+
+    agent.run(prompt)
 
-    agent.run(args.prompt)
+
+def main() -> None:
+    """Command-line entry point: parse the arguments, then run the agent with them."""
+    cli_args = parse_arguments()
+    # These settings are passed by keyword; each is read from the parsed attribute of the same name.
+    keyword_names = ("provider", "api_base", "api_key", "imports")
+    run_smolagent(
+        cli_args.prompt,
+        cli_args.tools,
+        cli_args.model_type,
+        cli_args.model_id,
+        **{name: getattr(cli_args, name) for name in keyword_names},
+    )
 
 
 if __name__ == "__main__":
diff --git a/src/smolagents/default_tools.py b/src/smolagents/default_tools.py
index 2ea7834f6..d12a38d5a 100644
--- a/src/smolagents/default_tools.py
+++ b/src/smolagents/default_tools.py
@@ -14,9 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import re
 from dataclasses import dataclass
-from typing import Any, Dict, Optional
+from typing import Any
 
 from .local_python_executor import (
     BASE_BUILTIN_MODULES,
@@ -29,7 +28,7 @@
 @dataclass
 class PreTool:
     name: str
-    inputs: Dict[str, str]
+    inputs: dict[str, str]
     output_type: type
     task: str
     description: str
@@ -57,7 +56,7 @@ def __init__(self, *args, authorized_imports=None, **kwargs):
                 "type": "string",
                 "description": (
                     "The code snippet to evaluate. All variables used in this snippet must be defined in this same snippet, "
-                    f"else you will get an error. This code can only import the following python libraries: {authorized_imports}."
+                    f"else you will get an error. This code can only import the following python libraries: {self.authorized_imports}."
                 ),
             }
         }
@@ -138,7 +137,7 @@ class GoogleSearchTool(Tool):
     output_type = "string"
 
     def __init__(self, provider: str = "serpapi"):
-        super().__init__(self)
+        super().__init__()
         import os
 
         self.provider = provider
@@ -152,7 +151,7 @@ def __init__(self, provider: str = "serpapi"):
         if self.api_key is None:
             raise ValueError(f"Missing API key. Make sure you have '{api_key_env_name}' in your env variables.")
 
-    def forward(self, query: str, filter_year: Optional[int] = None) -> str:
+    def forward(self, query: str, filter_year: int | None = None) -> str:
         import requests
 
         if self.provider == "serpapi":
@@ -224,8 +223,14 @@ class VisitWebpageTool(Tool):
     }
     output_type = "string"
 
+    def __init__(self, max_output_length: int = 40000):
+        super().__init__()
+        self.max_output_length = max_output_length  # limit passed to truncate_content() in forward()
+
     def forward(self, url: str) -> str:
         try:
+            import re
+
             import requests
             from markdownify import markdownify
             from requests.exceptions import RequestException
@@ -246,7 +251,7 @@ def forward(self, url: str) -> str:
             # Remove multiple line breaks
             markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
 
-            return truncate_content(markdown_content, 10000)
+            return truncate_content(markdown_content, self.max_output_length)
 
         except requests.exceptions.Timeout:
             return "The request timed out. Please try again later or check the URL."
@@ -256,6 +261,102 @@ def forward(self, url: str) -> str:
             return f"An unexpected error occurred: {str(e)}"
 
 
+class WikipediaSearchTool(Tool):
+    """
+    WikipediaSearchTool searches Wikipedia and returns a summary or full text of the given topic, along with the page URL.
+
+    Attributes:
+        user_agent (str): A custom user-agent string to identify the project. This is required as per Wikipedia API policies, read more here: https://github.com/martin-majlis/Wikipedia-API/blob/master/README.rst
+        language (str): The language in which to retrieve Wikipedia articles. Defaults to `"en"`.
+            See https://meta.wikimedia.org/wiki/List_of_Wikipedias
+        content_type (str): Defines the content to fetch. Can be "summary" for a short summary or "text" (the default) for the full article.
+        extract_format (str): Defines the output format. Can be `"WIKI"` (the default) or `"HTML"`; stored as the matching `wikipediaapi.ExtractFormat` member.
+
+    Example:
+        >>> from smolagents import CodeAgent, InferenceClientModel, WikipediaSearchTool
+        >>> agent = CodeAgent(
+        >>>     tools=[
+        >>>            WikipediaSearchTool(
+        >>>                user_agent="MyResearchBot (myemail@example.com)",
+        >>>                language="en",
+        >>>                content_type="summary",  # or "text"
+        >>>                extract_format="WIKI",
+        >>>            )
+        >>>        ],
+        >>>     model=InferenceClientModel(),
+        >>> )
+        >>> agent.run("Python_(programming_language)")
+    """
+
+    name = "wikipedia_search"
+    description = "Searches Wikipedia and returns a summary or full text of the given topic, along with the page URL."
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The topic to search on Wikipedia.",
+        }
+    }
+    output_type = "string"
+
+    def __init__(
+        self,
+        user_agent: str = "Smolagents (myemail@example.com)",
+        language: str = "en",
+        content_type: str = "text",
+        extract_format: str = "WIKI",
+    ):
+        super().__init__()
+        try:
+            import wikipediaapi  # imported lazily: only instantiating this tool requires `wikipedia-api`
+        except ImportError as e:
+            raise ImportError(
+                "You must install `wikipedia-api` to run this tool: for instance run `pip install wikipedia-api`"
+            ) from e
+        if not user_agent:
+            raise ValueError("User-agent is required. Provide a meaningful identifier for your project.")
+
+        self.user_agent = user_agent
+        self.language = language
+        self.content_type = content_type  # not validated here; forward() reports an unknown value
+
+        # Map string format to wikipediaapi.ExtractFormat
+        extract_format_map = {
+            "WIKI": wikipediaapi.ExtractFormat.WIKI,
+            "HTML": wikipediaapi.ExtractFormat.HTML,
+        }
+
+        if extract_format not in extract_format_map:
+            raise ValueError("Invalid extract_format. Choose between 'WIKI' or 'HTML'.")
+
+        self.extract_format = extract_format_map[extract_format]
+
+        self.wiki = wikipediaapi.Wikipedia(
+            user_agent=self.user_agent, language=self.language, extract_format=self.extract_format
+        )
+
+    def forward(self, query: str) -> str:
+        try:
+            page = self.wiki.page(query)
+
+            if not page.exists():
+                return f"No Wikipedia page found for '{query}'. Try a different query."
+
+            title = page.title
+            url = page.fullurl
+
+            if self.content_type == "summary":
+                text = page.summary
+            elif self.content_type == "text":
+                text = page.text
+            else:
+                return "⚠️ Invalid `content_type`. Use either 'summary' or 'text'."
+
+            return f"✅ **Wikipedia Page:** {title}\n\n**Content:** {text}\n\n🔗 **Read more:** {url}"
+
+        except Exception as e:  # failures are returned to the caller as text rather than raised
+            return f"Error fetching Wikipedia summary: {str(e)}"
+
+
 class SpeechToTextTool(PipelineTool):
     default_checkpoint = "openai/whisper-large-v3-turbo"
     description = "This is a tool that transcribes an audio into text. It returns the transcribed text."
@@ -307,5 +408,6 @@ def decode(self, outputs):
     "DuckDuckGoSearchTool",
     "GoogleSearchTool",
     "VisitWebpageTool",
+    "WikipediaSearchTool",
     "SpeechToTextTool",
 ]
diff --git a/src/smolagents/e2b_executor.py b/src/smolagents/e2b_executor.py
deleted file mode 100644
index 10b0170ee..000000000
--- a/src/smolagents/e2b_executor.py
+++ /dev/null
@@ -1,162 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import base64
-import pickle
-import re
-import textwrap
-from io import BytesIO
-from typing import Any, List, Tuple
-
-from PIL import Image
-
-from .tool_validation import validate_tool_attributes
-from .tools import Tool
-from .utils import BASE_BUILTIN_MODULES, instance_to_source
-
-
-try:
-    from dotenv import load_dotenv
-
-    load_dotenv()
-except ModuleNotFoundError:
-    pass
-
-
-class E2BExecutor:
-    def __init__(self, additional_imports: List[str], tools: List[Tool], logger):
-        self.logger = logger
-        try:
-            from e2b_code_interpreter import Sandbox
-        except ModuleNotFoundError:
-            raise ModuleNotFoundError(
-                """Please install 'e2b' extra to use E2BExecutor: `pip install "smolagents[e2b]"`"""
-            )
-        self.logger = logger
-        self.logger.log("Initializing E2B executor, hold on...")
-
-        self.custom_tools = {}
-        self.final_answer = False
-        self.final_answer_pattern = re.compile(r"final_answer\((.*?)\)")
-        self.sbx = Sandbox()  # "qywp2ctmu2q7jzprcf4j")
-        # TODO: validate installing agents package or not
-        # print("Installing agents package on remote executor...")
-        # self.sbx.commands.run(
-        #     "pip install git+https://github.com/huggingface/smolagents.git",
-        #     timeout=300
-        # )
-        # print("Installation of agents package finished.")
-        additional_imports = additional_imports + ["smolagents"]
-        if len(additional_imports) > 0:
-            execution = self.sbx.commands.run("pip install " + " ".join(additional_imports))
-            if execution.error:
-                raise Exception(f"Error installing dependencies: {execution.error}")
-            else:
-                logger.log(f"Installation of {additional_imports} succeeded!", 0)
-
-        tool_codes = []
-        for tool in tools:
-            validate_tool_attributes(tool.__class__, check_imports=False)
-            tool_code = instance_to_source(tool, base_cls=Tool)
-            tool_code = tool_code.replace("from smolagents.tools import Tool", "")
-            tool_code += f"\n{tool.name} = {tool.__class__.__name__}()\n"
-            tool_codes.append(tool_code)
-
-        tool_definition_code = "\n".join([f"import {module}" for module in BASE_BUILTIN_MODULES])
-        tool_definition_code += textwrap.dedent(
-            """
-        class Tool:
-            def __call__(self, *args, **kwargs):
-                return self.forward(*args, **kwargs)
-
-            def forward(self, *args, **kwargs):
-                pass # to be implemented in child class
-        """
-        )
-        tool_definition_code += "\n\n".join(tool_codes)
-
-        tool_definition_execution = self.run_code_raise_errors(tool_definition_code)
-        self.logger.log(tool_definition_execution.logs)
-
-    def run_code_raise_errors(self, code: str):
-        if self.final_answer_pattern.search(code) is not None:
-            self.final_answer = True
-        execution = self.sbx.run_code(
-            code,
-        )
-        if execution.error:
-            execution_logs = "\n".join([str(log) for log in execution.logs.stdout])
-            logs = execution_logs
-            logs += "Executing code yielded an error:"
-            logs += execution.error.name
-            logs += execution.error.value
-            logs += execution.error.traceback
-            raise ValueError(logs)
-        return execution
-
-    def __call__(self, code_action: str, additional_args: dict) -> Tuple[Any, Any]:
-        if len(additional_args) > 0:
-            # Pickle additional_args to server
-            import tempfile
-
-            with tempfile.NamedTemporaryFile() as f:
-                pickle.dump(additional_args, f)
-                f.flush()
-                with open(f.name, "rb") as file:
-                    self.sbx.files.write("/home/state.pkl", file)
-            remote_unloading_code = """import pickle
-import os
-print("File path", os.path.getsize('/home/state.pkl'))
-with open('/home/state.pkl', 'rb') as f:
-    pickle_dict = pickle.load(f)
-locals().update({key: value for key, value in pickle_dict.items()})
-"""
-            execution = self.run_code_raise_errors(remote_unloading_code)
-            execution_logs = "\n".join([str(log) for log in execution.logs.stdout])
-            self.logger.log(execution_logs, 1)
-
-        execution = self.run_code_raise_errors(code_action)
-        execution_logs = "\n".join([str(log) for log in execution.logs.stdout])
-        if not execution.results:
-            return None, execution_logs, self.final_answer
-        else:
-            for result in execution.results:
-                if result.is_main_result:
-                    for attribute_name in ["jpeg", "png"]:
-                        if getattr(result, attribute_name) is not None:
-                            image_output = getattr(result, attribute_name)
-                            decoded_bytes = base64.b64decode(image_output.encode("utf-8"))
-                            return Image.open(BytesIO(decoded_bytes)), execution_logs, self.final_answer
-                    for attribute_name in [
-                        "chart",
-                        "data",
-                        "html",
-                        "javascript",
-                        "json",
-                        "latex",
-                        "markdown",
-                        "pdf",
-                        "svg",
-                        "text",
-                    ]:
-                        if getattr(result, attribute_name) is not None:
-                            return getattr(result, attribute_name), execution_logs, self.final_answer
-            if self.final_answer:
-                raise ValueError("No main result returned by executor!")
-            return None, execution_logs, False
-
-
-__all__ = ["E2BExecutor"]
diff --git a/src/smolagents/gradio_ui.py b/src/smolagents/gradio_ui.py
index 11094a52c..83fbaff3d 100644
--- a/src/smolagents/gradio_ui.py
+++ b/src/smolagents/gradio_ui.py
@@ -16,23 +16,39 @@
 import os
 import re
 import shutil
-from typing import Optional
 
-from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types
-from smolagents.agents import ActionStep, MultiStepAgent
-from smolagents.memory import MemoryStep
+from smolagents.agent_types import AgentAudio, AgentImage, AgentText
+from smolagents.agents import MultiStepAgent, PlanningStep
+from smolagents.memory import ActionStep, FinalAnswerStep, MemoryStep
 from smolagents.utils import _is_package_available
 
 
+def get_step_footnote_content(step_log: MemoryStep, step_name: str) -> str:
+    """Return an HTML footnote for a step: bold name plus token counts and duration when present."""
+    step_footnote = f"**{step_name}**"
+    if hasattr(step_log, "input_token_count") and hasattr(step_log, "output_token_count"):
+        token_str = f" | Input tokens:{step_log.input_token_count:,} | Output tokens: {step_log.output_token_count:,}"
+        step_footnote += token_str
+    if hasattr(step_log, "duration") and step_log.duration:
+        # Append only a truthy duration; concatenating None to the footnote would raise TypeError.
+        step_footnote += f" | Duration: {round(float(step_log.duration), 2)}"
+    step_footnote_content = f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
+    return step_footnote_content
+
+
 def pull_messages_from_step(
     step_log: MemoryStep,
 ):
     """Extract ChatMessage objects from agent steps with proper nesting"""
+    if not _is_package_available("gradio"):
+        raise ModuleNotFoundError(
+            "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
+        )
     import gradio as gr
 
     if isinstance(step_log, ActionStep):
         # Output the step number
-        step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else ""
+        step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else "Step"
         yield gr.ChatMessage(role="assistant", content=f"**{step_number}**")
 
         # First yield the thought/reasoning from the LLM
@@ -74,76 +90,98 @@ def pull_messages_from_step(
                 metadata={
                     "title": f"🛠️ Used tool {first_tool_call.name}",
                     "id": parent_id,
-                    "status": "pending",
+                    "status": "done",
                 },
             )
             yield parent_message_tool
 
-            # Nesting execution logs under the tool call if they exist
-            if hasattr(step_log, "observations") and (
-                step_log.observations is not None and step_log.observations.strip()
-            ):  # Only yield execution logs if there's actual content
-                log_content = step_log.observations.strip()
-                if log_content:
-                    log_content = re.sub(r"^Execution logs:\s*", "", log_content)
-                    yield gr.ChatMessage(
-                        role="assistant",
-                        content=f"{log_content}",
-                        metadata={"title": "📝 Execution Logs", "parent_id": parent_id, "status": "done"},
-                    )
-
-            # Nesting any errors under the tool call
-            if hasattr(step_log, "error") and step_log.error is not None:
+        # Display execution logs if they exist
+        if hasattr(step_log, "observations") and (
+            step_log.observations is not None and step_log.observations.strip()
+        ):  # Only yield execution logs if there's actual content
+            log_content = step_log.observations.strip()
+            if log_content:
+                log_content = re.sub(r"^Execution logs:\s*", "", log_content)
                 yield gr.ChatMessage(
                     role="assistant",
-                    content=str(step_log.error),
-                    metadata={"title": "💥 Error", "parent_id": parent_id, "status": "done"},
+                    content=f"```bash\n{log_content}\n",
+                    metadata={"title": "📝 Execution Logs", "status": "done"},
                 )
 
-            # Update parent message metadata to done status without yielding a new message
-            parent_message_tool.metadata["status"] = "done"
+        # Display any errors
+        if hasattr(step_log, "error") and step_log.error is not None:
+            yield gr.ChatMessage(
+                role="assistant",
+                content=str(step_log.error),
+                metadata={"title": "💥 Error", "status": "done"},
+            )
+
+        # Display any images attached to the step's observations
+        if getattr(step_log, "observations_images", []):
+            for image in step_log.observations_images:
+                path_image = AgentImage(image).to_string()
+                yield gr.ChatMessage(
+                    role="assistant",
+                    content={"path": path_image, "mime_type": f"image/{path_image.split('.')[-1]}"},
+                    metadata={"title": "🖼️ Output Image", "status": "done"},
+                )
 
         # Handle standalone errors but not from tool calls
-        elif hasattr(step_log, "error") and step_log.error is not None:
+        if hasattr(step_log, "error") and step_log.error is not None:
             yield gr.ChatMessage(role="assistant", content=str(step_log.error), metadata={"title": "💥 Error"})
 
-        # Calculate duration and token information
-        step_footnote = f"{step_number}"
-        if hasattr(step_log, "input_token_count") and hasattr(step_log, "output_token_count"):
-            token_str = (
-                f" | Input-tokens:{step_log.input_token_count:,} | Output-tokens:{step_log.output_token_count:,}"
+        yield gr.ChatMessage(role="assistant", content=get_step_footnote_content(step_log, step_number))
+        yield gr.ChatMessage(role="assistant", content="-----", metadata={"status": "done"})
+
+    elif isinstance(step_log, PlanningStep):
+        yield gr.ChatMessage(role="assistant", content="**Planning step**")
+        yield gr.ChatMessage(role="assistant", content=step_log.plan)
+        yield gr.ChatMessage(role="assistant", content=get_step_footnote_content(step_log, "Planning step"))
+        yield gr.ChatMessage(role="assistant", content="-----", metadata={"status": "done"})
+
+    elif isinstance(step_log, FinalAnswerStep):
+        final_answer = step_log.final_answer
+        if isinstance(final_answer, AgentText):
+            yield gr.ChatMessage(
+                role="assistant",
+                content=f"**Final answer:**\n{final_answer.to_string()}\n",
+            )
+        elif isinstance(final_answer, AgentImage):
+            yield gr.ChatMessage(
+                role="assistant",
+                content={"path": final_answer.to_string(), "mime_type": "image/png"},
             )
-            step_footnote += token_str
-        if hasattr(step_log, "duration"):
-            step_duration = f" | Duration: {round(float(step_log.duration), 2)}" if step_log.duration else None
-            step_footnote += step_duration
-        step_footnote = f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
-        yield gr.ChatMessage(role="assistant", content=f"{step_footnote}")
-        yield gr.ChatMessage(role="assistant", content="-----")
+        elif isinstance(final_answer, AgentAudio):
+            yield gr.ChatMessage(
+                role="assistant",
+                content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
+            )
+        else:
+            yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
+
+    else:
+        raise ValueError(f"Unsupported step type: {type(step_log)}")
 
 
 def stream_to_gradio(
     agent,
     task: str,
+    task_images: list | None = None,
     reset_agent_memory: bool = False,
-    additional_args: Optional[dict] = None,
+    additional_args: dict | None = None,
 ):
     """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
-    if not _is_package_available("gradio"):
-        raise ModuleNotFoundError(
-            "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
-        )
-    import gradio as gr
-
     total_input_tokens = 0
     total_output_tokens = 0
 
-    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
+    for step_log in agent.run(
+        task, images=task_images, stream=True, reset=reset_agent_memory, additional_args=additional_args
+    ):
         # Track tokens if model provides them
         if getattr(agent.model, "last_input_token_count", None) is not None:
             total_input_tokens += agent.model.last_input_token_count
             total_output_tokens += agent.model.last_output_token_count
-            if isinstance(step_log, ActionStep):
+            if isinstance(step_log, (ActionStep, PlanningStep)):
                 step_log.input_token_count = agent.model.last_input_token_count
                 step_log.output_token_count = agent.model.last_output_token_count
 
@@ -152,27 +190,6 @@ def stream_to_gradio(
         ):
             yield message
 
-    final_answer = step_log  # Last log is the run's final_answer
-    final_answer = handle_agent_output_types(final_answer)
-
-    if isinstance(final_answer, AgentText):
-        yield gr.ChatMessage(
-            role="assistant",
-            content=f"**Final answer:**\n{final_answer.to_string()}\n",
-        )
-    elif isinstance(final_answer, AgentImage):
-        yield gr.ChatMessage(
-            role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "image/png"},
-        )
-    elif isinstance(final_answer, AgentAudio):
-        yield gr.ChatMessage(
-            role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
-        )
-    else:
-        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
-
 
 class GradioUI:
     """A one-line interface to launch your agent in Gradio"""
@@ -184,19 +201,32 @@ def __init__(self, agent: MultiStepAgent, file_upload_folder: str | None = None)
             )
         self.agent = agent
         self.file_upload_folder = file_upload_folder
+        self.name = getattr(agent, "name") or "Agent interface"
+        self.description = getattr(agent, "description", None)
         if self.file_upload_folder is not None:
             if not os.path.exists(file_upload_folder):
                 os.mkdir(file_upload_folder)
 
-    def interact_with_agent(self, prompt, messages):
+    def interact_with_agent(self, prompt, messages, session_state):
         import gradio as gr
 
-        messages.append(gr.ChatMessage(role="user", content=prompt))
-        yield messages
-        for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False):
-            messages.append(msg)
+        # Get the agent type from the template agent
+        if "agent" not in session_state:
+            session_state["agent"] = self.agent
+
+        try:
+            messages.append(gr.ChatMessage(role="user", content=prompt))
+            yield messages
+
+            for msg in stream_to_gradio(session_state["agent"], task=prompt, reset_agent_memory=False):
+                messages.append(msg)
+                yield messages
+
+            yield messages
+        except Exception as e:
+            print(f"Error in interaction: {str(e)}")
+            messages.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}"))
             yield messages
-        yield messages
 
     def upload_file(self, file, file_uploads_log, allowed_file_types=None):
         """
@@ -227,6 +257,8 @@ def upload_file(self, file, file_uploads_log, allowed_file_types=None):
         return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]
 
     def log_user_message(self, text_input, file_uploads_log):
+        import gradio as gr
+
         return (
             text_input
             + (
@@ -235,14 +267,56 @@ def log_user_message(self, text_input, file_uploads_log):
                 else ""
             ),
             "",
+            gr.Button(interactive=False),
         )
 
-    def launch(self, share: bool = False, **kwargs):
+    def launch(self, share: bool = True, **kwargs):
+        self.create_app().launch(debug=True, share=share, **kwargs)
+
+    def create_app(self):
         import gradio as gr
 
-        with gr.Blocks(fill_height=True) as demo:
+        with gr.Blocks(theme="ocean", fill_height=True) as demo:
+            # Add session state to store session-specific data
+            session_state = gr.State({})
             stored_messages = gr.State([])
             file_uploads_log = gr.State([])
+
+            with gr.Sidebar():
+                gr.Markdown(
+                    f"# {self.name.replace('_', ' ').capitalize()}"
+                    "\n> This web ui allows you to interact with a `smolagents` agent that can use tools and execute steps to complete tasks."
+                    + (f"\n\n**Agent description:**\n{self.description}" if self.description else "")
+                )
+
+                with gr.Group():
+                    gr.Markdown("**Your request**", container=True)
+                    text_input = gr.Textbox(
+                        lines=3,
+                        label="Chat Message",
+                        container=False,
+                        placeholder="Enter your prompt here and press Shift+Enter or press the button",
+                    )
+                    submit_btn = gr.Button("Submit", variant="primary")
+
+                # If an upload folder is provided, enable the upload feature
+                if self.file_upload_folder is not None:
+                    upload_file = gr.File(label="Upload a file")
+                    upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
+                    upload_file.change(
+                        self.upload_file,
+                        [upload_file, file_uploads_log],
+                        [upload_status, file_uploads_log],
+                    )
+
+                gr.HTML("<br><br><h4><center>Powered by:</center></h4>")
+                with gr.Row():
+                    gr.HTML("""<div style="display: flex; align-items: center; gap: 8px; font-family: system-ui, -apple-system, sans-serif;">
+            <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png" style="width: 32px; height: 32px; object-fit: contain;" alt="logo">
+            <a target="_blank" href="https://github.com/huggingface/smolagents"><b>huggingface/smolagents</b></a>
+            </div>""")
+
+            # Main chat interface
             chatbot = gr.Chatbot(
                 label="Agent",
                 type="messages",
@@ -253,23 +327,39 @@ def launch(self, share: bool = False, **kwargs):
                 resizeable=True,
                 scale=1,
             )
-            # If an upload folder is provided, enable the upload feature
-            if self.file_upload_folder is not None:
-                upload_file = gr.File(label="Upload a file")
-                upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
-                upload_file.change(
-                    self.upload_file,
-                    [upload_file, file_uploads_log],
-                    [upload_status, file_uploads_log],
-                )
-            text_input = gr.Textbox(lines=1, label="Chat Message")
+
+            # Set up event handlers
             text_input.submit(
                 self.log_user_message,
                 [text_input, file_uploads_log],
-                [stored_messages, text_input],
-            ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])
+                [stored_messages, text_input, submit_btn],
+            ).then(self.interact_with_agent, [stored_messages, chatbot, session_state], [chatbot]).then(
+                lambda: (
+                    gr.Textbox(
+                        interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button"
+                    ),
+                    gr.Button(interactive=True),
+                ),
+                None,
+                [text_input, submit_btn],
+            )
+
+            submit_btn.click(
+                self.log_user_message,
+                [text_input, file_uploads_log],
+                [stored_messages, text_input, submit_btn],
+            ).then(self.interact_with_agent, [stored_messages, chatbot, session_state], [chatbot]).then(
+                lambda: (
+                    gr.Textbox(
+                        interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button"
+                    ),
+                    gr.Button(interactive=True),
+                ),
+                None,
+                [text_input, submit_btn],
+            )
 
-        demo.launch(debug=True, share=share, **kwargs)
+        return demo
 
 
 __all__ = ["stream_to_gradio", "GradioUI"]
diff --git a/src/smolagents/local_python_executor.py b/src/smolagents/local_python_executor.py
index a48e1e11e..0bfa53b7f 100644
--- a/src/smolagents/local_python_executor.py
+++ b/src/smolagents/local_python_executor.py
@@ -21,14 +21,13 @@
 import logging
 import math
 import re
-from collections.abc import Mapping
+from collections.abc import Callable, Mapping
+from functools import wraps
 from importlib import import_module
-from types import ModuleType
-from typing import Any, Callable, Dict, List, Optional, Tuple
-
-import numpy as np
-import pandas as pd
+from types import BuiltinFunctionType, FunctionType, ModuleType
+from typing import Any
 
+from .tools import Tool
 from .utils import BASE_BUILTIN_MODULES, truncate_content
 
 
@@ -59,6 +58,12 @@ def custom_print(*args):
     return None
 
 
+def nodunder_getattr(obj, name, default=None):
+    if name.startswith("__") and name.endswith("__"):  # names that both start and end with "__" are refused
+        raise InterpreterError(f"Forbidden access to dunder attribute: {name}")
+    return getattr(obj, name, default)  # a missing attribute yields `default` instead of raising AttributeError
+
+
 BASE_PYTHON_TOOLS = {
     "print": custom_print,
     "isinstance": isinstance,
@@ -106,7 +111,7 @@ def custom_print(*args):
     "iter": iter,
     "divmod": divmod,
     "callable": callable,
-    "getattr": getattr,
+    "getattr": nodunder_getattr,
     "hasattr": hasattr,
     "setattr": setattr,
     "issubclass": issubclass,
@@ -114,25 +119,31 @@ def custom_print(*args):
     "complex": complex,
 }
 
-DANGEROUS_PATTERNS = (
-    "_os",
+# Non-exhaustive list of dangerous modules that should not be imported
+DANGEROUS_MODULES = [
+    "builtins",
+    "io",
+    "multiprocessing",
     "os",
-    "subprocess",
-    "_subprocess",
+    "pathlib",
     "pty",
-    "system",
-    "popen",
-    "spawn",
     "shutil",
-    "sys",
-    "pathlib",
-    "io",
     "socket",
-    "compile",
-    "eval",
-    "exec",
-    "multiprocessing",
-)
+    "subprocess",
+    "sys",
+]
+
+DANGEROUS_FUNCTIONS = [
+    "builtins.compile",
+    "builtins.eval",
+    "builtins.exec",
+    "builtins.globals",
+    "builtins.locals",
+    "builtins.__import__",
+    "os.popen",
+    "os.system",
+    "posix.system",
+]
 
 
 class PrintContainer:
@@ -211,12 +222,88 @@ def fix_final_answer_code(code: str) -> str:
     return code
 
 
+def build_import_tree(authorized_imports: list[str]) -> dict[str, Any]:
+    """Turn dotted import paths into a nested dict with one level per path segment."""
+    root: dict[str, Any] = {}
+    for dotted_path in authorized_imports:
+        # Restart from the root for every path, then walk down segment by segment.
+        node = root
+        for segment in dotted_path.split("."):
+            # setdefault creates the child on first sight and descends either way.
+            node = node.setdefault(segment, {})
+    return root
+
+
+def check_import_authorized(import_to_check: str, authorized_imports: list[str]) -> bool:
+    """Walk the authorized-import tree along the dotted path; a "*" child authorizes everything below it."""
+    node = build_import_tree(authorized_imports)
+    for segment in import_to_check.split("."):
+        if "*" in node:
+            return True
+        if (node := node.get(segment)) is None:
+            return False
+    return True
+
+
+def safer_eval(func: Callable):
+    """
+    Decorator that vets the wrapped evaluator's return value; returns the wrapping callable.
+
+    The wrapper raises InterpreterError when the result is a module (or a dict with a truthy `__spec__`) that
+    fails check_import_authorized, or a function matching DANGEROUS_FUNCTIONS whose name is not in static_tools.
+
+    Args:
+        func: Evaluator invoked as `func(expression, state, static_tools, custom_tools, authorized_imports=...)`.
+    """
+
+    @wraps(func)
+    def _check_return(
+        expression,
+        state,
+        static_tools,
+        custom_tools,
+        authorized_imports=BASE_BUILTIN_MODULES,
+    ):
+        result = func(expression, state, static_tools, custom_tools, authorized_imports=authorized_imports)
+        if isinstance(result, ModuleType):
+            if not check_import_authorized(result.__name__, authorized_imports):
+                raise InterpreterError(f"Forbidden access to module: {result.__name__}")
+        elif isinstance(result, dict) and result.get("__spec__"):  # presumably a module's __dict__ — TODO confirm
+            if not check_import_authorized(result["__name__"], authorized_imports):
+                raise InterpreterError(f"Forbidden access to module: {result['__name__']}")
+        elif isinstance(result, (FunctionType, BuiltinFunctionType)):
+            for qualified_function_name in DANGEROUS_FUNCTIONS:
+                module_name, function_name = qualified_function_name.rsplit(".", 1)
+                if (
+                    function_name not in static_tools  # names exposed as static tools are exempt
+                    and result.__name__ == function_name
+                    and result.__module__ == module_name
+                ):
+                    raise InterpreterError(f"Forbidden access to function: {function_name}")
+        return result
+
+    return _check_return
+
+
+def evaluate_attribute(
+    expression: ast.Attribute,
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
+) -> Any:
+    attr_name = expression.attr  # checked before the owner expression is evaluated
+    if attr_name.startswith("__") and attr_name.endswith("__"):
+        raise InterpreterError(f"Forbidden access to dunder attribute: {expression.attr}")
+    return getattr(evaluate_ast(expression.value, state, static_tools, custom_tools, authorized_imports), attr_name)
+
+
 def evaluate_unaryop(
     expression: ast.UnaryOp,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Any:
     operand = evaluate_ast(expression.operand, state, static_tools, custom_tools, authorized_imports)
     if isinstance(expression.op, ast.USub):
@@ -233,10 +320,10 @@ def evaluate_unaryop(
 
 def evaluate_lambda(
     lambda_expression: ast.Lambda,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Callable:
     args = [arg.arg for arg in lambda_expression.args.args]
 
@@ -257,10 +344,10 @@ def lambda_func(*values: Any) -> Any:
 
 def evaluate_while(
     while_loop: ast.While,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> None:
     iterations = 0
     while evaluate_ast(while_loop.test, state, static_tools, custom_tools, authorized_imports):
@@ -279,11 +366,13 @@ def evaluate_while(
 
 def create_function(
     func_def: ast.FunctionDef,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Callable:
+    source_code = ast.unparse(func_def)
+
     def new_func(*args: Any, **kwargs: Any) -> Any:
         func_state = state.copy()
         arg_names = [arg.arg for arg in func_def.args.args]
@@ -334,15 +423,20 @@ def new_func(*args: Any, **kwargs: Any) -> Any:
 
         return result
 
+    # Store original AST, source code, and name
+    new_func.__ast__ = func_def
+    new_func.__source__ = source_code
+    new_func.__name__ = func_def.name
+
     return new_func
 
 
 def evaluate_function_def(
     func_def: ast.FunctionDef,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Callable:
     custom_tools[func_def.name] = create_function(func_def, state, static_tools, custom_tools, authorized_imports)
     return custom_tools[func_def.name]
@@ -350,10 +444,10 @@ def evaluate_function_def(
 
 def evaluate_class_def(
     class_def: ast.ClassDef,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> type:
     class_name = class_def.name
     bases = [evaluate_ast(base, state, static_tools, custom_tools, authorized_imports) for base in class_def.bases]
@@ -361,7 +455,7 @@ def evaluate_class_def(
 
     for stmt in class_def.body:
         if isinstance(stmt, ast.FunctionDef):
-            class_dict[stmt.name] = evaluate_function_def(stmt, state, static_tools, custom_tools, authorized_imports)
+            class_dict[stmt.name] = evaluate_ast(stmt, state, static_tools, custom_tools, authorized_imports)
         elif isinstance(stmt, ast.Assign):
             for target in stmt.targets:
                 if isinstance(target, ast.Name):
@@ -380,6 +474,14 @@ def evaluate_class_def(
                         custom_tools,
                         authorized_imports,
                     )
+        elif (
+            isinstance(stmt, ast.Expr)
+            and stmt == class_def.body[0]
+            and isinstance(stmt.value, ast.Constant)
+            and isinstance(stmt.value.value, str)
+        ):
+            # Check if it is a docstring: first statement in class body which is a string literal expression
+            class_dict["__doc__"] = stmt.value.value
         else:
             raise InterpreterError(f"Unsupported statement in class body: {stmt.__class__.__name__}")
 
@@ -388,12 +490,29 @@ def evaluate_class_def(
     return new_class
 
 
+def evaluate_annassign(
+    annassign: ast.AnnAssign,
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
+) -> Any:
+    """Evaluate an annotated assignment; the annotation itself is never read."""
+    # A bare declaration (x: int) binds nothing and evaluates to None
+    if not annassign.value:
+        return None
+    # Otherwise evaluate the right-hand side, bind it to the target and return it
+    assigned = evaluate_ast(annassign.value, state, static_tools, custom_tools, authorized_imports)
+    set_value(annassign.target, assigned, state, static_tools, custom_tools, authorized_imports)
+    return assigned
+
+
 def evaluate_augassign(
     expression: ast.AugAssign,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Any:
     def get_current_value(target: ast.AST) -> Any:
         if isinstance(target, ast.Name):
@@ -462,29 +581,30 @@ def get_current_value(target: ast.AST) -> Any:
 
 def evaluate_boolop(
     node: ast.BoolOp,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
-) -> bool:
-    if isinstance(node.op, ast.And):
-        for value in node.values:
-            if not evaluate_ast(value, state, static_tools, custom_tools, authorized_imports):
-                return False
-        return True
-    elif isinstance(node.op, ast.Or):
-        for value in node.values:
-            if evaluate_ast(value, state, static_tools, custom_tools, authorized_imports):
-                return True
-        return False
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
+) -> Any:
+    # Determine which value should trigger short-circuit based on operation type:
+    # - 'and' returns the first falsy value encountered (or the last value if all are truthy)
+    # - 'or' returns the first truthy value encountered (or the last value if all are falsy)
+    is_short_circuit_value = (lambda x: not x) if isinstance(node.op, ast.And) else (lambda x: bool(x))
+    for value in node.values:
+        result = evaluate_ast(value, state, static_tools, custom_tools, authorized_imports)
+        # Short-circuit: return immediately if the condition is met
+        if is_short_circuit_value(result):
+            return result
+    # If no short-circuit occurred, return the last evaluated value
+    return result
 
 
 def evaluate_binop(
     binop: ast.BinOp,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Any:
     # Recursively evaluate the left and right operands
     left_val = evaluate_ast(binop.left, state, static_tools, custom_tools, authorized_imports)
@@ -521,24 +641,23 @@ def evaluate_binop(
 
 def evaluate_assign(
     assign: ast.Assign,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Any:
     result = evaluate_ast(assign.value, state, static_tools, custom_tools, authorized_imports)
     if len(assign.targets) == 1:
         target = assign.targets[0]
         set_value(target, result, state, static_tools, custom_tools, authorized_imports)
     else:
-        if len(assign.targets) != len(result):
-            raise InterpreterError(f"Assign failed: expected {len(result)} values but got {len(assign.targets)}.")
         expanded_values = []
         for tgt in assign.targets:
             if isinstance(tgt, ast.Starred):
                 expanded_values.extend(result)
             else:
                 expanded_values.append(result)
+
         for tgt, val in zip(assign.targets, expanded_values):
             set_value(tgt, val, state, static_tools, custom_tools, authorized_imports)
     return result
@@ -547,10 +666,10 @@ def evaluate_assign(
 def set_value(
     target: ast.AST,
     value: Any,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> None:
     if isinstance(target, ast.Name):
         if target.id in static_tools:
@@ -577,22 +696,26 @@ def set_value(
 
 def evaluate_call(
     call: ast.Call,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Any:
-    if not (
-        isinstance(call.func, ast.Attribute) or isinstance(call.func, ast.Name) or isinstance(call.func, ast.Subscript)
-    ):
+    if not isinstance(call.func, (ast.Call, ast.Lambda, ast.Attribute, ast.Name, ast.Subscript)):
         raise InterpreterError(f"This is not a correct function: {call.func}).")
-    if isinstance(call.func, ast.Attribute):
+
+    func, func_name = None, None
+
+    if isinstance(call.func, ast.Call):
+        func = evaluate_ast(call.func, state, static_tools, custom_tools, authorized_imports)
+    elif isinstance(call.func, ast.Lambda):
+        func = evaluate_ast(call.func, state, static_tools, custom_tools, authorized_imports)
+    elif isinstance(call.func, ast.Attribute):
         obj = evaluate_ast(call.func.value, state, static_tools, custom_tools, authorized_imports)
         func_name = call.func.attr
         if not hasattr(obj, func_name):
             raise InterpreterError(f"Object {obj} has no attribute {func_name}")
         func = getattr(obj, func_name)
-
     elif isinstance(call.func, ast.Name):
         func_name = call.func.id
         if func_name in state:
@@ -605,20 +728,14 @@ def evaluate_call(
             func = ERRORS[func_name]
         else:
             raise InterpreterError(
-                f"It is not permitted to evaluate other functions than the provided tools or functions defined/imported in previous code (tried to execute {call.func.id})."
+                f"Forbidden function evaluation: '{call.func.id}' is not among the explicitly allowed tools or defined/imported in the preceding code"
             )
-
     elif isinstance(call.func, ast.Subscript):
-        value = evaluate_ast(call.func.value, state, static_tools, custom_tools, authorized_imports)
-        index = evaluate_ast(call.func.slice, state, static_tools, custom_tools, authorized_imports)
-        if isinstance(value, (list, tuple)):
-            func = value[index]
-        else:
-            raise InterpreterError(f"Cannot subscript object of type {type(value).__name__}")
-
+        func = evaluate_ast(call.func, state, static_tools, custom_tools, authorized_imports)
         if not callable(func):
             raise InterpreterError(f"This is not a correct function: {call.func}).")
         func_name = None
+
     args = []
     for arg in call.args:
         if isinstance(arg, ast.Starred):
@@ -647,71 +764,43 @@ def evaluate_call(
             return super(cls, instance)
         else:
             raise InterpreterError("super() takes at most 2 arguments")
-    else:
-        if func_name == "print":
-            state["_print_outputs"] += " ".join(map(str, args)) + "\n"
-            return None
-        else:  # Assume it's a callable object
-            if (
-                (inspect.getmodule(func) == builtins)
-                and inspect.isbuiltin(func)
-                and (func not in static_tools.values())
-            ):
-                raise InterpreterError(
-                    f"Invoking a builtin function that has not been explicitly added as a tool is not allowed ({func_name})."
-                )
-            return func(*args, **kwargs)
+    elif func_name == "print":
+        state["_print_outputs"] += " ".join(map(str, args)) + "\n"
+        return None
+    else:  # Assume it's a callable object
+        if (inspect.getmodule(func) == builtins) and inspect.isbuiltin(func) and (func not in static_tools.values()):
+            raise InterpreterError(
+                f"Invoking a builtin function that has not been explicitly added as a tool is not allowed ({func_name})."
+            )
+        return func(*args, **kwargs)
 
 
 def evaluate_subscript(
     subscript: ast.Subscript,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Any:
     index = evaluate_ast(subscript.slice, state, static_tools, custom_tools, authorized_imports)
     value = evaluate_ast(subscript.value, state, static_tools, custom_tools, authorized_imports)
-
-    if isinstance(value, str) and isinstance(index, str):
-        raise InterpreterError("You're trying to subscript a string with a string index, which is impossible")
-    if isinstance(value, pd.core.indexing._LocIndexer):
-        parent_object = value.obj
-        return parent_object.loc[index]
-    if isinstance(value, pd.core.indexing._iLocIndexer):
-        parent_object = value.obj
-        return parent_object.iloc[index]
-    if isinstance(value, (pd.DataFrame, pd.Series, np.ndarray)):
-        return value[index]
-    elif isinstance(value, pd.core.groupby.generic.DataFrameGroupBy):
-        return value[index]
-    elif isinstance(index, slice):
-        return value[index]
-    elif isinstance(value, (list, tuple)):
-        if not (-len(value) <= index < len(value)):
-            raise InterpreterError(f"Index {index} out of bounds for list of length {len(value)}")
-        return value[int(index)]
-    elif isinstance(value, str):
-        if not (-len(value) <= index < len(value)):
-            raise InterpreterError(f"Index {index} out of bounds for string of length {len(value)}")
-        return value[index]
-    elif index in value:
+    try:
         return value[index]
-    else:
-        error_message = f"Could not index {value} with '{index}'."
+    except (KeyError, IndexError, TypeError) as e:
+        error_message = f"Could not index {value} with '{index}': {type(e).__name__}: {e}"
         if isinstance(index, str) and isinstance(value, Mapping):
             close_matches = difflib.get_close_matches(index, list(value.keys()))
             if len(close_matches) > 0:
-                error_message += f" Maybe you meant one of these indexes instead: {str(close_matches)}"
-        raise InterpreterError(error_message)
+                error_message += f". Maybe you meant one of these indexes instead: {str(close_matches)}"
+        raise InterpreterError(error_message) from e
 
 
 def evaluate_name(
     name: ast.Name,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Any:
     if name.id in state:
         return state[name.id]
@@ -729,10 +818,10 @@ def evaluate_name(
 
 def evaluate_condition(
     condition: ast.Compare,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> bool | object:
     result = True
     left = evaluate_ast(condition.left, state, static_tools, custom_tools, authorized_imports)
@@ -771,10 +860,10 @@ def evaluate_condition(
 
 def evaluate_if(
     if_statement: ast.If,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Any:
     result = None
     test_result = evaluate_ast(if_statement.test, state, static_tools, custom_tools, authorized_imports)
@@ -793,10 +882,10 @@ def evaluate_if(
 
 def evaluate_for(
     for_loop: ast.For,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> Any:
     result = None
     iterator = evaluate_ast(for_loop.iter, state, static_tools, custom_tools, authorized_imports)
@@ -826,12 +915,12 @@ def evaluate_for(
 
 def evaluate_listcomp(
     listcomp: ast.ListComp,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
-) -> List[Any]:
-    def inner_evaluate(generators: List[ast.comprehension], index: int, current_state: Dict[str, Any]) -> List[Any]:
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
+) -> list[Any]:
+    def inner_evaluate(generators: list[ast.comprehension], index: int, current_state: dict[str, Any]) -> list[Any]:
         if index >= len(generators):
             return [
                 evaluate_ast(
@@ -868,12 +957,47 @@ def inner_evaluate(generators: List[ast.comprehension], index: int, current_stat
     return inner_evaluate(listcomp.generators, 0, state)
 
 
+def evaluate_setcomp(
+    setcomp: ast.SetComp,
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
+) -> set[Any]:
+    result = set()
+    for gen in setcomp.generators:
+        iter_value = evaluate_ast(gen.iter, state, static_tools, custom_tools, authorized_imports)
+        for value in iter_value:
+            new_state = state.copy()
+            set_value(
+                gen.target,
+                value,
+                new_state,
+                static_tools,
+                custom_tools,
+                authorized_imports,
+            )
+            if all(
+                evaluate_ast(if_clause, new_state, static_tools, custom_tools, authorized_imports)
+                for if_clause in gen.ifs
+            ):
+                element = evaluate_ast(
+                    setcomp.elt,
+                    new_state,
+                    static_tools,
+                    custom_tools,
+                    authorized_imports,
+                )
+                result.add(element)
+    return result
+
+
 def evaluate_try(
     try_node: ast.Try,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> None:
     try:
         for stmt in try_node.body:
@@ -905,10 +1029,10 @@ def evaluate_try(
 
 def evaluate_raise(
     raise_node: ast.Raise,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> None:
     if raise_node.exc is not None:
         exc = evaluate_ast(raise_node.exc, state, static_tools, custom_tools, authorized_imports)
@@ -929,10 +1053,10 @@ def evaluate_raise(
 
 def evaluate_assert(
     assert_node: ast.Assert,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> None:
     test_result = evaluate_ast(assert_node.test, state, static_tools, custom_tools, authorized_imports)
     if not test_result:
@@ -947,10 +1071,10 @@ def evaluate_assert(
 
 def evaluate_with(
     with_node: ast.With,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> None:
     contexts = []
     for item in with_node.items:
@@ -995,18 +1119,9 @@ def get_safe_module(raw_module, authorized_imports, visited=None):
 
     # Copy all attributes by reference, recursively checking modules
     for attr_name in dir(raw_module):
-        # Skip dangerous patterns at any level
-        if any(
-            pattern in raw_module.__name__.split(".") + [attr_name]
-            and not check_module_authorized(pattern, authorized_imports)
-            for pattern in DANGEROUS_PATTERNS
-        ):
-            logger.info(f"Skipping dangerous attribute {raw_module.__name__}.{attr_name}")
-            continue
-
         try:
             attr_value = getattr(raw_module, attr_name)
-        except ImportError as e:
+        except (ImportError, AttributeError) as e:
             # lazy / dynamic loading module -> INFO log and skip
             logger.info(
                 f"Skipping import error while copying {raw_module.__name__}.{attr_name}: {type(e).__name__} - {e}"
@@ -1021,22 +1136,10 @@ def get_safe_module(raw_module, authorized_imports, visited=None):
     return safe_module
 
 
-def check_module_authorized(module_name, authorized_imports):
-    if "*" in authorized_imports:
-        return True
-    else:
-        module_path = module_name.split(".")
-        if any([module in DANGEROUS_PATTERNS and module not in authorized_imports for module in module_path]):
-            return False
-        # ["A", "B", "C"] -> ["A", "A.B", "A.B.C"]
-        module_subpaths = [".".join(module_path[:i]) for i in range(1, len(module_path) + 1)]
-        return any(subpath in authorized_imports for subpath in module_subpaths)
-
-
-def import_modules(expression, state, authorized_imports):
+def evaluate_import(expression, state, authorized_imports):
     if isinstance(expression, ast.Import):
         for alias in expression.names:
-            if check_module_authorized(alias.name, authorized_imports):
+            if check_import_authorized(alias.name, authorized_imports):
                 raw_module = import_module(alias.name)
                 state[alias.asname or alias.name] = get_safe_module(raw_module, authorized_imports)
             else:
@@ -1045,7 +1148,7 @@ def import_modules(expression, state, authorized_imports):
                 )
         return None
     elif isinstance(expression, ast.ImportFrom):
-        if check_module_authorized(expression.module, authorized_imports):
+        if check_import_authorized(expression.module, authorized_imports):
             raw_module = __import__(expression.module, fromlist=[alias.name for alias in expression.names])
             module = get_safe_module(raw_module, authorized_imports)
             if expression.names[0].name == "*":  # Handle "from module import *"
@@ -1071,11 +1174,11 @@ def import_modules(expression, state, authorized_imports):
 
 def evaluate_dictcomp(
     dictcomp: ast.DictComp,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
-) -> Dict[Any, Any]:
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
+) -> dict[Any, Any]:
     result = {}
     for gen in dictcomp.generators:
         iter_value = evaluate_ast(gen.iter, state, static_tools, custom_tools, authorized_imports)
@@ -1113,10 +1216,10 @@ def evaluate_dictcomp(
 
 def evaluate_delete(
     delete_node: ast.Delete,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str],
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str],
 ) -> None:
     """
     Evaluate a delete statement (del x, del x[y]).
@@ -1147,12 +1250,13 @@ def evaluate_delete(
             raise InterpreterError(f"Deletion of {type(target).__name__} targets is not supported")
 
 
+@safer_eval
 def evaluate_ast(
     expression: ast.AST,
-    state: Dict[str, Any],
-    static_tools: Dict[str, Callable],
-    custom_tools: Dict[str, Callable],
-    authorized_imports: List[str] = BASE_BUILTIN_MODULES,
+    state: dict[str, Any],
+    static_tools: dict[str, Callable],
+    custom_tools: dict[str, Callable],
+    authorized_imports: list[str] = BASE_BUILTIN_MODULES,
 ):
     """
     Evaluate an abstract syntax tree using the content of the variables stored in a state and only evaluating a given
@@ -1169,21 +1273,23 @@ def evaluate_ast(
         static_tools (`Dict[str, Callable]`):
             Functions that may be called during the evaluation. Trying to change one of these static_tools will raise an error.
         custom_tools (`Dict[str, Callable]`):
-            Functions that may be called during the evaluation. These static_tools can be overwritten.
+            Functions that may be called during the evaluation. These custom_tools can be overwritten.
         authorized_imports (`List[str]`):
             The list of modules that can be imported by the code. By default, only a few safe modules are allowed.
             If it contains "*", it will authorize any import. Use this at your own risk!
     """
-    if state.setdefault("_operations_count", 0) >= MAX_OPERATIONS:
+    if state.setdefault("_operations_count", {"counter": 0})["counter"] >= MAX_OPERATIONS:
         raise InterpreterError(
             f"Reached the max number of operations of {MAX_OPERATIONS}. Maybe there is an infinite loop somewhere in the code, or you're just asking too many calculations."
         )
-    state["_operations_count"] += 1
+    state["_operations_count"]["counter"] += 1
     common_params = (state, static_tools, custom_tools, authorized_imports)
     if isinstance(expression, ast.Assign):
         # Assignment -> we evaluate the assignment which should update the state
         # We return the variable assigned as it may be used to determine the final result.
         return evaluate_assign(expression, *common_params)
+    elif isinstance(expression, ast.AnnAssign):
+        return evaluate_annassign(expression, *common_params)
     elif isinstance(expression, ast.AugAssign):
         return evaluate_augassign(expression, *common_params)
     elif isinstance(expression, ast.Call):
@@ -1196,6 +1302,10 @@ def evaluate_ast(
         return tuple((evaluate_ast(elt, *common_params) for elt in expression.elts))
     elif isinstance(expression, (ast.ListComp, ast.GeneratorExp)):
         return evaluate_listcomp(expression, *common_params)
+    elif isinstance(expression, ast.DictComp):
+        return evaluate_dictcomp(expression, *common_params)
+    elif isinstance(expression, ast.SetComp):
+        return evaluate_setcomp(expression, *common_params)
     elif isinstance(expression, ast.UnaryOp):
         return evaluate_unaryop(expression, *common_params)
     elif isinstance(expression, ast.Starred):
@@ -1260,20 +1370,17 @@ def evaluate_ast(
         else:
             return evaluate_ast(expression.orelse, *common_params)
     elif isinstance(expression, ast.Attribute):
-        value = evaluate_ast(expression.value, *common_params)
-        return getattr(value, expression.attr)
+        return evaluate_attribute(expression, *common_params)
     elif isinstance(expression, ast.Slice):
         return slice(
             evaluate_ast(expression.lower, *common_params) if expression.lower is not None else None,
             evaluate_ast(expression.upper, *common_params) if expression.upper is not None else None,
             evaluate_ast(expression.step, *common_params) if expression.step is not None else None,
         )
-    elif isinstance(expression, ast.DictComp):
-        return evaluate_dictcomp(expression, *common_params)
     elif isinstance(expression, ast.While):
         return evaluate_while(expression, *common_params)
     elif isinstance(expression, (ast.Import, ast.ImportFrom)):
-        return import_modules(expression, state, authorized_imports)
+        return evaluate_import(expression, state, authorized_imports)
     elif isinstance(expression, ast.ClassDef):
         return evaluate_class_def(expression, *common_params)
     elif isinstance(expression, ast.Try):
@@ -1304,10 +1411,10 @@ def __init__(self, value):
 
 def evaluate_python_code(
     code: str,
-    static_tools: Optional[Dict[str, Callable]] = None,
-    custom_tools: Optional[Dict[str, Callable]] = None,
-    state: Optional[Dict[str, Any]] = None,
-    authorized_imports: List[str] = BASE_BUILTIN_MODULES,
+    static_tools: dict[str, Callable] | None = None,
+    custom_tools: dict[str, Callable] | None = None,
+    state: dict[str, Any] | None = None,
+    authorized_imports: list[str] = BASE_BUILTIN_MODULES,
     max_print_outputs_length: int = DEFAULT_MAX_LEN_OUTPUT,
 ):
     """
@@ -1346,11 +1453,15 @@ def evaluate_python_code(
     custom_tools = custom_tools if custom_tools is not None else {}
     result = None
     state["_print_outputs"] = PrintContainer()
+    state["_operations_count"] = {"counter": 0}
 
-    def final_answer(value):
-        raise FinalAnswerException(value)
+    if "final_answer" in static_tools:
+        previous_final_answer = static_tools["final_answer"]
 
-    static_tools["final_answer"] = final_answer
+        def final_answer(answer):  # Using 'answer' as the argument like in the original function
+            raise FinalAnswerException(previous_final_answer(answer))
+
+        static_tools["final_answer"] = final_answer
 
     try:
         for node in expression.body:
@@ -1375,29 +1486,27 @@ def final_answer(value):
         )
 
 
-class LocalPythonInterpreter:
+class PythonExecutor:
+    pass
+
+
+class LocalPythonExecutor(PythonExecutor):
     def __init__(
         self,
-        additional_authorized_imports: List[str],
-        tools: Dict,
-        max_print_outputs_length: Optional[int] = None,
+        additional_authorized_imports: list[str],
+        max_print_outputs_length: int | None = None,
     ):
         self.custom_tools = {}
-        self.state = {}
+        self.state = {"__name__": "__main__"}
         self.max_print_outputs_length = max_print_outputs_length
         if max_print_outputs_length is None:
             self.max_print_outputs_length = DEFAULT_MAX_LEN_OUTPUT
         self.additional_authorized_imports = additional_authorized_imports
         self.authorized_imports = list(set(BASE_BUILTIN_MODULES) | set(self.additional_authorized_imports))
-        # Add base trusted tools to list
-        self.static_tools = {
-            **tools,
-            **BASE_PYTHON_TOOLS.copy(),
-        }
         # TODO: assert self.authorized imports are all installed locally
+        self.static_tools = None
 
-    def __call__(self, code_action: str, additional_variables: Dict) -> Tuple[Any, str, bool]:
-        self.state.update(additional_variables)
+    def __call__(self, code_action: str) -> tuple[Any, str, bool]:
         output, is_final_answer = evaluate_python_code(
             code_action,
             static_tools=self.static_tools,
@@ -1409,5 +1518,11 @@ def __call__(self, code_action: str, additional_variables: Dict) -> Tuple[Any, s
         logs = str(self.state["_print_outputs"])
         return output, logs, is_final_answer
 
+    def send_variables(self, variables: dict):
+        self.state.update(variables)
+
+    def send_tools(self, tools: dict[str, Tool]):
+        self.static_tools = {**tools, **BASE_PYTHON_TOOLS.copy()}
+
 
-__all__ = ["evaluate_python_code", "LocalPythonInterpreter"]
+__all__ = ["evaluate_python_code", "LocalPythonExecutor"]
diff --git a/src/smolagents/mcp_client.py b/src/smolagents/mcp_client.py
new file mode 100644
index 000000000..000e6e08b
--- /dev/null
+++ b/src/smolagents/mcp_client.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from types import TracebackType
+from typing import TYPE_CHECKING, Any
+
+from smolagents.tools import Tool
+
+
+__all__ = ["MCPClient"]
+
+if TYPE_CHECKING:
+    from mcpadapt.core import StdioServerParameters
+
+
+class MCPClient:
+    """Manages the connection to an MCP server and makes its tools available to SmolAgents.
+
+    Note: tools can only be accessed after the connection has been started with the
+        `connect()` method, which is called during init. If you don't use the context manager,
+        we strongly encourage using "try ... finally" to ensure the connection is cleaned up.
+
+    Args:
+        server_parameters (StdioServerParameters | dict[str, Any] | list[StdioServerParameters | dict[str, Any]]):
+            MCP server parameters (stdio or sse). Can be a list if you want to connect multiple MCPs at once.
+
+    Example:
+        ```python
+        # fully managed context manager + stdio
+        with MCPClient(...) as tools:
+            # tools are now available
+
+        # context manager + sse
+        with MCPClient({"url": "http://localhost:8000/sse"}) as tools:
+            # tools are now available
+
+        # manually manage the connection via the mcp_client object:
+        try:
+            mcp_client = MCPClient(...)
+            tools = mcp_client.get_tools()
+
+            # use your tools here.
+        finally:
+            mcp_client.disconnect()
+        ```
+    """
+
+    def __init__(
+        self,
+        server_parameters: "StdioServerParameters" | dict[str, Any] | list["StdioServerParameters" | dict[str, Any]],
+    ):
+        try:
+            from mcpadapt.core import MCPAdapt
+            from mcpadapt.smolagents_adapter import SmolAgentsAdapter
+        except ModuleNotFoundError:
+            raise ModuleNotFoundError("Please install 'mcp' extra to use MCPClient: `pip install 'smolagents[mcp]'`")
+        self._adapter = MCPAdapt(server_parameters, SmolAgentsAdapter())
+        self._tools: list[Tool] | None = None
+        self.connect()
+
+    def connect(self):
+        """Connect to the MCP server and initialize the tools."""
+        self._tools: list[Tool] = self._adapter.__enter__()
+
+    def disconnect(
+        self,
+        exc_type: type[BaseException] | None = None,
+        exc_value: BaseException | None = None,
+        exc_traceback: TracebackType | None = None,
+    ):
+        """Disconnect from the MCP server"""
+        self._adapter.__exit__(exc_type, exc_value, exc_traceback)
+
+    def get_tools(self) -> list[Tool]:
+        """The SmolAgents tools available from the MCP server.
+
+        Note: for now, this always returns the tools available at the creation of the session,
+        but a future release will also return any new tools that the MCP server makes
+        available at call time.
+
+        Raises:
+            ValueError: If the MCP server tools are None (usually meaning the server is not started).
+
+        Returns:
+            list[Tool]: The SmolAgents tools available from the MCP server.
+        """
+        if self._tools is None:
+            raise ValueError(
+                "Couldn't retrieve tools from MCP server, run `mcp_client.connect()` first before accessing `tools`"
+            )
+        return self._tools
+
+    def __enter__(self) -> list[Tool]:
+        """Connect to the MCP server and return the tools directly.
+
+        Note that because of the `.connect` in the init, the mcp_client
+        is already connected at this point.
+        """
+        return self._tools
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        exc_traceback: TracebackType | None,
+    ):
+        """Disconnect from the MCP server."""
+        self.disconnect(exc_type, exc_value, exc_traceback)
diff --git a/src/smolagents/memory.py b/src/smolagents/memory.py
index 5875db596..38fa9e1e9 100644
--- a/src/smolagents/memory.py
+++ b/src/smolagents/memory.py
@@ -1,6 +1,6 @@
 from dataclasses import asdict, dataclass
 from logging import getLogger
-from typing import TYPE_CHECKING, Any, Dict, List, TypedDict, Union
+from typing import TYPE_CHECKING, Any, TypedDict
 
 from smolagents.models import ChatMessage, MessageRole
 from smolagents.monitoring import AgentLogger, LogLevel
@@ -8,6 +8,8 @@
 
 
 if TYPE_CHECKING:
+    import PIL.Image
+
     from smolagents.models import ChatMessage
     from smolagents.monitoring import AgentLogger
 
@@ -17,7 +19,7 @@
 
 class Message(TypedDict):
     role: MessageRole
-    content: str | list[dict]
+    content: str | list[dict[str, Any]]
 
 
 @dataclass
@@ -42,23 +44,23 @@ class MemoryStep:
     def dict(self):
         return asdict(self)
 
-    def to_messages(self, **kwargs) -> List[Dict[str, Any]]:
+    def to_messages(self, summary_mode: bool = False) -> list[Message]:
         raise NotImplementedError
 
 
 @dataclass
 class ActionStep(MemoryStep):
-    model_input_messages: List[Message] | None = None
-    tool_calls: List[ToolCall] | None = None
+    model_input_messages: list[Message] | None = None
+    tool_calls: list[ToolCall] | None = None
     start_time: float | None = None
     end_time: float | None = None
     step_number: int | None = None
     error: AgentError | None = None
     duration: float | None = None
-    model_output_message: ChatMessage = None
+    model_output_message: ChatMessage | None = None
     model_output: str | None = None
     observations: str | None = None
-    observations_images: List[str] | None = None
+    observations_images: list["PIL.Image.Image"] | None = None
     action_output: Any = None
 
     def dict(self):
@@ -77,10 +79,8 @@ def dict(self):
             "action_output": make_json_serializable(self.action_output),
         }
 
-    def to_messages(self, summary_mode: bool = False, show_model_input_messages: bool = False) -> List[Message]:
+    def to_messages(self, summary_mode: bool = False) -> list[Message]:
         messages = []
-        if self.model_input_messages is not None and show_model_input_messages:
-            messages.append(Message(role=MessageRole.SYSTEM, content=self.model_input_messages))
         if self.model_output is not None and not summary_mode:
             messages.append(
                 Message(role=MessageRole.ASSISTANT, content=[{"type": "text", "text": self.model_output.strip()}])
@@ -89,7 +89,7 @@ def to_messages(self, summary_mode: bool = False, show_model_input_messages: boo
         if self.tool_calls is not None:
             messages.append(
                 Message(
-                    role=MessageRole.ASSISTANT,
+                    role=MessageRole.TOOL_CALL,
                     content=[
                         {
                             "type": "text",
@@ -99,6 +99,20 @@ def to_messages(self, summary_mode: bool = False, show_model_input_messages: boo
                 )
             )
 
+        if self.observations_images:
+            messages.append(
+                Message(
+                    role=MessageRole.USER,
+                    content=[
+                        {
+                            "type": "image",
+                            "image": image,
+                        }
+                        for image in self.observations_images
+                    ],
+                )
+            )
+
         if self.observations is not None:
             messages.append(
                 Message(
@@ -106,7 +120,7 @@ def to_messages(self, summary_mode: bool = False, show_model_input_messages: boo
                     content=[
                         {
                             "type": "text",
-                            "text": f"Call id: {self.tool_calls[0].id}\nObservation:\n{self.observations}",
+                            "text": f"Observation:\n{self.observations}",
                         }
                     ],
                 )
@@ -123,54 +137,31 @@ def to_messages(self, summary_mode: bool = False, show_model_input_messages: boo
                 Message(role=MessageRole.TOOL_RESPONSE, content=[{"type": "text", "text": message_content}])
             )
 
-        if self.observations_images:
-            messages.append(
-                Message(
-                    role=MessageRole.USER,
-                    content=[{"type": "text", "text": "Here are the observed images:"}]
-                    + [
-                        {
-                            "type": "image",
-                            "image": image,
-                        }
-                        for image in self.observations_images
-                    ],
-                )
-            )
         return messages
 
 
 @dataclass
 class PlanningStep(MemoryStep):
-    model_input_messages: List[Message]
-    model_output_message_facts: ChatMessage
-    facts: str
-    model_output_message_plan: ChatMessage
+    model_input_messages: list[Message]
+    model_output_message: ChatMessage
     plan: str
 
-    def to_messages(self, summary_mode: bool, **kwargs) -> List[Message]:
-        messages = []
-        messages.append(
-            Message(
-                role=MessageRole.ASSISTANT, content=[{"type": "text", "text": f"[FACTS LIST]:\n{self.facts.strip()}"}]
-            )
-        )
-
-        if not summary_mode:  # This step is not shown to a model writing a plan to avoid influencing the new plan
-            messages.append(
-                Message(
-                    role=MessageRole.ASSISTANT, content=[{"type": "text", "text": f"[PLAN]:\n{self.plan.strip()}"}]
-                )
-            )
-        return messages
+    def to_messages(self, summary_mode: bool = False) -> list[Message]:
+        if summary_mode:
+            return []
+        return [
+            Message(role=MessageRole.ASSISTANT, content=[{"type": "text", "text": self.plan.strip()}]),
+            Message(role=MessageRole.USER, content=[{"type": "text", "text": "Now proceed and carry out this plan."}]),
+            # This second message creates a role change to prevent models from simply continuing the plan message
+        ]
 
 
 @dataclass
 class TaskStep(MemoryStep):
     task: str
-    task_images: List[str] | None = None
+    task_images: list["PIL.Image.Image"] | None = None
 
-    def to_messages(self, summary_mode: bool = False, **kwargs) -> List[Message]:
+    def to_messages(self, summary_mode: bool = False) -> list[Message]:
         content = [{"type": "text", "text": f"New task:\n{self.task}"}]
         if self.task_images:
             for image in self.task_images:
@@ -183,16 +174,21 @@ def to_messages(self, summary_mode: bool = False, **kwargs) -> List[Message]:
 class SystemPromptStep(MemoryStep):
     system_prompt: str
 
-    def to_messages(self, summary_mode: bool = False, **kwargs) -> List[Message]:
+    def to_messages(self, summary_mode: bool = False) -> list[Message]:
         if summary_mode:
             return []
         return [Message(role=MessageRole.SYSTEM, content=[{"type": "text", "text": self.system_prompt}])]
 
 
+@dataclass
+class FinalAnswerStep(MemoryStep):
+    final_answer: Any
+
+
 class AgentMemory:
     def __init__(self, system_prompt: str):
         self.system_prompt = SystemPromptStep(system_prompt=system_prompt)
-        self.steps: List[Union[TaskStep, ActionStep, PlanningStep]] = []
+        self.steps: list[TaskStep | ActionStep | PlanningStep] = []
 
     def reset(self):
         self.steps = []
@@ -221,14 +217,15 @@ def replay(self, logger: AgentLogger, detailed: bool = False):
                 logger.log_task(step.task, "", level=LogLevel.ERROR)
             elif isinstance(step, ActionStep):
                 logger.log_rule(f"Step {step.step_number}", level=LogLevel.ERROR)
-                if detailed:
-                    logger.log_messages(step.model_input_messages)
-                logger.log_markdown(title="Agent output:", content=step.model_output, level=LogLevel.ERROR)
+                if detailed and step.model_input_messages is not None:
+                    logger.log_messages(step.model_input_messages, level=LogLevel.ERROR)
+                if step.model_output is not None:
+                    logger.log_markdown(title="Agent output:", content=step.model_output, level=LogLevel.ERROR)
             elif isinstance(step, PlanningStep):
                 logger.log_rule("Planning step", level=LogLevel.ERROR)
-                if detailed:
+                if detailed and step.model_input_messages is not None:
                     logger.log_messages(step.model_input_messages, level=LogLevel.ERROR)
-                logger.log_markdown(title="Agent output:", content=step.facts + "\n" + step.plan, level=LogLevel.ERROR)
+                logger.log_markdown(title="Agent output:", content=step.plan, level=LogLevel.ERROR)
 
 
 __all__ = ["AgentMemory"]
diff --git a/src/smolagents/models.py b/src/smolagents/models.py
index 2a586edfe..433f8fbe5 100644
--- a/src/smolagents/models.py
+++ b/src/smolagents/models.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python
-# coding=utf-8
-
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,24 +14,29 @@
 import json
 import logging
 import os
-import random
+import re
 import uuid
+import warnings
+from collections.abc import Generator
 from copy import deepcopy
 from dataclasses import asdict, dataclass
 from enum import Enum
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
-
-from huggingface_hub import InferenceClient
-from huggingface_hub.utils import is_torch_available
-from PIL import Image
+from threading import Thread
+from typing import TYPE_CHECKING, Any
 
 from .tools import Tool
-from .utils import _is_package_available, encode_image_base64, make_image_url
+from .utils import _is_package_available, encode_image_base64, make_image_url, parse_json_blob
 
 
 if TYPE_CHECKING:
+    from huggingface_hub import (
+        ChatCompletionOutputFunctionDefinition,
+        ChatCompletionOutputMessage,
+        ChatCompletionOutputToolCall,
+    )
     from transformers import StoppingCriteriaList
 
+
 logger = logging.getLogger(__name__)
 
 DEFAULT_JSONAGENT_REGEX_GRAMMAR = {
@@ -61,15 +63,18 @@ def convert(obj):
 class ChatMessageToolCallDefinition:
     arguments: Any
     name: str
-    description: Optional[str] = None
+    description: str | None = None
 
     @classmethod
-    def from_hf_api(cls, tool_call_definition) -> "ChatMessageToolCallDefinition":
-        return cls(
-            arguments=tool_call_definition.arguments,
-            name=tool_call_definition.name,
-            description=tool_call_definition.description,
+    def from_hf_api(
+        cls, tool_call_definition: "ChatCompletionOutputFunctionDefinition"
+    ) -> "ChatMessageToolCallDefinition":
+        warnings.warn(
+            "ChatMessageToolCallDefinition.from_hf_api is deprecated and will be removed in version 1.16.0. "
+            "Please use ChatMessageToolCallDefinition with asdict() instead.",
+            FutureWarning,
         )
+        return cls(**asdict(tool_call_definition))
 
 
 @dataclass
@@ -79,33 +84,27 @@ class ChatMessageToolCall:
     type: str
 
     @classmethod
-    def from_hf_api(cls, tool_call) -> "ChatMessageToolCall":
-        return cls(
-            function=ChatMessageToolCallDefinition.from_hf_api(tool_call.function),
-            id=tool_call.id,
-            type=tool_call.type,
+    def from_hf_api(cls, tool_call: "ChatCompletionOutputToolCall") -> "ChatMessageToolCall":
+        warnings.warn(
+            "ChatMessageToolCall.from_hf_api is deprecated and will be removed in version 1.16.0. "
+            "Please use ChatMessageToolCall with asdict() instead.",
+            FutureWarning,
         )
+        return cls(**asdict(tool_call))
 
 
 @dataclass
 class ChatMessage:
     role: str
-    content: Optional[str] = None
-    tool_calls: Optional[List[ChatMessageToolCall]] = None
-    raw: Optional[Any] = None  # Stores the raw output from the API
+    content: str | None = None
+    tool_calls: list[ChatMessageToolCall] | None = None
+    raw: Any | None = None  # Stores the raw output from the API
 
     def model_dump_json(self):
         return json.dumps(get_dict_from_nested_dataclasses(self, ignore_key="raw"))
 
     @classmethod
-    def from_hf_api(cls, message, raw) -> "ChatMessage":
-        tool_calls = None
-        if getattr(message, "tool_calls", None) is not None:
-            tool_calls = [ChatMessageToolCall.from_hf_api(tool_call) for tool_call in message.tool_calls]
-        return cls(role=message.role, content=message.content, tool_calls=tool_calls, raw=raw)
-
-    @classmethod
-    def from_dict(cls, data: dict) -> "ChatMessage":
+    def from_dict(cls, data: dict, raw: Any | None = None) -> "ChatMessage":
         if data.get("tool_calls"):
             tool_calls = [
                 ChatMessageToolCall(
@@ -114,13 +113,22 @@ def from_dict(cls, data: dict) -> "ChatMessage":
                 for tc in data["tool_calls"]
             ]
             data["tool_calls"] = tool_calls
-        return cls(**data)
+        return cls(role=data["role"], content=data.get("content"), tool_calls=data.get("tool_calls"), raw=raw)
 
     def dict(self):
         return json.dumps(get_dict_from_nested_dataclasses(self))
 
+    @classmethod
+    def from_hf_api(cls, message: "ChatCompletionOutputMessage", raw) -> "ChatMessage":
+        warnings.warn(
+            "ChatMessage.from_hf_api is deprecated and will be removed in version 1.16.0. "
+            "Please use ChatMessage.from_dict with asdict() instead.",
+            FutureWarning,
+        )
+        return cls.from_dict(asdict(message), raw=raw)
+
 
-def parse_json_if_needed(arguments: Union[str, dict]) -> Union[str, dict]:
+def parse_json_if_needed(arguments: str | dict) -> str | dict:
     if isinstance(arguments, dict):
         return arguments
     else:
@@ -130,11 +138,10 @@ def parse_json_if_needed(arguments: Union[str, dict]) -> Union[str, dict]:
             return arguments
 
 
-def parse_tool_args_if_needed(message: ChatMessage) -> ChatMessage:
-    if message.tool_calls is not None:
-        for tool_call in message.tool_calls:
-            tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments)
-    return message
+@dataclass
+class CompletionDelta:
+    content: str | None = None
+    tool_calls: list[ChatMessageToolCall] | None = None
 
 
 class MessageRole(str, Enum):
@@ -155,7 +162,7 @@ def roles(cls):
 }
 
 
-def get_tool_json_schema(tool: Tool) -> Dict:
+def get_tool_json_schema(tool: Tool) -> dict:
     properties = deepcopy(tool.inputs)
     required = []
     for key, value in properties.items():
@@ -177,7 +184,7 @@ def get_tool_json_schema(tool: Tool) -> Dict:
     }
 
 
-def remove_stop_sequences(content: str, stop_sequences: List[str]) -> str:
+def remove_stop_sequences(content: str, stop_sequences: list[str]) -> str:
     for stop_seq in stop_sequences:
         if content[-len(stop_seq) :] == stop_seq:
             content = content[: -len(stop_seq)]
@@ -185,11 +192,11 @@ def remove_stop_sequences(content: str, stop_sequences: List[str]) -> str:
 
 
 def get_clean_message_list(
-    message_list: List[Dict[str, str]],
-    role_conversions: Dict[MessageRole, MessageRole] = {},
+    message_list: list[dict[str, str | list[dict]]],
+    role_conversions: dict[MessageRole, MessageRole] | dict[str, str] = {},
     convert_images_to_image_urls: bool = False,
     flatten_messages_as_text: bool = False,
-) -> List[Dict[str, str]]:
+) -> list[dict[str, str | list[dict]]]:
     """
     Subsequent messages with the same role will be concatenated to a single message.
     output_message_list is a list of messages that will be used to generate the final message that is chat template compatible with transformers LLM chat template.
@@ -200,7 +207,7 @@ def get_clean_message_list(
         convert_images_to_image_urls (`bool`, default `False`): Whether to convert images to image URLs.
         flatten_messages_as_text (`bool`, default `False`): Whether to flatten messages as text.
     """
-    output_message_list = []
+    output_message_list: list[dict[str, str | list[dict]]] = []
     message_list = deepcopy(message_list)  # Avoid modifying the original list
     for message in message_list:
         role = message["role"]
@@ -208,10 +215,11 @@ def get_clean_message_list(
             raise ValueError(f"Incorrect role {role}, only {MessageRole.roles()} are supported for now.")
 
         if role in role_conversions:
-            message["role"] = role_conversions[role]
+            message["role"] = role_conversions[role]  # type: ignore
         # encode images if needed
         if isinstance(message["content"], list):
             for element in message["content"]:
+                assert isinstance(element, dict), "Error: this element should be a dict:" + str(element)
                 if element["type"] == "image":
                     assert not flatten_messages_as_text, f"Cannot use images with {flatten_messages_as_text=}"
                     if convert_images_to_image_urls:
@@ -227,9 +235,14 @@ def get_clean_message_list(
         if len(output_message_list) > 0 and message["role"] == output_message_list[-1]["role"]:
             assert isinstance(message["content"], list), "Error: wrong content:" + str(message["content"])
             if flatten_messages_as_text:
-                output_message_list[-1]["content"] += message["content"][0]["text"]
+                output_message_list[-1]["content"] += "\n" + message["content"][0]["text"]
             else:
-                output_message_list[-1]["content"] += message["content"]
+                for el in message["content"]:
+                    if el["type"] == "text" and output_message_list[-1]["content"][-1]["type"] == "text":
+                        # Merge consecutive text messages rather than creating new ones
+                        output_message_list[-1]["content"][-1]["text"] += "\n" + el["text"]
+                    else:
+                        output_message_list[-1]["content"].append(el)
         else:
             if flatten_messages_as_text:
                 content = message["content"][0]["text"]
@@ -239,23 +252,69 @@ def get_clean_message_list(
     return output_message_list
 
 
+def get_tool_call_from_text(text: str, tool_name_key: str, tool_arguments_key: str) -> ChatMessageToolCall:
+    tool_call_dictionary, _ = parse_json_blob(text)
+    try:
+        tool_name = tool_call_dictionary[tool_name_key]
+    except Exception as e:
+        raise ValueError(
+            f"Key {tool_name_key=} not found in the generated tool call. Got keys: {list(tool_call_dictionary.keys())} instead"
+        ) from e
+    tool_arguments = tool_call_dictionary.get(tool_arguments_key, None)
+    if isinstance(tool_arguments, str):
+        tool_arguments = parse_json_if_needed(tool_arguments)
+    return ChatMessageToolCall(
+        id=str(uuid.uuid4()),
+        type="function",
+        function=ChatMessageToolCallDefinition(name=tool_name, arguments=tool_arguments),
+    )
+
+
+def supports_stop_parameter(model_id: str) -> bool:
+    """
+    Check if the model supports the `stop` parameter.
+
+    Not supported with reasoning models openai/o3 and openai/o4-mini (and their versioned variants).
+
+    Args:
+        model_id (`str`): Model identifier (e.g. "openai/o3", "o4-mini-2025-04-16")
+
+    Returns:
+        bool: True if the model supports the stop parameter, False otherwise
+    """
+    model_name = model_id.split("/")[-1]
+    # o3 and o4-mini (including versioned variants, e.g. o3-2025-04-16) don't support the stop parameter
+    pattern = r"^(o3[-\d]*|o4-mini[-\d]*)$"
+    return not re.match(pattern, model_name)
+
+
 class Model:
-    def __init__(self, **kwargs):
-        self.last_input_token_count = None
-        self.last_output_token_count = None
+    def __init__(
+        self,
+        flatten_messages_as_text: bool = False,
+        tool_name_key: str = "name",
+        tool_arguments_key: str = "arguments",
+        model_id: str | None = None,
+        **kwargs,
+    ):
+        self.flatten_messages_as_text = flatten_messages_as_text
+        self.tool_name_key = tool_name_key
+        self.tool_arguments_key = tool_arguments_key
         self.kwargs = kwargs
+        self.last_input_token_count: int | None = None
+        self.last_output_token_count: int | None = None
+        self.model_id: str | None = model_id
 
     def _prepare_completion_kwargs(
         self,
-        messages: List[Dict[str, str]],
-        stop_sequences: Optional[List[str]] = None,
-        grammar: Optional[str] = None,
-        tools_to_call_from: Optional[List[Tool]] = None,
-        custom_role_conversions: Optional[Dict[str, str]] = None,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        custom_role_conversions: dict[str, str] | None = None,
         convert_images_to_image_urls: bool = False,
-        flatten_messages_as_text: bool = False,
         **kwargs,
-    ) -> Dict:
+    ) -> dict[str, Any]:
         """
         Prepare parameters required for model invocation, handling parameter priorities.
 
@@ -265,13 +324,13 @@ def _prepare_completion_kwargs(
         3. Default values in self.kwargs
         """
         # Clean and standardize the message list
+        flatten_messages_as_text = kwargs.pop("flatten_messages_as_text", self.flatten_messages_as_text)
         messages = get_clean_message_list(
             messages,
             role_conversions=custom_role_conversions or tool_role_conversions,
             convert_images_to_image_urls=convert_images_to_image_urls,
             flatten_messages_as_text=flatten_messages_as_text,
         )
-
         # Use self.kwargs as the base configuration
         completion_kwargs = {
             **self.kwargs,
@@ -280,7 +339,9 @@ def _prepare_completion_kwargs(
 
         # Handle specific parameters
         if stop_sequences is not None:
-            completion_kwargs["stop"] = stop_sequences
+            # Some models do not support stop parameter
+            if supports_stop_parameter(self.model_id or ""):
+                completion_kwargs["stop"] = stop_sequences
         if grammar is not None:
             completion_kwargs["grammar"] = grammar
 
@@ -298,24 +359,26 @@ def _prepare_completion_kwargs(
 
         return completion_kwargs
 
-    def get_token_counts(self) -> Dict[str, int]:
+    def get_token_counts(self) -> dict[str, int]:
+        if self.last_input_token_count is None or self.last_output_token_count is None:
+            raise ValueError("Token counts are not available")
         return {
             "input_token_count": self.last_input_token_count,
             "output_token_count": self.last_output_token_count,
         }
 
-    def __call__(
+    def generate(
         self,
-        messages: List[Dict[str, str]],
-        stop_sequences: Optional[List[str]] = None,
-        grammar: Optional[str] = None,
-        tools_to_call_from: Optional[List[Tool]] = None,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
         **kwargs,
     ) -> ChatMessage:
         """Process the input messages and return the model's response.
 
         Parameters:
-            messages (`List[Dict[str, str]]`):
+            messages (`list[dict[str, str | list[dict]]]`):
                 A list of message dictionaries to be processed. Each dictionary should have the structure `{"role": "user/system", "content": "message content"}`.
             stop_sequences (`List[str]`, *optional*):
                 A list of strings that will stop the generation if encountered in the model's output.
@@ -329,9 +392,25 @@ def __call__(
         Returns:
             `ChatMessage`: A chat message object containing the model's response.
         """
-        pass  # To be implemented in child classes!
+        raise NotImplementedError("This method must be implemented in child classes")
+
+    def __call__(self, *args, **kwargs):
+        return self.generate(*args, **kwargs)
+
+    def parse_tool_calls(self, message: ChatMessage) -> ChatMessage:
+        """Sometimes APIs do not return the tool call as a specific object, so we need to parse it."""
+        message.role = MessageRole.ASSISTANT  # Overwrite role if needed
+        if not message.tool_calls:
+            assert message.content is not None, "Message contains no content and no tool calls"
+            message.tool_calls = [
+                get_tool_call_from_text(message.content, self.tool_name_key, self.tool_arguments_key)
+            ]
+        assert len(message.tool_calls) > 0, "No tool call was found in the model output"
+        for tool_call in message.tool_calls:
+            tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments)
+        return message
 
-    def to_dict(self) -> Dict:
+    def to_dict(self) -> dict:
         """
         Converts the model into a JSON-compatible dictionary.
         """
@@ -366,7 +445,7 @@ def to_dict(self) -> Dict:
         return model_dictionary
 
     @classmethod
-    def from_dict(cls, model_dictionary: Dict[str, Any]) -> "Model":
+    def from_dict(cls, model_dictionary: dict[str, Any]) -> "Model":
         model_instance = cls(
             **{
                 k: v
@@ -379,89 +458,109 @@ def from_dict(cls, model_dictionary: Dict[str, Any]) -> "Model":
         return model_instance
 
 
-class HfApiModel(Model):
-    """A class to interact with Hugging Face's Inference API for language model interaction.
-
-    This model allows you to communicate with Hugging Face's models using the Inference API. It can be used in both serverless mode or with a dedicated endpoint, supporting features like stop sequences and grammar customization.
+class VLLMModel(Model):
+    """Model to use [vLLM](https://docs.vllm.ai/) for fast LLM inference and serving.
 
     Parameters:
-        model_id (`str`, *optional*, defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`):
-            The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
-        provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"replicate"`, `"together"`, `"fal-ai"`, `"sambanova"` or `"hf-inference"`.
-            defaults to hf-inference (HF Inference API).
-        token (`str`, *optional*):
-            Token used by the Hugging Face API for authentication. This token need to be authorized 'Make calls to the serverless Inference API'.
-            If the model is gated (like Llama-3 models), the token also needs 'Read access to contents of all public gated repos you can access'.
-            If not provided, the class will try to use environment variable 'HF_TOKEN', else use the token stored in the Hugging Face CLI configuration.
-        timeout (`int`, *optional*, defaults to 120):
-            Timeout for the API request, in seconds.
-        custom_role_conversions (`dict[str, str]`, *optional*):
-            Custom role conversion mapping to convert message roles in others.
-            Useful for specific models that do not support specific message roles like "system".
-        **kwargs:
-            Additional keyword arguments to pass to the Hugging Face API.
-
-    Raises:
-        ValueError:
-            If the model name is not provided.
-
-    Example:
-    ```python
-    >>> engine = HfApiModel(
-    ...     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
-    ...     token="your_hf_token_here",
-    ...     max_tokens=5000,
-    ... )
-    >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}]
-    >>> response = engine(messages, stop_sequences=["END"])
-    >>> print(response)
-    "Quantum mechanics is the branch of physics that studies..."
-    ```
+        model_id (`str`):
+            The Hugging Face model ID to be used for inference.
+            This can be a path or model identifier from the Hugging Face model hub.
+        model_kwargs (`dict[str, Any]`, *optional*):
+            Additional keyword arguments to pass to the vLLM model (like revision, max_model_len, etc.).
     """
 
     def __init__(
         self,
-        model_id: str = "Qwen/Qwen2.5-Coder-32B-Instruct",
-        provider: Optional[str] = None,
-        token: Optional[str] = None,
-        timeout: Optional[int] = 120,
-        custom_role_conversions: Optional[Dict[str, str]] = None,
+        model_id,
+        model_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ):
+        if not _is_package_available("vllm"):
+            raise ModuleNotFoundError("Please install 'vllm' extra to use VLLMModel: `pip install 'smolagents[vllm]'`")
+
+        from vllm import LLM  # type: ignore
+        from vllm.transformers_utils.tokenizer import get_tokenizer  # type: ignore
+
+        self.model_kwargs = model_kwargs or {}
         super().__init__(**kwargs)
         self.model_id = model_id
-        self.provider = provider
-        if token is None:
-            token = os.getenv("HF_TOKEN")
-        self.client = InferenceClient(self.model_id, provider=provider, token=token, timeout=timeout)
-        self.custom_role_conversions = custom_role_conversions
+        self.model = LLM(model=model_id, **self.model_kwargs)
+        assert self.model is not None
+        self.tokenizer = get_tokenizer(model_id)
+        self._is_vlm = False  # VLLMModel does not support vision models yet.
+
+    def cleanup(self):
+        import gc
+
+        import torch
+        from vllm.distributed.parallel_state import (  # type: ignore
+            destroy_distributed_environment,
+            destroy_model_parallel,
+        )
+
+        destroy_model_parallel()
+        if self.model is not None:
+            # taken from https://github.com/vllm-project/vllm/issues/1908#issuecomment-2076870351
+            del self.model.llm_engine.model_executor.driver_worker
+        gc.collect()
+        destroy_distributed_environment()
+        torch.cuda.empty_cache()
 
-    def __call__(
+    def generate(
         self,
-        messages: List[Dict[str, str]],
-        stop_sequences: Optional[List[str]] = None,
-        grammar: Optional[str] = None,
-        tools_to_call_from: Optional[List[Tool]] = None,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
         **kwargs,
     ) -> ChatMessage:
+        from vllm import SamplingParams  # type: ignore
+
         completion_kwargs = self._prepare_completion_kwargs(
             messages=messages,
+            flatten_messages_as_text=(not self._is_vlm),
             stop_sequences=stop_sequences,
             grammar=grammar,
             tools_to_call_from=tools_to_call_from,
-            convert_images_to_image_urls=True,
-            custom_role_conversions=self.custom_role_conversions,
             **kwargs,
         )
-        response = self.client.chat_completion(**completion_kwargs)
+        messages = completion_kwargs.pop("messages")
+        prepared_stop_sequences = completion_kwargs.pop("stop", [])
+        tools = completion_kwargs.pop("tools", None)
+        completion_kwargs.pop("tool_choice", None)
 
-        self.last_input_token_count = response.usage.prompt_tokens
-        self.last_output_token_count = response.usage.completion_tokens
-        message = ChatMessage.from_hf_api(response.choices[0].message, raw=response)
         if tools_to_call_from is not None:
-            return parse_tool_args_if_needed(message)
-        return message
+            prompt = self.tokenizer.apply_chat_template(
+                messages,
+                tools=tools,
+                add_generation_prompt=True,
+                tokenize=False,
+            )
+        else:
+            prompt = self.tokenizer.apply_chat_template(
+                messages,
+                tokenize=False,
+            )
+
+        sampling_params = SamplingParams(
+            n=kwargs.get("n", 1),
+            temperature=kwargs.get("temperature", 0.0),
+            max_tokens=kwargs.get("max_tokens", 2048),
+            stop=prepared_stop_sequences,
+        )
+
+        out = self.model.generate(
+            prompt,
+            sampling_params=sampling_params,
+        )
+        output_text = out[0].outputs[0].text
+        self.last_input_token_count = len(out[0].prompt_token_ids)
+        self.last_output_token_count = len(out[0].outputs[0].token_ids)
+        return ChatMessage(
+            role=MessageRole.ASSISTANT,
+            content=output_text,
+            raw={"out": output_text, "completion_kwargs": completion_kwargs},
+        )
 
 
 class MLXModel(Model):
@@ -510,50 +609,31 @@ def __init__(
         trust_remote_code: bool = False,
         **kwargs,
     ):
-        super().__init__(**kwargs)
+        super().__init__(
+            flatten_messages_as_text=True, model_id=model_id, **kwargs
+        )  # mlx-lm doesn't support vision models
         if not _is_package_available("mlx_lm"):
             raise ModuleNotFoundError(
                 "Please install 'mlx-lm' extra to use 'MLXModel': `pip install 'smolagents[mlx-lm]'`"
             )
-        import mlx_lm
+        import mlx_lm  # type: ignore
 
         self.model_id = model_id
         self.model, self.tokenizer = mlx_lm.load(model_id, tokenizer_config={"trust_remote_code": trust_remote_code})
         self.stream_generate = mlx_lm.stream_generate
         self.tool_name_key = tool_name_key
         self.tool_arguments_key = tool_arguments_key
+        self.is_vlm = False  # mlx-lm doesn't support vision models
 
-    def _to_message(self, text, tools_to_call_from):
-        if tools_to_call_from:
-            # solution for extracting tool JSON without assuming a specific model output format
-            maybe_json = "{" + text.split("{", 1)[-1][::-1].split("}", 1)[-1][::-1] + "}"
-            parsed_text = json.loads(maybe_json)
-            tool_name = parsed_text.get(self.tool_name_key, None)
-            tool_arguments = parsed_text.get(self.tool_arguments_key, None)
-            if tool_name:
-                return ChatMessage(
-                    role="assistant",
-                    content="",
-                    tool_calls=[
-                        ChatMessageToolCall(
-                            id=uuid.uuid4(),
-                            type="function",
-                            function=ChatMessageToolCallDefinition(name=tool_name, arguments=tool_arguments),
-                        )
-                    ],
-                )
-        return ChatMessage(role="assistant", content=text)
-
-    def __call__(
+    def generate(
         self,
-        messages: List[Dict[str, str]],
-        stop_sequences: Optional[List[str]] = None,
-        grammar: Optional[str] = None,
-        tools_to_call_from: Optional[List[Tool]] = None,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
         **kwargs,
     ) -> ChatMessage:
         completion_kwargs = self._prepare_completion_kwargs(
-            flatten_messages_as_text=True,  # mlx-lm doesn't support vision models
             messages=messages,
             stop_sequences=stop_sequences,
             grammar=grammar,
@@ -561,7 +641,7 @@ def __call__(
             **kwargs,
         )
         messages = completion_kwargs.pop("messages")
-        prepared_stop_sequences = completion_kwargs.pop("stop", [])
+        stops = completion_kwargs.pop("stop", [])
         tools = completion_kwargs.pop("tools", None)
         completion_kwargs.pop("tool_choice", None)
 
@@ -574,17 +654,16 @@ def __call__(
         self.last_input_token_count = len(prompt_ids)
         self.last_output_token_count = 0
         text = ""
-
-        for _ in self.stream_generate(self.model, self.tokenizer, prompt=prompt_ids, **completion_kwargs):
+        for response in self.stream_generate(self.model, self.tokenizer, prompt=prompt_ids, **completion_kwargs):
             self.last_output_token_count += 1
-            text += _.text
-            for stop_sequence in prepared_stop_sequences:
-                stop_sequence_start = text.rfind(stop_sequence)
-                if stop_sequence_start != -1:
-                    text = text[:stop_sequence_start]
-                    return self._to_message(text, tools_to_call_from)
+            text += response.text
+            if any((stop_index := text.rfind(stop)) != -1 for stop in stops):
+                text = text[:stop_index]
+                break
 
-        return self._to_message(text, tools_to_call_from)
+        return ChatMessage(
+            role=MessageRole.ASSISTANT, content=text, raw={"out": text, "completion_kwargs": completion_kwargs}
+        )
 
 
 class TransformersModel(Model):
@@ -596,8 +675,9 @@ class TransformersModel(Model):
     > You must have `transformers` and `torch` installed on your machine. Please run `pip install smolagents[transformers]` if it's not the case.
 
     Parameters:
-        model_id (`str`, *optional*, defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`):
+        model_id (`str`):
             The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
+            For example, `"Qwen/Qwen2.5-Coder-32B-Instruct"`.
         device_map (`str`, *optional*):
             The device_map to initialize your model with.
         torch_dtype (`str`, *optional*):
@@ -628,67 +708,79 @@ class TransformersModel(Model):
 
     def __init__(
         self,
-        model_id: Optional[str] = None,
-        device_map: Optional[str] = None,
-        torch_dtype: Optional[str] = None,
+        model_id: str | None = None,
+        device_map: str | None = None,
+        torch_dtype: str | None = None,
         trust_remote_code: bool = False,
         **kwargs,
     ):
-        super().__init__(**kwargs)
-        if not is_torch_available() or not _is_package_available("transformers"):
+        try:
+            import torch
+            from transformers import (
+                AutoModelForCausalLM,
+                AutoModelForImageTextToText,
+                AutoProcessor,
+                AutoTokenizer,
+                TextIteratorStreamer,
+            )
+        except ModuleNotFoundError:
             raise ModuleNotFoundError(
                 "Please install 'transformers' extra to use 'TransformersModel': `pip install 'smolagents[transformers]'`"
             )
-        import torch
-        from transformers import AutoModelForCausalLM, AutoModelForImageTextToText, AutoProcessor, AutoTokenizer
 
-        default_model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
-        if model_id is None:
-            model_id = default_model_id
-            logger.warning(f"`model_id`not provided, using this default tokenizer for token counts: '{model_id}'")
-        self.model_id = model_id
+        if not model_id:
+            warnings.warn(
+                "The 'model_id' parameter will be required in version 2.0.0. "
+                "Please update your code to pass this parameter to avoid future errors. "
+                "For now, it defaults to 'HuggingFaceTB/SmolLM2-1.7B-Instruct'.",
+                FutureWarning,
+            )
+            model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
 
-        default_max_tokens = 5000
+        default_max_tokens = 4096
         max_new_tokens = kwargs.get("max_new_tokens") or kwargs.get("max_tokens")
         if not max_new_tokens:
             kwargs["max_new_tokens"] = default_max_tokens
             logger.warning(
                 f"`max_new_tokens` not provided, using this default value for `max_new_tokens`: {default_max_tokens}"
             )
-        self.kwargs = kwargs
 
         if device_map is None:
             device_map = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"Using device: {device_map}")
         self._is_vlm = False
         try:
-            self.model = AutoModelForCausalLM.from_pretrained(
+            self.model = AutoModelForImageTextToText.from_pretrained(
                 model_id,
                 device_map=device_map,
                 torch_dtype=torch_dtype,
                 trust_remote_code=trust_remote_code,
             )
-            self.tokenizer = AutoTokenizer.from_pretrained(model_id)
+            self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=trust_remote_code)
+            self._is_vlm = True
+            self.streamer = TextIteratorStreamer(self.processor.tokenizer, skip_prompt=True, skip_special_tokens=True)  # type: ignore
+
         except ValueError as e:
             if "Unrecognized configuration class" in str(e):
-                self.model = AutoModelForImageTextToText.from_pretrained(model_id, device_map=device_map)
-                self.processor = AutoProcessor.from_pretrained(model_id)
-                self._is_vlm = True
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_id,
+                    device_map=device_map,
+                    torch_dtype=torch_dtype,
+                    trust_remote_code=trust_remote_code,
+                )
+                self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code)
+                self.streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)  # type: ignore
             else:
                 raise e
         except Exception as e:
-            logger.warning(
-                f"Failed to load tokenizer and model for {model_id=}: {e}. Loading default tokenizer and model instead from {default_model_id=}."
-            )
-            self.model_id = default_model_id
-            self.tokenizer = AutoTokenizer.from_pretrained(default_model_id)
-            self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device_map, torch_dtype=torch_dtype)
+            raise ValueError(f"Failed to load tokenizer and model for {model_id=}: {e}") from e
+        super().__init__(flatten_messages_as_text=not self._is_vlm, model_id=model_id, **kwargs)
 
-    def make_stopping_criteria(self, stop_sequences: List[str], tokenizer) -> "StoppingCriteriaList":
+    def make_stopping_criteria(self, stop_sequences: list[str], tokenizer) -> "StoppingCriteriaList":
         from transformers import StoppingCriteria, StoppingCriteriaList
 
         class StopOnStrings(StoppingCriteria):
-            def __init__(self, stop_strings: List[str], tokenizer):
+            def __init__(self, stop_strings: list[str], tokenizer):
                 self.stop_strings = stop_strings
                 self.tokenizer = tokenizer
                 self.stream = ""
@@ -705,20 +797,18 @@ def __call__(self, input_ids, scores, **kwargs):
 
         return StoppingCriteriaList([StopOnStrings(stop_sequences, tokenizer)])
 
-    def __call__(
+    def _prepare_completion_args(
         self,
-        messages: List[Dict[str, str]],
-        stop_sequences: Optional[List[str]] = None,
-        grammar: Optional[str] = None,
-        tools_to_call_from: Optional[List[Tool]] = None,
-        images: Optional[List[Image.Image]] = None,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
         **kwargs,
-    ) -> ChatMessage:
+    ) -> dict[str, Any]:
         completion_kwargs = self._prepare_completion_kwargs(
             messages=messages,
             stop_sequences=stop_sequences,
             grammar=grammar,
-            flatten_messages_as_text=(not self._is_vlm),
             **kwargs,
         )
 
@@ -730,144 +820,201 @@ def __call__(
             or kwargs.get("max_tokens")
             or self.kwargs.get("max_new_tokens")
             or self.kwargs.get("max_tokens")
+            or 1024
         )
+        prompt_tensor = (self.processor if hasattr(self, "processor") else self.tokenizer).apply_chat_template(
+            messages,  # type: ignore
+            tools=[get_tool_json_schema(tool) for tool in tools_to_call_from] if tools_to_call_from else None,
+            return_tensors="pt",
+            add_generation_prompt=True if tools_to_call_from else False,
+            tokenize=True,
+            return_dict=True,
+        )
+        prompt_tensor = prompt_tensor.to(self.model.device)  # type: ignore
+        if hasattr(prompt_tensor, "input_ids"):
+            prompt_tensor = prompt_tensor["input_ids"]
 
-        if max_new_tokens:
-            completion_kwargs["max_new_tokens"] = max_new_tokens
-
-        if hasattr(self, "processor"):
-            images = [Image.open(image) for image in images] if images else None
-            prompt_tensor = self.processor.apply_chat_template(
-                messages,
-                tools=[get_tool_json_schema(tool) for tool in tools_to_call_from] if tools_to_call_from else None,
-                return_tensors="pt",
-                tokenize=True,
-                return_dict=True,
-                images=images,
-                add_generation_prompt=True if tools_to_call_from else False,
-            )
-        else:
-            prompt_tensor = self.tokenizer.apply_chat_template(
-                messages,
-                tools=[get_tool_json_schema(tool) for tool in tools_to_call_from] if tools_to_call_from else None,
-                return_tensors="pt",
-                return_dict=True,
-                add_generation_prompt=True if tools_to_call_from else False,
-            )
-
-        prompt_tensor = prompt_tensor.to(self.model.device)
-        count_prompt_tokens = prompt_tensor["input_ids"].shape[1]
-
-        if stop_sequences:
-            stopping_criteria = self.make_stopping_criteria(
-                stop_sequences, tokenizer=self.processor if hasattr(self, "processor") else self.tokenizer
-            )
-        else:
-            stopping_criteria = None
-
-        out = self.model.generate(
-            **prompt_tensor,
+        model_tokenizer = self.processor.tokenizer if hasattr(self, "processor") else self.tokenizer
+        stopping_criteria = (
+            self.make_stopping_criteria(stop_sequences, tokenizer=model_tokenizer) if stop_sequences else None
+        )
+        completion_kwargs["max_new_tokens"] = max_new_tokens
+        return dict(
+            inputs=prompt_tensor,
+            use_cache=True,
             stopping_criteria=stopping_criteria,
             **completion_kwargs,
         )
+
+    def generate(
+        self,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        **kwargs,
+    ) -> ChatMessage:
+        generation_kwargs = self._prepare_completion_args(
+            messages=messages,
+            stop_sequences=stop_sequences,
+            grammar=grammar,
+            tools_to_call_from=tools_to_call_from,
+            **kwargs,
+        )
+        count_prompt_tokens = generation_kwargs["inputs"].shape[1]  # type: ignore
+        out = self.model.generate(
+            **generation_kwargs,
+        )
         generated_tokens = out[0, count_prompt_tokens:]
         if hasattr(self, "processor"):
-            output = self.processor.decode(generated_tokens, skip_special_tokens=True)
+            output_text = self.processor.decode(generated_tokens, skip_special_tokens=True)
         else:
-            output = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+            output_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
         self.last_input_token_count = count_prompt_tokens
         self.last_output_token_count = len(generated_tokens)
 
         if stop_sequences is not None:
-            output = remove_stop_sequences(output, stop_sequences)
+            output_text = remove_stop_sequences(output_text, stop_sequences)
+
+        return ChatMessage(
+            role=MessageRole.ASSISTANT,
+            content=output_text,
+            raw={
+                "out": output_text,
+                "completion_kwargs": {key: value for key, value in generation_kwargs.items() if key != "inputs"},
+            },
+        )
 
-        if tools_to_call_from is None:
-            return ChatMessage(
-                role="assistant",
-                content=output,
-                raw={"out": out, "completion_kwargs": completion_kwargs},
-            )
-        else:
-            if "Action:" in output:
-                output = output.split("Action:", 1)[1].strip()
-            try:
-                start_index = output.index("{")
-                end_index = output.rindex("}")
-                output = output[start_index : end_index + 1]
-            except Exception as e:
-                raise Exception("No json blob found in output!") from e
-
-            try:
-                parsed_output = json.loads(output)
-            except json.JSONDecodeError as e:
-                raise ValueError(f"Tool call '{output}' has an invalid JSON structure: {e}")
-            tool_name = parsed_output.get("name")
-            tool_arguments = parsed_output.get("arguments")
-            return ChatMessage(
-                role="assistant",
-                content="",
-                tool_calls=[
-                    ChatMessageToolCall(
-                        id="".join(random.choices("0123456789", k=5)),
-                        type="function",
-                        function=ChatMessageToolCallDefinition(name=tool_name, arguments=tool_arguments),
-                    )
-                ],
-                raw={"out": out, "completion_kwargs": completion_kwargs},
-            )
+    def generate_stream(
+        self,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        **kwargs,
+    ) -> Generator:
+        generation_kwargs = self._prepare_completion_args(
+            messages=messages,
+            stop_sequences=stop_sequences,
+            grammar=grammar,
+            tools_to_call_from=tools_to_call_from,
+            **kwargs,
+        )
+        count_prompt_tokens = generation_kwargs["inputs"].shape[1]  # type: ignore
+
+        thread = Thread(target=self.model.generate, kwargs={"streamer": self.streamer, **generation_kwargs})
+        thread.start()
+
+        self.last_output_token_count = 0
+
+        # Generate with streaming
+        for new_text in self.streamer:
+            yield CompletionDelta(content=new_text, tool_calls=None)
+            self.last_output_token_count += 1
 
+        self.last_input_token_count = count_prompt_tokens
+        thread.join()
+
+
+class ApiModel(Model):
+    """
+    Base class for API-based language models.
 
-class LiteLLMModel(Model):
-    """This model connects to [LiteLLM](https://www.litellm.ai/) as a gateway to hundreds of LLMs.
+    This class serves as a foundation for implementing models that interact with
+    external APIs. It handles the common functionality for managing model IDs,
+    custom role mappings, and API client connections.
+
+    Parameters:
+        model_id (`str`):
+            The identifier for the model to be used with the API.
+        custom_role_conversions (`dict[str, str]`, *optional*):
+            Mapping to convert between internal role names and API-specific role names. Defaults to None.
+        client (`Any`, *optional*):
+            Pre-configured API client instance. If not provided, a default client will be created. Defaults to None.
+        **kwargs: Additional keyword arguments to pass to the parent class.
+    """
+
+    def __init__(
+        self, model_id: str, custom_role_conversions: dict[str, str] | None = None, client: Any | None = None, **kwargs
+    ):
+        super().__init__(model_id=model_id, **kwargs)
+        self.custom_role_conversions = custom_role_conversions or {}
+        self.client = client or self.create_client()
+
+    def create_client(self):
+        """Create the API client for the specific service."""
+        raise NotImplementedError("Subclasses must implement this method to create a client")
+
+
+class LiteLLMModel(ApiModel):
+    """Model to use [LiteLLM Python SDK](https://docs.litellm.ai/docs/#litellm-python-sdk) to access hundreds of LLMs.
 
     Parameters:
         model_id (`str`):
             The model identifier to use on the server (e.g. "gpt-3.5-turbo").
         api_base (`str`, *optional*):
-            The base URL of the OpenAI-compatible API server.
+            The base URL of the provider API to call the model.
         api_key (`str`, *optional*):
             The API key to use for authentication.
         custom_role_conversions (`dict[str, str]`, *optional*):
             Custom role conversion mapping to convert message roles in others.
             Useful for specific models that do not support specific message roles like "system".
+        flatten_messages_as_text (`bool`, *optional*): Whether to flatten messages as text.
+            Defaults to `True` for models that start with "ollama", "groq", "cerebras".
         **kwargs:
             Additional keyword arguments to pass to the OpenAI API.
     """
 
     def __init__(
         self,
-        model_id: str = "anthropic/claude-3-5-sonnet-20240620",
-        api_base=None,
-        api_key=None,
-        custom_role_conversions: Optional[Dict[str, str]] = None,
+        model_id: str | None = None,
+        api_base: str | None = None,
+        api_key: str | None = None,
+        custom_role_conversions: dict[str, str] | None = None,
+        flatten_messages_as_text: bool | None = None,
         **kwargs,
     ):
-        super().__init__(**kwargs)
-        self.model_id = model_id
+        if not model_id:
+            warnings.warn(
+                "The 'model_id' parameter will be required in version 2.0.0. "
+                "Please update your code to pass this parameter to avoid future errors. "
+                "For now, it defaults to 'anthropic/claude-3-5-sonnet-20240620'.",
+                FutureWarning,
+            )
+            model_id = "anthropic/claude-3-5-sonnet-20240620"
         self.api_base = api_base
         self.api_key = api_key
-        self.custom_role_conversions = custom_role_conversions
-        self.flatten_messages_as_text = (
-            kwargs.get("flatten_messages_as_text")
-            if "flatten_messages_as_text" in kwargs
-            else self.model_id.startswith(("ollama", "groq", "cerebras"))
+        flatten_messages_as_text = (
+            flatten_messages_as_text
+            if flatten_messages_as_text is not None
+            else model_id.startswith(("ollama", "groq", "cerebras"))
+        )
+        super().__init__(
+            model_id=model_id,
+            custom_role_conversions=custom_role_conversions,
+            flatten_messages_as_text=flatten_messages_as_text,
+            **kwargs,
         )
 
-    def __call__(
-        self,
-        messages: List[Dict[str, str]],
-        stop_sequences: Optional[List[str]] = None,
-        grammar: Optional[str] = None,
-        tools_to_call_from: Optional[List[Tool]] = None,
-        **kwargs,
-    ) -> ChatMessage:
+    def create_client(self):
+        """Create the LiteLLM client."""
         try:
             import litellm
-        except ModuleNotFoundError:
+        except ModuleNotFoundError as e:
             raise ModuleNotFoundError(
                 "Please install 'litellm' extra to use LiteLLMModel: `pip install 'smolagents[litellm]'`"
-            )
+            ) from e
+
+        return litellm
 
+    def generate(
+        self,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        **kwargs,
+    ) -> ChatMessage:
         completion_kwargs = self._prepare_completion_kwargs(
             messages=messages,
             stop_sequences=stop_sequences,
@@ -877,26 +1024,301 @@ def __call__(
             api_base=self.api_base,
             api_key=self.api_key,
             convert_images_to_image_urls=True,
-            flatten_messages_as_text=self.flatten_messages_as_text,
             custom_role_conversions=self.custom_role_conversions,
             **kwargs,
         )
 
-        response = litellm.completion(**completion_kwargs)
+        response = self.client.completion(**completion_kwargs)
 
         self.last_input_token_count = response.usage.prompt_tokens
         self.last_output_token_count = response.usage.completion_tokens
-        message = ChatMessage.from_dict(
-            response.choices[0].message.model_dump(include={"role", "content", "tool_calls"})
+        return ChatMessage.from_dict(
+            response.choices[0].message.model_dump(include={"role", "content", "tool_calls"}),
+            raw=response,
         )
-        message.raw = response
 
-        if tools_to_call_from is not None:
-            return parse_tool_args_if_needed(message)
-        return message
+    def generate_stream(
+        self,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        **kwargs,
+    ) -> Generator:
+        if tools_to_call_from:
+            raise NotImplementedError("Streaming is not yet supported for tool calling")
+        completion_kwargs = self._prepare_completion_kwargs(
+            messages=messages,
+            stop_sequences=stop_sequences,
+            grammar=grammar,
+            tools_to_call_from=tools_to_call_from,
+            model=self.model_id,
+            custom_role_conversions=self.custom_role_conversions,
+            convert_images_to_image_urls=True,
+            **kwargs,
+        )
+        for event in self.client.completion(**completion_kwargs, stream=True, stream_options={"include_usage": True}):
+            if event.choices:
+                if event.choices[0].delta is None:
+                    if not getattr(event.choices[0], "finish_reason", None):
+                        raise ValueError(f"No content or tool calls in event: {event}")
+                else:
+                    yield CompletionDelta(
+                        content=event.choices[0].delta.content,
+                    )
+            if getattr(event, "usage", None):
+                self.last_input_token_count = event.usage.prompt_tokens
+                self.last_output_token_count = event.usage.completion_tokens
 
 
-class OpenAIServerModel(Model):
+class LiteLLMRouterModel(LiteLLMModel):
+    """Router-based client for interacting with the [LiteLLM Python SDK Router](https://docs.litellm.ai/docs/routing).
+
+    This class provides a high-level interface for distributing requests among multiple language models using
+    the LiteLLM SDK's routing capabilities. It is responsible for initializing and configuring the router client,
+    applying custom role conversions, and managing message formatting to ensure seamless integration with various LLMs.
+
+    Parameters:
+        model_id (`str`):
+            Identifier for the model group to use from the model list (e.g., "model-group-1").
+        model_list (`list[dict[str, Any]]`):
+            Model configurations to be used for routing.
+            Each configuration should include the model group name and any necessary parameters.
+            For more details, refer to the [LiteLLM Routing](https://docs.litellm.ai/docs/routing#quick-start) documentation.
+        client_kwargs (`dict[str, Any]`, *optional*):
+            Additional configuration parameters for the Router client. For more details, see the
+            [LiteLLM Routing Configurations](https://docs.litellm.ai/docs/routing).
+        custom_role_conversions (`dict[str, str]`, *optional*):
+            Custom role conversion mapping to convert message roles in others.
+            Useful for specific models that do not support specific message roles like "system".
+        flatten_messages_as_text (`bool`, *optional*): Whether to flatten messages as text.
+            Defaults to `True` for models that start with "ollama", "groq", "cerebras".
+        **kwargs:
+            Additional keyword arguments to pass to the LiteLLM Router completion method.
+
+    Example:
+    ```python
+    >>> import os
+    >>> from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMRouterModel
+    >>> os.environ["OPENAI_API_KEY"] = ""
+    >>> os.environ["AWS_ACCESS_KEY_ID"] = ""
+    >>> os.environ["AWS_SECRET_ACCESS_KEY"] = ""
+    >>> os.environ["AWS_REGION"] = ""
+    >>> llm_loadbalancer_model_list = [
+    ...     {
+    ...         "model_name": "model-group-1",
+    ...         "litellm_params": {
+    ...             "model": "gpt-4o-mini",
+    ...             "api_key": os.getenv("OPENAI_API_KEY"),
+    ...         },
+    ...     },
+    ...     {
+    ...         "model_name": "model-group-1",
+    ...         "litellm_params": {
+    ...             "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+    ...             "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
+    ...             "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
+    ...             "aws_region_name": os.getenv("AWS_REGION"),
+    ...         },
+    ...     },
+    ... ]
+    >>> model = LiteLLMRouterModel(
+    ...    model_id="model-group-1",
+    ...    model_list=llm_loadbalancer_model_list,
+    ...    client_kwargs={
+    ...        "routing_strategy":"simple-shuffle"
+    ...    }
+    ... )
+    >>> agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
+    >>> agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
+    ```
+    """
+
+    def __init__(
+        self,
+        model_id: str,
+        model_list: list[dict[str, Any]],
+        client_kwargs: dict[str, Any] | None = None,
+        custom_role_conversions: dict[str, str] | None = None,
+        flatten_messages_as_text: bool | None = None,
+        **kwargs,
+    ):
+        self.client_kwargs = {
+            "model_list": model_list,
+            **(client_kwargs or {}),
+        }
+        super().__init__(
+            model_id=model_id,
+            custom_role_conversions=custom_role_conversions,
+            flatten_messages_as_text=flatten_messages_as_text,
+            **kwargs,
+        )
+
+    def create_client(self):
+        try:
+            from litellm import Router
+        except ModuleNotFoundError as e:
+            raise ModuleNotFoundError(
+                "Please install 'litellm' extra to use LiteLLMRouterModel: `pip install 'smolagents[litellm]'`"
+            ) from e
+        return Router(**self.client_kwargs)
+
+
+class InferenceClientModel(ApiModel):
+    """A class to interact with Hugging Face's Inference Providers for language model interaction.
+
+    This model allows you to communicate with Hugging Face's models using Inference Providers. It can be used either in serverless mode or with a dedicated endpoint, supporting features like stop sequences and grammar customization.
+
+    Providers include Cerebras, Cohere, Fal, Fireworks, HF-Inference, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, and more.
+
+    Parameters:
+        model_id (`str`, *optional*, default `"Qwen/Qwen2.5-Coder-32B-Instruct"`):
+            The Hugging Face model ID to be used for inference.
+            This can be a model identifier from the Hugging Face model hub or a URL to a deployed Inference Endpoint.
+            Currently, it defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`, but this may change in the future.
+        provider (`str`, *optional*):
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, `"sambanova"`, `"together"`, etc.
+            Currently, it defaults to hf-inference (HF Inference API).
+        token (`str`, *optional*):
+            Token used by the Hugging Face API for authentication. This token needs to be authorized to 'Make calls to the serverless Inference Providers'.
+            If the model is gated (like Llama-3 models), the token also needs 'Read access to contents of all public gated repos you can access'.
+            If not provided, the class will try to use environment variable 'HF_TOKEN', else use the token stored in the Hugging Face CLI configuration.
+        timeout (`int`, *optional*, defaults to 120):
+            Timeout for the API request, in seconds.
+        client_kwargs (`dict[str, Any]`, *optional*):
+            Additional keyword arguments to pass to the Hugging Face InferenceClient.
+        custom_role_conversions (`dict[str, str]`, *optional*):
+            Custom role conversion mapping to convert message roles in others.
+            Useful for specific models that do not support specific message roles like "system".
+        api_key (`str`, *optional*):
+            Token to use for authentication. This is a duplicated argument from `token` to make [`InferenceClientModel`]
+            follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
+        **kwargs:
+            Additional keyword arguments to pass to the Hugging Face API.
+
+    Raises:
+        ValueError:
+            If both `token` and `api_key` are provided.
+
+    Example:
+    ```python
+    >>> engine = InferenceClientModel(
+    ...     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+    ...     provider="together",
+    ...     token="your_hf_token_here",
+    ...     max_tokens=5000,
+    ... )
+    >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}]
+    >>> response = engine(messages, stop_sequences=["END"])
+    >>> print(response)
+    "Quantum mechanics is the branch of physics that studies..."
+    ```
+    """
+
+    def __init__(
+        self,
+        model_id: str = "Qwen/Qwen2.5-Coder-32B-Instruct",
+        provider: str | None = None,
+        token: str | None = None,
+        timeout: int = 120,
+        client_kwargs: dict[str, Any] | None = None,
+        custom_role_conversions: dict[str, str] | None = None,
+        api_key: str | None = None,
+        **kwargs,
+    ):
+        if token is not None and api_key is not None:
+            raise ValueError(
+                "Received both `token` and `api_key` arguments. Please provide only one of them."
+                " `api_key` is an alias for `token` to make the API compatible with OpenAI's client."
+                " It has the exact same behavior as `token`."
+            )
+        token = token if token is not None else api_key
+        if token is None:
+            token = os.getenv("HF_TOKEN")
+        self.client_kwargs = {
+            **(client_kwargs or {}),
+            "model": model_id,
+            "provider": provider,
+            "token": token,
+            "timeout": timeout,
+        }
+        super().__init__(model_id=model_id, custom_role_conversions=custom_role_conversions, **kwargs)
+
+    def create_client(self):
+        """Create the Hugging Face client."""
+        from huggingface_hub import InferenceClient
+
+        return InferenceClient(**self.client_kwargs)
+
+    def generate(
+        self,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        **kwargs,
+    ) -> ChatMessage:
+        completion_kwargs = self._prepare_completion_kwargs(
+            messages=messages,
+            stop_sequences=stop_sequences,
+            grammar=grammar,
+            tools_to_call_from=tools_to_call_from,
+            convert_images_to_image_urls=True,
+            custom_role_conversions=self.custom_role_conversions,
+            **kwargs,
+        )
+        response = self.client.chat_completion(**completion_kwargs)
+
+        self.last_input_token_count = response.usage.prompt_tokens
+        self.last_output_token_count = response.usage.completion_tokens
+        return ChatMessage.from_dict(asdict(response.choices[0].message), raw=response)
+
+    def generate_stream(
+        self,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        **kwargs,
+    ) -> Generator:
+        if tools_to_call_from:
+            raise NotImplementedError("Streaming is not yet supported for tool calling")
+        completion_kwargs = self._prepare_completion_kwargs(
+            messages=messages,
+            stop_sequences=stop_sequences,
+            grammar=grammar,
+            tools_to_call_from=tools_to_call_from,
+            model=self.model_id,
+            custom_role_conversions=self.custom_role_conversions,
+            convert_images_to_image_urls=True,
+            **kwargs,
+        )
+        for event in self.client.chat.completions.create(
+            **completion_kwargs, stream=True, stream_options={"include_usage": True}
+        ):
+            if event.choices:
+                if event.choices[0].delta is None:
+                    if not getattr(event.choices[0], "finish_reason", None):
+                        raise ValueError(f"No content or tool calls in event: {event}")
+                else:
+                    yield CompletionDelta(
+                        content=event.choices[0].delta.content,
+                    )
+            if getattr(event, "usage", None):
+                self.last_input_token_count = event.usage.prompt_tokens
+                self.last_output_token_count = event.usage.completion_tokens
+
+
+class HfApiModel(InferenceClientModel):
+    def __new__(cls, *args, **kwargs):
+        warnings.warn(
+            "HfApiModel has been renamed to InferenceClientModel to more closely follow the name of the underlying Inference library.",
+            DeprecationWarning,
+        )
+        return super().__new__(cls)
+
+
+class OpenAIServerModel(ApiModel):
     """This model connects to an OpenAI-compatible API server.
 
     Parameters:
@@ -915,6 +1337,8 @@ class OpenAIServerModel(Model):
         custom_role_conversions (`dict[str, str]`, *optional*):
             Custom role conversion mapping to convert message roles in others.
             Useful for specific models that do not support specific message roles like "system".
+        flatten_messages_as_text (`bool`, default `False`):
+            Whether to flatten messages as text.
         **kwargs:
             Additional keyword arguments to pass to the OpenAI API.
     """
@@ -922,38 +1346,80 @@ class OpenAIServerModel(Model):
     def __init__(
         self,
         model_id: str,
-        api_base: Optional[str] = None,
-        api_key: Optional[str] = None,
-        organization: Optional[str] | None = None,
-        project: Optional[str] | None = None,
-        client_kwargs: Optional[Dict[str, Any]] = None,
-        custom_role_conversions: Optional[Dict[str, str]] = None,
+        api_base: str | None = None,
+        api_key: str | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        client_kwargs: dict[str, Any] | None = None,
+        custom_role_conversions: dict[str, str] | None = None,
+        flatten_messages_as_text: bool = False,
         **kwargs,
     ):
+        # Explicit arguments take precedence over `client_kwargs`, but an argument left at None must
+        # not clobber a value supplied through `client_kwargs` (e.g. `organization` given by a subclass).
+        explicit = {"api_key": api_key, "base_url": api_base, "organization": organization, "project": project}
+        self.client_kwargs = {
+            **explicit,
+            **{key: value for key, value in (client_kwargs or {}).items() if explicit.get(key) is None},
+        }
+        super().__init__(
+            model_id=model_id,
+            custom_role_conversions=custom_role_conversions,
+            flatten_messages_as_text=flatten_messages_as_text,
+            **kwargs,
+        )
+
+    def create_client(self):  # returns an `openai.OpenAI` client built from `self.client_kwargs`
         try:
             import openai
-        except ModuleNotFoundError:
+        except ModuleNotFoundError as e:
             raise ModuleNotFoundError(
                 "Please install 'openai' extra to use OpenAIServerModel: `pip install 'smolagents[openai]'`"
-            ) from None
+            ) from e  # chain the original import failure instead of suppressing it
 
-        super().__init__(**kwargs)
-        self.model_id = model_id
-        self.client = openai.OpenAI(
-            base_url=api_base,
-            api_key=api_key,
-            organization=organization,
-            project=project,
-            **(client_kwargs or {}),
+        return openai.OpenAI(**self.client_kwargs)
+
+    def generate_stream(
+        self,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        **kwargs,
+    ) -> Generator:
+        """Stream the reply as `CompletionDelta` chunks, recording token counts when an event carries usage."""
+        if tools_to_call_from:
+            raise NotImplementedError("Streaming is not yet supported for tool calling")
+        request_kwargs = self._prepare_completion_kwargs(
+            messages=messages,
+            stop_sequences=stop_sequences,
+            grammar=grammar,
+            tools_to_call_from=tools_to_call_from,
+            model=self.model_id,
+            custom_role_conversions=self.custom_role_conversions,
+            convert_images_to_image_urls=True,
+            **kwargs,
         )
-        self.custom_role_conversions = custom_role_conversions
+        event_stream = self.client.chat.completions.create(
+            **request_kwargs, stream=True, stream_options={"include_usage": True}
+        )
+        for event in event_stream:
+            if event.choices:
+                first_choice = event.choices[0]
+                if first_choice.delta is not None:
+                    yield CompletionDelta(content=first_choice.delta.content)
+                elif not getattr(first_choice, "finish_reason", None):  # no delta and no finish reason
+                    raise ValueError(f"No content or tool calls in event: {event}")
+            if getattr(event, "usage", None):
+                self.last_input_token_count = event.usage.prompt_tokens
+                self.last_output_token_count = event.usage.completion_tokens
 
-    def __call__(
+    def generate(
         self,
-        messages: List[Dict[str, str]],
-        stop_sequences: Optional[List[str]] = None,
-        grammar: Optional[str] = None,
-        tools_to_call_from: Optional[List[Tool]] = None,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
         **kwargs,
     ) -> ChatMessage:
         completion_kwargs = self._prepare_completion_kwargs(
@@ -970,13 +1436,10 @@ def __call__(
         self.last_input_token_count = response.usage.prompt_tokens
         self.last_output_token_count = response.usage.completion_tokens
 
-        message = ChatMessage.from_dict(
-            response.choices[0].message.model_dump(include={"role", "content", "tool_calls"})
+        return ChatMessage.from_dict(
+            response.choices[0].message.model_dump(include={"role", "content", "tool_calls"}),
+            raw=response,
         )
-        message.raw = response
-        if tools_to_call_from is not None:
-            return parse_tool_args_if_needed(message)
-        return message
 
 
 class AzureOpenAIServerModel(OpenAIServerModel):
@@ -991,6 +1454,8 @@ class AzureOpenAIServerModel(OpenAIServerModel):
             The API key to use for authentication. If not provided, it will be inferred from the `AZURE_OPENAI_API_KEY` environment variable.
         api_version (`str`, *optional*):
             The API version to use. If not provided, it will be inferred from the `OPENAI_API_VERSION` environment variable.
+        client_kwargs (`dict[str, Any]`, *optional*):
+            Additional keyword arguments to pass to the AzureOpenAI client (like organization, project, max_retries etc.).
         custom_role_conversions (`dict[str, str]`, *optional*):
             Custom role conversion mapping to convert message roles in others.
             Useful for specific models that do not support specific message roles like "system".
@@ -1001,21 +1466,207 @@ class AzureOpenAIServerModel(OpenAIServerModel):
     def __init__(
         self,
         model_id: str,
-        azure_endpoint: Optional[str] = None,
-        api_key: Optional[str] = None,
-        api_version: Optional[str] = None,
-        custom_role_conversions: Optional[Dict[str, str]] = None,
+        azure_endpoint: str | None = None,
+        api_key: str | None = None,
+        api_version: str | None = None,
+        client_kwargs: dict[str, Any] | None = None,
+        custom_role_conversions: dict[str, str] | None = None,
+        **kwargs,
+    ):
+        # Merge into a fresh dict instead of calling `.update()` on the argument,
+        # so the caller's `client_kwargs` mapping is never mutated.
+        client_kwargs = {
+            **(client_kwargs or {}),
+            "api_version": api_version,
+            "azure_endpoint": azure_endpoint,
+        }
+        super().__init__(
+            model_id=model_id,
+            api_key=api_key,
+            client_kwargs=client_kwargs,
+            custom_role_conversions=custom_role_conversions,
+            **kwargs,
+        )
+
+    def create_client(self):
+        """Return an `openai.AzureOpenAI` client built from `self.client_kwargs`; `openai` is imported lazily."""
+        try:
+            import openai
+        except ModuleNotFoundError as import_error:
+            hint = "Please install 'openai' extra to use AzureOpenAIServerModel: `pip install 'smolagents[openai]'`"
+            raise ModuleNotFoundError(hint) from import_error
+
+        return openai.AzureOpenAI(**self.client_kwargs)
+
+
+class AmazonBedrockServerModel(ApiModel):
+    """
+    A model class for interacting with Amazon Bedrock Server models through the Bedrock API.
+
+    This class provides an interface to interact with various Bedrock language models,
+    allowing for customized model inference, guardrail configuration, message handling,
+    and other parameters allowed by boto3 API.
+
+    Parameters:
+        model_id (`str`):
+            The model identifier to use on Bedrock (e.g. "us.amazon.nova-pro-v1:0").
+        client (`boto3.client`, *optional*):
+            A custom boto3 client for AWS interactions. If not provided, a default client will be created.
+        client_kwargs (dict[str, Any], *optional*):
+            Keyword arguments used to configure the boto3 client if it needs to be created internally.
+            Examples include `region_name`, `config`, or `endpoint_url`.
+        custom_role_conversions (`dict[str, str]`, *optional*):
+            Custom role conversion mapping to convert message roles in others.
+            Useful for specific models that do not support specific message roles like "system".
+            Defaults to converting all roles to "user" role to enable using all the Bedrock models.
+        flatten_messages_as_text (`bool`, default `False`):
+            Whether to flatten messages as text.
+        **kwargs
+            Additional keyword arguments passed directly to the underlying API calls.
+
+    Example:
+        Creating a model instance with default settings:
+        >>> bedrock_model = AmazonBedrockServerModel(
+        ...     model_id='us.amazon.nova-pro-v1:0'
+        ... )
+
+        Creating a model instance with a custom boto3 client:
+        >>> import boto3
+        >>> client = boto3.client('bedrock-runtime', region_name='us-west-2')
+        >>> bedrock_model = AmazonBedrockServerModel(
+        ...     model_id='us.amazon.nova-pro-v1:0',
+        ...     client=client
+        ... )
+
+        Creating a model instance with client_kwargs for internal client creation:
+        >>> bedrock_model = AmazonBedrockServerModel(
+        ...     model_id='us.amazon.nova-pro-v1:0',
+        ...     client_kwargs={'region_name': 'us-west-2', 'endpoint_url': 'https://custom-endpoint.com'}
+        ... )
+
+        Creating a model instance with inference and guardrail configurations:
+        >>> additional_api_config = {
+        ...     "inferenceConfig": {
+        ...         "maxTokens": 3000
+        ...     },
+        ...     "guardrailConfig": {
+        ...         "guardrailIdentifier": "identify1",
+        ...         "guardrailVersion": 'v1'
+        ...     },
+        ... }
+        >>> bedrock_model = AmazonBedrockServerModel(
+        ...     model_id='anthropic.claude-3-haiku-20240307-v1:0',
+        ...     **additional_api_config
+        ... )
+    """
+
+    def __init__(
+        self,
+        model_id: str,
+        client=None,
+        client_kwargs: dict[str, Any] | None = None,
+        custom_role_conversions: dict[str, str] | None = None,
         **kwargs,
     ):
-        # read the api key manually, to avoid super().__init__() trying to use the wrong api_key (OPENAI_API_KEY)
-        if api_key is None:
-            api_key = os.environ.get("AZURE_OPENAI_API_KEY")
+        self.client_kwargs = client_kwargs or {}  # passed to boto3.client() by create_client()
+
+        # Bedrock only supports `assistant` and `user` roles.
+        # Many Bedrock models do not allow conversations to start with the `assistant` role, so the default is set to `user/user`.
+        # This parameter is retained for future model implementations and extended support.
+        custom_role_conversions = custom_role_conversions or {  # an empty dict also falls back to these defaults
+            MessageRole.SYSTEM: MessageRole.USER,
+            MessageRole.ASSISTANT: MessageRole.USER,
+            MessageRole.TOOL_CALL: MessageRole.USER,
+            MessageRole.TOOL_RESPONSE: MessageRole.USER,
+        }
+
+        super().__init__(
+            model_id=model_id,
+            custom_role_conversions=custom_role_conversions,
+            flatten_messages_as_text=False,  # Bedrock API doesn't support flatten messages, must be a list of messages
+            client=client,
+            **kwargs,
+        )
+
+    def _prepare_completion_kwargs(
+        self,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        custom_role_conversions: dict[str, str] | None = None,
+        convert_images_to_image_urls: bool = False,
+        **kwargs,
+    ) -> dict:
+        """
+        Build the keyword arguments for the Bedrock call on top of the base implementation.
+
+        `stop_sequences` and `grammar` are not forwarded to the base method, any
+        `toolConfig` entry is removed, the `type` key is stripped from every message
+        content item, and `modelId` is added to the returned dict.
+        """
+        completion_kwargs = super()._prepare_completion_kwargs(
+            messages=messages,
+            stop_sequences=None,  # Bedrock supports stop sequences through the inference config instead
+            grammar=None,  # Bedrock doesn't support grammar
+            tools_to_call_from=tools_to_call_from,
+            custom_role_conversions=custom_role_conversions,
+            convert_images_to_image_urls=convert_images_to_image_urls,
+            **kwargs,
+        )
+
+        # Not all models in Bedrock support `toolConfig`. Also, smolagents already includes the tool call in the prompt,
+        # so adding `toolConfig` could cause conflicts. We remove it to avoid issues.
+        completion_kwargs.pop("toolConfig", None)
+
+        # The Bedrock API does not support the `type` key in requests.
+        # Strip it in place from every content item of every message.
+        for message in completion_kwargs.get("messages", []):
+            for content in message.get("content", []):
+                if "type" in content:
+                    del content["type"]
+
+        return {
+            "modelId": self.model_id,
+            **completion_kwargs,
+        }
+
+    def create_client(self):
+        """Return a boto3 `bedrock-runtime` client configured with `self.client_kwargs`; boto3 is imported lazily."""
+        try:
+            import boto3  # type: ignore
+        except ModuleNotFoundError as import_error:
+            raise ModuleNotFoundError(
+                "Please install 'bedrock' extra to use AmazonBedrockServerModel: `pip install 'smolagents[bedrock]'`"
+            ) from import_error
+        return boto3.client("bedrock-runtime", **self.client_kwargs)
+
+    def generate(
+        self,
+        messages: list[dict[str, str | list[dict]]],
+        stop_sequences: list[str] | None = None,
+        grammar: str | None = None,
+        tools_to_call_from: list[Tool] | None = None,
+        **kwargs,
+    ) -> ChatMessage:
+        """Call the client's `converse` method once and wrap the text of the reply's first content block in a `ChatMessage`."""
+        completion_kwargs: dict = self._prepare_completion_kwargs(  # `stop_sequences`/`grammar` are not forwarded
+            messages=messages,
+            tools_to_call_from=tools_to_call_from,
+            custom_role_conversions=self.custom_role_conversions,
+            convert_images_to_image_urls=True,
+            **kwargs,
+        )
+        # self.client is created in ApiModel class
+        response = self.client.converse(**completion_kwargs)
 
-        super().__init__(model_id=model_id, api_key=api_key, custom_role_conversions=custom_role_conversions, **kwargs)
-        # if we've reached this point, it means the openai package is available (checked in baseclass) so go ahead and import it
-        import openai
+        # Record the token usage reported in the response
+        self.last_input_token_count = response["usage"]["inputTokens"]
+        self.last_output_token_count = response["usage"]["outputTokens"]
 
-        self.client = openai.AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint)
+        # Keep only the text of the first content block; `response` is edited in place, so `raw` reflects it too
+        response["output"]["message"]["content"] = response["output"]["message"]["content"][0]["text"]
+        return ChatMessage.from_dict(response["output"]["message"], raw=response)
 
 
 __all__ = [
@@ -1025,9 +1676,14 @@ def __init__(
     "Model",
     "MLXModel",
     "TransformersModel",
+    "ApiModel",
+    "InferenceClientModel",
     "HfApiModel",
     "LiteLLMModel",
+    "LiteLLMRouterModel",
     "OpenAIServerModel",
+    "VLLMModel",
     "AzureOpenAIServerModel",
+    "AmazonBedrockServerModel",
     "ChatMessage",
 ]
diff --git a/src/smolagents/monitoring.py b/src/smolagents/monitoring.py
index d7deb4403..0d827a95e 100644
--- a/src/smolagents/monitoring.py
+++ b/src/smolagents/monitoring.py
@@ -16,7 +16,6 @@
 # limitations under the License.
 import json
 from enum import IntEnum
-from typing import List, Optional
 
 from rich import box
 from rich.console import Console, Group
@@ -27,6 +26,8 @@
 from rich.text import Text
 from rich.tree import Tree
 
+from smolagents.utils import escape_code_brackets
+
 
 __all__ = ["AgentLogger", "LogLevel", "Monitor"]
 
@@ -82,11 +83,14 @@ class LogLevel(IntEnum):
 
 
 class AgentLogger:
-    def __init__(self, level: LogLevel = LogLevel.INFO):
+    def __init__(self, level: LogLevel = LogLevel.INFO, console: Console | None = None):
         self.level = level
-        self.console = Console()
+        # Use the console supplied by the caller; create a default one only when
+        # none was given (an explicit `is None` test, not truthiness).
+        needs_default = console is None
+        self.console = Console() if needs_default else console
 
-    def log(self, *args, level: str | LogLevel = LogLevel.INFO, **kwargs) -> None:
+    def log(self, *args, level: int | str | LogLevel = LogLevel.INFO, **kwargs) -> None:
         """Logs a message to the console.
 
         Args:
@@ -97,7 +101,10 @@ def log(self, *args, level: str | LogLevel = LogLevel.INFO, **kwargs) -> None:
         if level <= self.level:
             self.console.print(*args, **kwargs)
 
-    def log_markdown(self, content: str, title: Optional[str] = None, level=LogLevel.INFO, style=YELLOW_HEX) -> None:
+    def log_error(self, error_message: str) -> None:  # logs the message in bold red at ERROR level
+        self.log(escape_code_brackets(error_message), style="bold red", level=LogLevel.ERROR)  # presumably escaped so rich does not parse brackets as markup - confirm
+
+    def log_markdown(self, content: str, title: str | None = None, level=LogLevel.INFO, style=YELLOW_HEX) -> None:
         markdown_content = Syntax(
             content,
             lexer="markdown",
@@ -145,10 +152,10 @@ def log_rule(self, title: str, level: int = LogLevel.INFO) -> None:
             level=LogLevel.INFO,
         )
 
-    def log_task(self, content: str, subtitle: str, title: Optional[str] = None, level: int = LogLevel.INFO) -> None:
+    def log_task(self, content: str, subtitle: str, title: str | None = None, level: LogLevel = LogLevel.INFO) -> None:
         self.log(
             Panel(
-                f"\n[bold]{content}\n",
+                f"\n[bold]{escape_code_brackets(content)}\n",
                 title="[bold]New run" + (f" - {title}" if title else ""),
                 subtitle=subtitle,
                 border_style=YELLOW_HEX,
@@ -157,7 +164,7 @@ def log_task(self, content: str, subtitle: str, title: Optional[str] = None, lev
             level=level,
         )
 
-    def log_messages(self, messages: List) -> None:
+    def log_messages(self, messages: list[dict], level: LogLevel = LogLevel.DEBUG) -> None:
         messages_as_string = "\n".join([json.dumps(dict(message), indent=4) for message in messages])
         self.log(
             Syntax(
@@ -165,7 +172,8 @@ def log_messages(self, messages: List) -> None:
                 lexer="markdown",
                 theme="github-dark",
                 word_wrap=True,
-            )
+            ),
+            level=level,
         )
 
     def visualize_agent_tree(self, agent):
@@ -184,7 +192,7 @@ def create_tools_section(tools_dict):
 
             return Group("🛠️ [italic #1E90FF]Tools:[/italic #1E90FF]", table)
 
-        def get_agent_headline(agent, name: Optional[str] = None):
+        def get_agent_headline(agent, name: str | None = None):
             name_headline = f"{name} | " if name else ""
             return f"[bold {YELLOW_HEX}]{name_headline}{agent.__class__.__name__} | {agent.model.model_id}"
 
diff --git a/src/smolagents/prompts/code_agent.yaml b/src/smolagents/prompts/code_agent.yaml
index b7388e207..29294601a 100644
--- a/src/smolagents/prompts/code_agent.yaml
+++ b/src/smolagents/prompts/code_agent.yaml
@@ -141,22 +141,31 @@ system_prompt: |-
   final_answer(pope_current_age)
   ```<end_code>
 
-  Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools:
+  The above examples were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools, behaving like regular python functions:
+  ```python
   {%- for tool in tools.values() %}
-  - {{ tool.name }}: {{ tool.description }}
-      Takes inputs: {{tool.inputs}}
-      Returns an output of type: {{tool.output_type}}
-  {%- endfor %}
+  def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
+      """{{ tool.description }}
+
+      Args:
+      {%- for arg_name, arg_info in tool.inputs.items() %}
+          {{ arg_name }}: {{ arg_info.description }}
+      {%- endfor %}
+      """
+  {% endfor %}
+  ```
 
   {%- if managed_agents and managed_agents.values() | list %}
   You can also give tasks to team members.
-  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
-  Given that this team member is a real human, you should be very verbose in your task.
+  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
+  Given that this team member is a real human, you should be very verbose in your task: it should be a long string providing information as detailed as necessary.
   Here is a list of the team members that you can call:
+  ```python
   {%- for agent in managed_agents.values() %}
-  - {{ agent.name }}: {{ agent.description }}
-  {%- endfor %}
-  {%- else %}
+  def {{ agent.name }}("Your query goes here.") -> str:
+      """{{ agent.description }}"""
+  {% endfor %}
+  ```
   {%- endif %}
 
   Here are the rules you should always follow to solve your task:
@@ -171,140 +180,123 @@ system_prompt: |-
   9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
   10. Don't give up! You're in charge of solving the task, not providing directions to solve it.
 
-  Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
+  Now Begin!
 planning:
-  initial_facts: |-
-    Below I will present you a task.
-
-    You will now build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.
-    To do so, you will have to read the task and identify things that must be discovered in order to successfully complete it.
-    Don't make any assumptions. For each item, provide a thorough reasoning. Here is how you will structure this survey:
+  initial_plan: |-
+    You are a world expert at analyzing a situation to derive facts, and at planning accordingly towards solving a task.
+    Below I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task.
 
-    ---
-    ### 1. Facts given in the task
+    ## 1. Facts survey
+    You will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.
+    These "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:
+    ### 1.1. Facts given in the task
     List here the specific facts given in the task that could help you (there might be nothing here).
 
-    ### 2. Facts to look up
+    ### 1.2. Facts to look up
     List here any facts that we may need to look up.
     Also list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.
 
-    ### 3. Facts to derive
+    ### 1.3. Facts to derive
     List here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.
 
-    Keep in mind that "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:
-    ### 1. Facts given in the task
-    ### 2. Facts to look up
-    ### 3. Facts to derive
-    Do not add anything else.
+    Don't make any assumptions. For each item, provide thorough reasoning. Do not add anything else on top of the three headings above.
 
-    Here is the task:
-    ```
-    {{task}}
-    ```
-    Now begin!
-  initial_plan : |-
-    You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools.
-
-    Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
+    ## 2. Plan
+    Then for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
     This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
     Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
     After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
 
-    Here is your task:
-
-    Task:
-    ```
-    {{task}}
-    ```
-    You can leverage these tools:
+    You can leverage these tools, behaving like regular python functions:
+    ```python
     {%- for tool in tools.values() %}
-    - {{ tool.name }}: {{ tool.description }}
-        Takes inputs: {{tool.inputs}}
-        Returns an output of type: {{tool.output_type}}
-    {%- endfor %}
+    def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
+        """{{ tool.description }}
+
+        Args:
+        {%- for arg_name, arg_info in tool.inputs.items() %}
+            {{ arg_name }}: {{ arg_info.description }}
+        {%- endfor %}
+        """
+    {% endfor %}
+    ```
 
     {%- if managed_agents and managed_agents.values() | list %}
     You can also give tasks to team members.
-    Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
-    Given that this team member is a real human, you should be very verbose in your task.
+    Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
+    Given that this team member is a real human, you should be very verbose in your task: it should be a long string providing information as detailed as necessary.
     Here is a list of the team members that you can call:
+    ```python
     {%- for agent in managed_agents.values() %}
-    - {{ agent.name }}: {{ agent.description }}
-    {%- endfor %}
-    {%- else %}
+    def {{ agent.name }}("Your query goes here.") -> str:
+        """{{ agent.description }}"""
+    {% endfor %}
+    ```
     {%- endif %}
 
-    List of facts that you know:
+    ---
+    Now begin! Here is your task:
     ```
-    {{answer_facts}}
+    {{task}}
     ```
-
-    Now begin! Write your plan below.
-  update_facts_pre_messages: |-
-    You are a world expert at gathering known and unknown facts based on a conversation.
-    Below you will find a task, and a history of attempts made to solve the task. You will have to produce a list of these:
-    ### 1. Facts given in the task
-    ### 2. Facts that we have learned
-    ### 3. Facts still to look up
-    ### 4. Facts still to derive
-    Find the task and history below:
-  update_facts_post_messages: |-
-    Earlier we've built a list of facts.
-    But since in your previous steps you may have learned useful new facts or invalidated some false ones.
-    Please update your list of facts based on the previous history, and provide these headings:
-    ### 1. Facts given in the task
-    ### 2. Facts that we have learned
-    ### 3. Facts still to look up
-    ### 4. Facts still to derive
-
-    Now write your new list of facts below.
+    First in part 1, write the facts survey, then in part 2, write your plan.
   update_plan_pre_messages: |-
-    You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools.
-
-    You have been given a task:
+    You are a world expert at analyzing a situation and planning accordingly towards solving a task.
+    You have been given the following task:
     ```
     {{task}}
     ```
-
-    Find below the record of what has been tried so far to solve it. Then you will be asked to make an updated plan to solve the task.
-    If the previous tries so far have met some success, you can make an updated plan based on these actions.
+
+    Below you will find a history of attempts made to solve this task.
+    You will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task.
+    If the previous tries so far have met some success, your updated plan can build on these results.
     If you are stalled, you can make a completely new plan starting from scratch.
+
+    Find the task and history below:
   update_plan_post_messages: |-
-    You're still working towards solving this task:
-    ```
-    {{task}}
-    ```
+    Now write your updated facts below, taking into account the above history:
+    ## 1. Updated facts survey
+    ### 1.1. Facts given in the task
+    ### 1.2. Facts that we have learned
+    ### 1.3. Facts still to look up
+    ### 1.4. Facts still to derive
+
+    Then write a step-by-step high-level plan to solve the task above.
+    ## 2. Plan
+    ### 2.1. ...
+    Etc.
+    This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
+    Beware that you have {{remaining_steps}} steps remaining.
+    Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
+    After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
 
-    You can leverage these tools:
+    You can leverage these tools, behaving like regular python functions:
+    ```python
     {%- for tool in tools.values() %}
-    - {{ tool.name }}: {{ tool.description }}
-        Takes inputs: {{tool.inputs}}
-        Returns an output of type: {{tool.output_type}}
-    {%- endfor %}
+    def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}:
+        """{{ tool.description }}
+
+        Args:
+        {%- for arg_name, arg_info in tool.inputs.items() %}
+            {{ arg_name }}: {{ arg_info.description }}
+        {%- endfor %}"""
+    {% endfor %}
+    ```
 
     {%- if managed_agents and managed_agents.values() | list %}
     You can also give tasks to team members.
     Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
     Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
     Here is a list of the team members that you can call:
+    ```python
     {%- for agent in managed_agents.values() %}
-    - {{ agent.name }}: {{ agent.description }}
-    {%- endfor %}
-    {%- else %}
-    {%- endif %}
-
-    Here is the up to date list of facts that you know:
+    def {{ agent.name }}("Your query goes here.") -> str:
+        """{{ agent.description }}"""
+    {% endfor %}
     ```
-    {{facts_update}}
-    ```
-
-    Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
-    This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
-    Beware that you have {remaining_steps} steps remaining.
-    Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
-    After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
+    {%- endif %}
 
-    Now write your new plan below.
+    Now write your updated facts survey below, then your new plan.
 managed_agent:
   task: |-
       You're a helpful agent named '{{name}}'.
diff --git a/src/smolagents/prompts/toolcalling_agent.yaml b/src/smolagents/prompts/toolcalling_agent.yaml
index 744bd7451..3e99fb423 100644
--- a/src/smolagents/prompts/toolcalling_agent.yaml
+++ b/src/smolagents/prompts/toolcalling_agent.yaml
@@ -1,5 +1,5 @@
 system_prompt: |-
-  You are an expert assistant who can solve any task using  tool calls. You will be given a task to solve as best you can.
+  You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
   To do so, you have been given access to some tools.
 
   The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
@@ -104,7 +104,6 @@ system_prompt: |-
   {%- for agent in managed_agents.values() %}
   - {{ agent.name }}: {{ agent.description }}
   {%- endfor %}
-  {%- else %}
   {%- endif %}
 
   Here are the rules you should always follow to solve your task:
@@ -114,51 +113,33 @@ system_prompt: |-
   If no tool call is needed, use final_answer tool to return your answer.
   4. Never re-do a tool call that you previously did with the exact same parameters.
 
-  Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
+  Now Begin!
 planning:
-  initial_facts: |-
-    Below I will present you a task.
-
-    You will now build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.
-    To do so, you will have to read the task and identify things that must be discovered in order to successfully complete it.
-    Don't make any assumptions. For each item, provide a thorough reasoning. Here is how you will structure this survey:
+  initial_plan : |-
+    You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task.
+    Below I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task.
 
-    ---
-    ### 1. Facts given in the task
+    ## 1. Facts survey
+    You will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.
+    These "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:
+    ### 1.1. Facts given in the task
     List here the specific facts given in the task that could help you (there might be nothing here).
 
-    ### 2. Facts to look up
+    ### 1.2. Facts to look up
     List here any facts that we may need to look up.
     Also list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.
 
-    ### 3. Facts to derive
+    ### 1.3. Facts to derive
     List here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.
 
-    Keep in mind that "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:
-    ### 1. Facts given in the task
-    ### 2. Facts to look up
-    ### 3. Facts to derive
-    Do not add anything else.
+    Don't make any assumptions. For each item, provide thorough reasoning. Do not add anything beyond the three headings above.
 
-    Here is the task:
-    ```
-    {{task}}
-    ```
-    Now begin!
-  initial_plan : |-
-    You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools.
-
-    Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
+    ## 2. Plan
+    Then for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
     This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
     Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
     After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
 
-    Here is your task:
-
-    Task:
-    ```
-    {{task}}
-    ```
     You can leverage these tools:
     {%- for tool in tools.values() %}
     - {{ tool.name }}: {{ tool.description }}
@@ -174,49 +155,43 @@ planning:
     {%- for agent in managed_agents.values() %}
     - {{ agent.name }}: {{ agent.description }}
     {%- endfor %}
-    {%- else %}
     {%- endif %}
 
-    List of facts that you know:
+    ---
+    Now begin! Here is your task:
     ```
-    {{answer_facts}}
+    {{task}}
     ```
-
-    Now begin! Write your plan below.
-  update_facts_pre_messages: |-
-    You are a world expert at gathering known and unknown facts based on a conversation.
-    Below you will find a task, and a history of attempts made to solve the task. You will have to produce a list of these:
-    ### 1. Facts given in the task
-    ### 2. Facts that we have learned
-    ### 3. Facts still to look up
-    ### 4. Facts still to derive
-    Find the task and history below:
-  update_facts_post_messages: |-
-    Earlier we've built a list of facts.
-    But since in your previous steps you may have learned useful new facts or invalidated some false ones.
-    Please update your list of facts based on the previous history, and provide these headings:
-    ### 1. Facts given in the task
-    ### 2. Facts that we have learned
-    ### 3. Facts still to look up
-    ### 4. Facts still to derive
-
-    Now write your new list of facts below.
+    First in part 1, write the facts survey, then in part 2, write your plan.
   update_plan_pre_messages: |-
-    You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools.
-
-    You have been given a task:
+    You are a world expert at analyzing a situation and planning accordingly towards solving a task.
+    You have been given the following task:
     ```
     {{task}}
     ```
-
-    Find below the record of what has been tried so far to solve it. Then you will be asked to make an updated plan to solve the task.
-    If the previous tries so far have met some success, you can make an updated plan based on these actions.
+  
+    Below you will find a history of attempts made to solve this task.
+    You will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task.
+    If the previous tries so far have met some success, your updated plan can build on these results.
     If you are stalled, you can make a completely new plan starting from scratch.
+
+    Find the task and history below:
   update_plan_post_messages: |-
-    You're still working towards solving this task:
-    ```
-    {{task}}
-    ```
+    Now write your updated facts below, taking into account the above history:
+    ## 1. Updated facts survey
+    ### 1.1. Facts given in the task
+    ### 1.2. Facts that we have learned
+    ### 1.3. Facts still to look up
+    ### 1.4. Facts still to derive
+  
+    Then write a step-by-step high-level plan to solve the task above.
+    ## 2. Plan
+    ### 2.1. ...
+    Etc.
+    This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
+    Beware that you have {{remaining_steps}} steps remaining.
+    Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
+    After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
 
     You can leverage these tools:
     {%- for tool in tools.values() %}
@@ -233,20 +208,8 @@ planning:
     {%- for agent in managed_agents.values() %}
     - {{ agent.name }}: {{ agent.description }}
     {%- endfor %}
-    {%- else %}
     {%- endif %}
 
-    Here is the up to date list of facts that you know:
-    ```
-    {{facts_update}}
-    ```
-
-    Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.
-    This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.
-    Beware that you have {remaining_steps} steps remaining.
-    Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.
-    After writing the final step of the plan, write the '\n<end_plan>' tag and stop there.
-
     Now write your new plan below.
 managed_agent:
   task: |-
diff --git a/src/smolagents/remote_executors.py b/src/smolagents/remote_executors.py
new file mode 100644
index 000000000..acfe70020
--- /dev/null
+++ b/src/smolagents/remote_executors.py
@@ -0,0 +1,387 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import base64
+import json
+import pickle
+import re
+import time
+from io import BytesIO
+from pathlib import Path
+from textwrap import dedent
+from typing import Any
+
+import PIL.Image
+import requests
+
+from .local_python_executor import PythonExecutor
+from .monitoring import LogLevel
+from .tools import Tool, get_tools_definition_code
+from .utils import AgentError
+
+
+try:
+    from dotenv import load_dotenv
+
+    load_dotenv()  # load environment variables from a .env file when python-dotenv is installed
+except ModuleNotFoundError:
+    pass  # python-dotenv is optional; continue without it
+
+
+class RemotePythonExecutor(PythonExecutor):
+    """Base class for executors that run code remotely; subclasses implement `run_code_raise_errors`."""
+
+    def __init__(self, additional_imports: list[str], logger):
+        self.additional_imports = additional_imports
+        self.logger = logger
+        self.logger.log("Initializing executor, hold on...")
+        # A top-level `final_answer(...)` line marks the code action as producing the final answer.
+        self.final_answer_pattern = re.compile(r"^final_answer\((.*)\)$", re.M)
+        self.installed_packages = []
+
+    def run_code_raise_errors(self, code: str, return_final_answer: bool = False) -> tuple[Any, str]:
+        """Run `code` remotely and return `(result, logs)`; must be implemented by subclasses."""
+        raise NotImplementedError
+
+    def send_tools(self, tools: dict[str, Tool]):
+        """Install the tools' not-yet-installed requirements remotely, then define the tools there."""
+        code = get_tools_definition_code(tools)
+        requirements = {package for tool in tools.values() for package in tool.to_dict()["requirements"]}
+        packages_to_install = sorted(requirements.difference(self.installed_packages))
+        if packages_to_install:  # a bare `pip install` without any requirement is an error
+            code = f"!pip install {' '.join(packages_to_install)}\n" + code
+        execution = self.run_code_raise_errors(code)
+        # Record packages only after the remote command succeeded, so a failed install is retried.
+        self.installed_packages.extend(packages_to_install)
+        self.logger.log(execution[1])
+
+    def send_variables(self, variables: dict):
+        """Send variables to the kernel namespace using pickle (values must be picklable)."""
+        pickled_vars = base64.b64encode(pickle.dumps(variables)).decode()
+        code = f"""
+import pickle, base64
+vars_dict = pickle.loads(base64.b64decode('{pickled_vars}'))
+locals().update(vars_dict)
+"""
+        self.run_code_raise_errors(code)
+
+    def __call__(self, code_action: str) -> tuple[Any, str, bool]:
+        """Run `code_action` and return `(output, logs, is_final_answer)`."""
+        is_final_answer = bool(self.final_answer_pattern.search(code_action))
+        output, logs = self.run_code_raise_errors(code_action, return_final_answer=is_final_answer)
+        return output, logs, is_final_answer
+
+    def install_packages(self, additional_imports: list[str]):
+        """Pip-install `additional_imports` plus `smolagents` remotely; return the installed list."""
+        additional_imports = additional_imports + ["smolagents"]
+        _, execution_logs = self.run_code_raise_errors(f"!pip install {' '.join(additional_imports)}")
+        self.logger.log(execution_logs)
+        return additional_imports
+
+
+class E2BExecutor(RemotePythonExecutor):
+    """
+    Executes Python code using E2B.
+
+    Args:
+        additional_imports (`list[str]`): Additional imports to install.
+        logger (`Logger`): Logger to use.
+        **kwargs: Additional arguments to pass to the E2B Sandbox.
+    """
+
+    # Result attributes tried in this order; the first one that is not None is returned.
+    _IMAGE_ATTRIBUTES = ("jpeg", "png")
+    _DATA_ATTRIBUTES = ("chart", "data", "html", "javascript", "json", "latex", "markdown", "pdf", "svg", "text")
+
+    def __init__(self, additional_imports: list[str], logger, **kwargs):
+        super().__init__(additional_imports, logger)
+        try:
+            from e2b_code_interpreter import Sandbox
+        except ModuleNotFoundError:
+            raise ModuleNotFoundError(
+                """Please install 'e2b' extra to use E2BExecutor: `pip install 'smolagents[e2b]'`"""
+            )
+        self.sandbox = Sandbox(**kwargs)
+        self.installed_packages = self.install_packages(additional_imports)
+        self.logger.log("E2B is running", level=LogLevel.INFO)
+
+    def run_code_raise_errors(self, code: str, return_final_answer: bool = False) -> tuple[Any, str]:
+        """
+        Run `code` in the sandbox and return `(result, logs)`.
+
+        Args:
+            code (`str`): Code to execute.
+            return_final_answer (`bool`): If True, raise when results exist but none is the main result.
+
+        Returns:
+            `tuple[Any, str]`: The main result (an image for jpeg/png, `None` without results) and the stdout.
+
+        Raises:
+            AgentError: If the execution failed, or if a final answer was expected but not produced.
+        """
+        execution = self.sandbox.run_code(code)
+        execution_logs = "\n".join(str(log) for log in execution.logs.stdout)
+        if execution.error:
+            # One item per line: stdout (if any), header, error name, message, traceback.
+            error = execution.error
+            report = ["Executing code yielded an error:", error.name, error.value, error.traceback]
+            raise AgentError("\n".join(([execution_logs] if execution_logs else []) + report), self.logger)
+        # Return the first representation available on the main result.
+        for result in execution.results or []:
+            if not result.is_main_result:
+                continue
+            for attribute_name in self._IMAGE_ATTRIBUTES:
+                image_output = getattr(result, attribute_name)
+                if image_output is not None:
+                    decoded_bytes = base64.b64decode(image_output.encode("utf-8"))
+                    return PIL.Image.open(BytesIO(decoded_bytes)), execution_logs
+            for attribute_name in self._DATA_ATTRIBUTES:
+                if getattr(result, attribute_name) is not None:
+                    return getattr(result, attribute_name), execution_logs
+        if execution.results and return_final_answer:
+            raise AgentError("No main result returned by executor!", self.logger)
+        return None, execution_logs
+
+
+class DockerExecutor(RemotePythonExecutor):
+    """
+    Executes Python code using Jupyter Kernel Gateway in a Docker container.
+    """
+
+    def __init__(
+        self,
+        additional_imports: list[str],
+        logger,
+        host: str = "127.0.0.1",
+        port: int = 8888,
+        image_name: str = "jupyter-kernel",
+        build_new_image: bool = True,
+        container_run_kwargs: dict[str, Any] | None = None,
+    ):
+        """
+        Initialize the Docker-based Jupyter Kernel Gateway executor.
+
+        Args:
+            additional_imports: Additional imports to install.
+            logger: Logger to use.
+            host: Host address on which the container's port 8888 is published, also used to reach the kernel.
+            port: Host port on which the container's port 8888 is published.
+            image_name: Name of the Docker image to use. If the image doesn't exist, it will be built.
+            build_new_image: If True, the image will be rebuilt even if it already exists.
+            container_run_kwargs: Additional keyword arguments to pass to the Docker container run command.
+        """
+        super().__init__(additional_imports, logger)
+        try:
+            import docker
+            from websocket import create_connection
+        except ModuleNotFoundError:
+            raise ModuleNotFoundError(
+                "Please install 'docker' extra to use DockerExecutor: `pip install 'smolagents[docker]'`"
+            )
+        self.host = host
+        self.port = port
+        self.image_name = image_name
+
+        # Connect to the Docker daemon using the configuration found in the environment
+        try:
+            self.client = docker.from_env()
+        except docker.errors.DockerException as e:
+            raise RuntimeError("Could not connect to Docker daemon: make sure Docker is running.") from e
+
+        # Build and start container
+        try:
+            # Check if image exists, unless forced to rebuild
+            if not build_new_image:
+                try:
+                    self.client.images.get(self.image_name)
+                    self.logger.log(f"Using existing Docker image: {self.image_name}", level=LogLevel.INFO)
+                except docker.errors.ImageNotFound:
+                    self.logger.log(f"Image {self.image_name} not found, building...", level=LogLevel.INFO)
+                    build_new_image = True
+
+            if build_new_image:
+                self.logger.log(f"Building Docker image {self.image_name}...", level=LogLevel.INFO)
+                dockerfile_path = Path(__file__).parent / "Dockerfile"
+                if not dockerfile_path.exists():  # an existing Dockerfile next to this module is reused as-is
+                    with open(dockerfile_path, "w") as f:
+                        f.write("""FROM python:3.12-slim
+
+RUN pip install jupyter_kernel_gateway requests numpy pandas
+RUN pip install jupyter_client notebook
+
+EXPOSE 8888
+CMD ["jupyter", "kernelgateway", "--KernelGatewayApp.ip='0.0.0.0'", "--KernelGatewayApp.port=8888", "--KernelGatewayApp.allow_origin='*'"]
+""")
+                _, build_logs = self.client.images.build(
+                    path=str(dockerfile_path.parent), dockerfile=str(dockerfile_path), tag=self.image_name
+                )
+                self.logger.log(build_logs, level=LogLevel.DEBUG)
+
+            self.logger.log(f"Starting container on {host}:{port}...", level=LogLevel.INFO)
+            # Start from the caller's run options, if any
+            container_kwargs = {}
+            if container_run_kwargs:
+                container_kwargs.update(container_run_kwargs)
+
+            # Ensure required port mapping and background running (these two settings override the caller's)
+            if not isinstance(container_kwargs.get("ports"), dict):
+                container_kwargs["ports"] = {}
+            container_kwargs["ports"]["8888/tcp"] = (host, port)
+            container_kwargs["detach"] = True
+
+            self.container = self.client.containers.run(self.image_name, **container_kwargs)
+
+            retries = 0
+            while self.container.status != "running" and retries < 5:  # poll once per second, 5 times at most
+                self.logger.log(f"Container status: {self.container.status}, waiting...", level=LogLevel.INFO)
+                time.sleep(1)
+                self.container.reload()
+                retries += 1
+
+            self.base_url = f"http://{host}:{port}"
+
+            # Create new kernel via HTTP (NOTE(review): container status is not re-checked, no timeout is set)
+            r = requests.post(f"{self.base_url}/api/kernels")
+            if r.status_code != 201:
+                error_details = {
+                    "status_code": r.status_code,
+                    "headers": dict(r.headers),
+                    "url": r.url,
+                    "body": r.text,
+                    "request_method": r.request.method,
+                    "request_headers": dict(r.request.headers),
+                    "request_body": r.request.body,
+                }
+                self.logger.log_error(f"Failed to create kernel. Details: {json.dumps(error_details, indent=2)}")
+                raise RuntimeError(f"Failed to create kernel: Status {r.status_code}\nResponse: {r.text}") from None
+
+            self.kernel_id = r.json()["id"]
+
+            ws_url = f"ws://{host}:{port}/api/kernels/{self.kernel_id}/channels"
+            self.ws = create_connection(ws_url)
+
+            self.installed_packages = self.install_packages(additional_imports)
+            self.logger.log(
+                f"Container {self.container.short_id} is running with kernel {self.kernel_id}", level=LogLevel.INFO
+            )
+
+        except Exception as e:
+            self.cleanup()  # stops and removes the container if it was created
+            raise RuntimeError(f"Failed to initialize Jupyter kernel: {e}") from e
+
+    def run_code_raise_errors(self, code_action: str, return_final_answer: bool = False) -> tuple[Any, str]:
+        """
+        Execute code in the kernel and return `(result, logs)`.
+
+        When `return_final_answer` is True, the argument of the `final_answer(...)` line is evaluated in the
+        kernel, pickled, printed behind a marker and unpickled here as the result; otherwise the result is None.
+
+        Raises:
+            AgentError: If the kernel reports an error, or if the requested final answer never arrives.
+        """
+        marker = "RESULT_PICKLE:"
+        try:
+            # Default to the raw code so `wrapped_code` is always bound, even without a `final_answer(...)` line.
+            wrapped_code = code_action
+            match = self.final_answer_pattern.search(code_action) if return_final_answer else None
+            if match:
+                pre_final_answer_code = self.final_answer_pattern.sub("", code_action)
+                result_expr = match.group(1)
+                wrapped_code = pre_final_answer_code + dedent(f"""
+                    import pickle, base64
+                    _result = {result_expr}
+                    print("RESULT_PICKLE:" + base64.b64encode(pickle.dumps(_result)).decode())
+                    """)
+
+            # Send execute request
+            msg_id = self._send_execute_request(wrapped_code)
+
+            # Collect streamed output until the kernel is idle again
+            outputs = []
+            while True:
+                msg = json.loads(self.ws.recv())
+                # Only process messages related to our execute request
+                if msg.get("parent_header", {}).get("msg_id") != msg_id:
+                    continue
+                msg_type = msg.get("msg_type", "")
+                if msg_type == "stream":
+                    outputs.append(msg["content"]["text"])
+                elif msg_type == "error":
+                    raise AgentError("\n".join(msg["content"].get("traceback", [])), self.logger)
+                elif msg_type == "status" and msg["content"]["execution_state"] == "idle":
+                    break  # always stop on idle: waiting for a marker that never comes would block forever
+
+            logs = "".join(outputs)
+            if not match:
+                return None, logs
+            # The kernel may batch earlier prints with the marker in one stream message, so search all output.
+            logs, found, payload = logs.rpartition(marker)
+            if not found:
+                raise AgentError("No final answer returned by executor!", self.logger)
+            # NOTE(security): the payload comes from sandboxed code; unpickling can run arbitrary code on the host.
+            return pickle.loads(base64.b64decode(payload.strip())), logs
+
+        except Exception as e:
+            self.logger.log_error(f"Code execution failed: {e}")
+            raise
+
+    def _send_execute_request(self, code: str) -> str:
+        """
+        Send an `execute_request` message for `code` over the kernel websocket.
+
+        Returns:
+            `str`: The message id, found in the `parent_header` of the kernel's replies.
+        """
+        from uuid import uuid4
+
+        # Unique id used to match the kernel's replies to this request
+        request_id = str(uuid4())
+        header = {
+            "msg_id": request_id,
+            "username": "anonymous",
+            "session": str(uuid4()),
+            "msg_type": "execute_request",
+            "version": "5.0",
+        }
+        content = {
+            "code": code,
+            "silent": False,
+            "store_history": True,
+            "user_expressions": {},
+            "allow_stdin": False,
+        }
+
+        payload = {"header": header, "parent_header": {}, "metadata": {}, "content": content}
+        self.ws.send(json.dumps(payload))
+        return request_id
+
+    def cleanup(self):
+        """Stop and remove the container, if one was created; failures are logged instead of raised."""
+        try:
+            if hasattr(self, "container"):  # NOTE(review): the websocket `self.ws` is not closed here
+                self.logger.log(f"Stopping and removing container {self.container.short_id}...", level=LogLevel.INFO)
+                self.container.stop()
+                self.container.remove()
+                self.logger.log("Container cleanup completed", level=LogLevel.INFO)
+        except Exception as e:
+            self.logger.log_error(f"Error during cleanup: {e}")
+
+    def delete(self):
+        """Release the executor's resources via `cleanup()`; this is not `__del__`, so call it explicitly."""
+        self.cleanup()
+
+
+__all__ = ["E2BExecutor", "DockerExecutor"]
diff --git a/src/smolagents/tool_validation.py b/src/smolagents/tool_validation.py
index 125e68993..3b8a3fdca 100644
--- a/src/smolagents/tool_validation.py
+++ b/src/smolagents/tool_validation.py
@@ -1,9 +1,8 @@
 import ast
 import builtins
 from itertools import zip_longest
-from typing import Set
 
-from .utils import BASE_BUILTIN_MODULES, get_source
+from .utils import BASE_BUILTIN_MODULES, get_source, is_valid_name
 
 
 _BUILTIN_NAMES = set(vars(builtins))
@@ -16,7 +15,7 @@ class MethodChecker(ast.NodeVisitor):
     - contains no local imports (e.g. numpy is ok but local_script is not)
     """
 
-    def __init__(self, class_attributes: Set[str], check_imports: bool = True):
+    def __init__(self, class_attributes: set[str], check_imports: bool = True):
         self.undefined_names = set()
         self.imports = {}
         self.from_imports = {}
@@ -50,6 +49,10 @@ def visit_Assign(self, node):
         for target in node.targets:
             if isinstance(target, ast.Name):
                 self.assigned_names.add(target.id)
+            elif isinstance(target, (ast.Tuple, ast.List)):
+                for elt in target.elts:
+                    if isinstance(elt, ast.Name):
+                        self.assigned_names.add(elt.id)
         self.visit(node.value)
 
     def visit_With(self, node):
@@ -166,6 +169,7 @@ def __init__(self):
             self.non_defaults = set()
             self.non_literal_defaults = set()
             self.in_method = False
+            self.invalid_attributes = []
 
         def visit_FunctionDef(self, node):
             if node.name == "__init__":
@@ -192,6 +196,19 @@ def visit_Assign(self, node):
                     if isinstance(target, ast.Name):
                         self.complex_attributes.add(target.id)
 
+            # Check specific class attributes
+            if getattr(node.targets[0], "id", "") == "name":
+                if not isinstance(node.value, ast.Constant):
+                    self.invalid_attributes.append(f"Class attribute 'name' must be a constant, found '{node.value}'")
+                elif not isinstance(node.value.value, str):
+                    self.invalid_attributes.append(
+                        f"Class attribute 'name' must be a string, found '{node.value.value}'"
+                    )
+                elif not is_valid_name(node.value.value):
+                    self.invalid_attributes.append(
+                        f"Class attribute 'name' must be a valid Python identifier and not a reserved keyword, found '{node.value.value}'"
+                    )
+
         def _check_init_function_parameters(self, node):
             # Check defaults in parameters
             for arg, default in reversed(list(zip_longest(reversed(node.args.args), reversed(node.args.defaults)))):
@@ -210,6 +227,9 @@ def _check_init_function_parameters(self, node):
     class_level_checker.visit(class_node)
 
     errors = []
+    # Check invalid class attributes
+    if class_level_checker.invalid_attributes:
+        errors += class_level_checker.invalid_attributes
     if class_level_checker.complex_attributes:
         errors.append(
             f"Complex attributes should be defined in __init__, not as class attributes: "
diff --git a/src/smolagents/tools.py b/src/smolagents/tools.py
index 3f8b25a26..35622c090 100644
--- a/src/smolagents/tools.py
+++ b/src/smolagents/tools.py
@@ -14,6 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
+
 import ast
 import inspect
 import json
@@ -23,19 +25,20 @@
 import tempfile
 import textwrap
 import types
+from collections.abc import Callable
 from contextlib import contextmanager
 from functools import wraps
 from pathlib import Path
-from typing import Callable, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any
 
 from huggingface_hub import (
+    CommitOperationAdd,
+    create_commit,
     create_repo,
     get_collection,
     hf_hub_download,
     metadata_update,
-    upload_folder,
 )
-from huggingface_hub.utils import is_torch_available
 
 from ._function_type_hints_utils import (
     TypeHintParsingException,
@@ -45,7 +48,11 @@
 )
 from .agent_types import handle_agent_input_types, handle_agent_output_types
 from .tool_validation import MethodChecker, validate_tool_attributes
-from .utils import _is_package_available, _is_pillow_available, get_source, instance_to_source
+from .utils import BASE_BUILTIN_MODULES, _is_package_available, get_source, instance_to_source, is_valid_name
+
+
+if TYPE_CHECKING:
+    import mcp
 
 
 logger = logging.getLogger(__name__)
@@ -89,7 +96,7 @@ class Tool:
       returns the text contained in the file'.
     - **name** (`str`) -- A performative name that will be used for your tool in the prompt to the agent. For instance
       `"text-classifier"` or `"image_generator"`.
-    - **inputs** (`Dict[str, Dict[str, Union[str, type]]]`) -- The dict of modalities expected for the inputs.
+    - **inputs** (`Dict[str, Dict[str, Union[str, type, bool]]]`) -- The dict of modalities expected for the inputs.
       It has one `type`key and a `description`key.
       This is used by `launch_gradio_demo` or to make a nice space from your tool, and also can be used in the generated
       description for your tool.
@@ -103,7 +110,7 @@ class Tool:
 
     name: str
     description: str
-    inputs: Dict[str, Dict[str, Union[str, type, bool]]]
+    inputs: dict[str, dict[str, str | type | bool]]
     output_type: str
 
     def __init__(self, *args, **kwargs):
@@ -120,7 +127,7 @@ def validate_arguments(self):
             "inputs": dict,
             "output_type": str,
         }
-
+        # Validate class attributes
         for attr, expected_type in required_attributes.items():
             attr_value = getattr(self, attr, None)
             if attr_value is None:
@@ -129,6 +136,12 @@ def validate_arguments(self):
                 raise TypeError(
                     f"Attribute {attr} should have type {expected_type.__name__}, got {type(attr_value)} instead."
                 )
+        # - Validate name
+        if not is_valid_name(self.name):
+            raise Exception(
+                f"Invalid Tool name '{self.name}': must be a valid Python identifier and not a reserved keyword"
+            )
+        # Validate inputs
         for input_name, input_content in self.inputs.items():
             assert isinstance(input_content, dict), f"Input '{input_name}' should be a dictionary."
             assert "type" in input_content and "description" in input_content, (
@@ -138,7 +151,7 @@ def validate_arguments(self):
                 raise Exception(
                     f"Input '{input_name}': type '{input_content['type']}' is not an authorized value, should be one of {AUTHORIZED_TYPES}."
                 )
-
+        # Validate output type
         assert getattr(self, "output_type", None) in AUTHORIZED_TYPES
 
         # Validate forward function signature, except for Tools that use a "generic" signature (PipelineTool, SpaceToolWrapper, LangChainToolWrapper)
@@ -147,10 +160,12 @@ def validate_arguments(self):
             and getattr(self, "skip_forward_signature_validation") is True
         ):
             signature = inspect.signature(self.forward)
-
-            if not set(signature.parameters.keys()) == set(self.inputs.keys()):
+            actual_keys = set(key for key in signature.parameters.keys() if key != "self")
+            expected_keys = set(self.inputs.keys())
+            if actual_keys != expected_keys:
                 raise Exception(
-                    "Tool's 'forward' method should take 'self' as its first argument, then its next arguments should match the keys of tool attribute 'inputs'."
+                    f"In tool '{self.name}', 'forward' method parameters were {actual_keys}, but expected {expected_keys}. "
+                    f"It should take 'self' as its first argument, then its next arguments should match the keys of tool attribute 'inputs'."
                 )
 
             json_schema = _convert_type_hints_to_json_schema(self.forward, error_on_missing_type_hints=False)[
@@ -211,7 +226,8 @@ def to_dict(self) -> dict:
             method_checker.visit(forward_node)
 
             if len(method_checker.errors) > 0:
-                raise (ValueError("\n".join(method_checker.errors)))
+                errors = [f"- {error}" for error in method_checker.errors]
+                raise (ValueError(f"SimpleTool validation failed for {self.name}:\n" + "\n".join(errors)))
 
             forward_source_code = get_source(self.forward)
             tool_code = textwrap.dedent(
@@ -222,7 +238,7 @@ def to_dict(self) -> dict:
             class {class_name}(Tool):
                 name = "{self.name}"
                 description = {json.dumps(textwrap.dedent(self.description).strip())}
-                inputs = {json.dumps(self.inputs, separators=(",", ":"))}
+                inputs = {repr(self.inputs)}
                 output_type = "{self.output_type}"
             """
             ).strip()
@@ -261,9 +277,25 @@ def replacement(match):
 
         requirements = {el for el in get_imports(tool_code) if el not in sys.stdlib_module_names} | {"smolagents"}
 
-        return {"name": self.name, "code": tool_code, "requirements": requirements}
+        return {"name": self.name, "code": tool_code, "requirements": sorted(requirements)}
+
+    @classmethod
+    def from_dict(cls, tool_dict: dict[str, Any], **kwargs) -> "Tool":
+        """
+        Create tool from a dictionary representation.
+
+        Args:
+            tool_dict (`dict[str, Any]`): Dictionary representation of the tool.
+            **kwargs: Additional keyword arguments to pass to the tool's constructor.
 
-    def save(self, output_dir: str, tool_file_name: str = "tool", make_gradio_app: bool = True):
+        Returns:
+            `Tool`: Tool object.
+        """
+        if "code" not in tool_dict:
+            raise ValueError("Tool dictionary must contain 'code' key with the tool source code")
+        return cls.from_code(tool_dict["code"], **kwargs)
+
+    def save(self, output_dir: str | Path, tool_file_name: str = "tool", make_gradio_app: bool = True):
         """
         Saves the relevant code files for your tool so it can be pushed to the Hub. This will copy the code of your
         tool in `output_dir` as well as autogenerate:
@@ -275,48 +307,31 @@ def save(self, output_dir: str, tool_file_name: str = "tool", make_gradio_app: b
           code)
 
         Args:
-            output_dir (`str`): The folder in which you want to save your tool.
+            output_dir (`str` or `Path`): The folder in which you want to save your tool.
             tool_file_name (`str`, *optional*): The file name in which you want to save your tool.
             make_gradio_app (`bool`, *optional*, defaults to True): Whether to also export a `requirements.txt` file and Gradio UI.
         """
-        os.makedirs(output_dir, exist_ok=True)
-        class_name = self.__class__.__name__
-        tool_file = os.path.join(output_dir, f"{tool_file_name}.py")
-
-        tool_dict = self.to_dict()
-        tool_code = tool_dict["code"]
-
-        with open(tool_file, "w", encoding="utf-8") as f:
-            f.write(tool_code.replace(":true,", ":True,").replace(":true}", ":True}"))
-
+        # Ensure output directory exists
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+        # Save tool file
+        self._write_file(output_path / f"{tool_file_name}.py", self._get_tool_code())
         if make_gradio_app:
-            # Save app file
-            app_file = os.path.join(output_dir, "app.py")
-            with open(app_file, "w", encoding="utf-8") as f:
-                f.write(
-                    textwrap.dedent(
-                        f"""
-                from smolagents import launch_gradio_demo
-                from {tool_file_name} import {class_name}
-
-                tool = {class_name}()
-
-                launch_gradio_demo(tool)
-                """
-                    ).lstrip()
-                )
-
+            # Save app file
+            self._write_file(output_path / "app.py", self._get_gradio_app_code(tool_module_name=tool_file_name))
             # Save requirements file
-            requirements_file = os.path.join(output_dir, "requirements.txt")
-            with open(requirements_file, "w", encoding="utf-8") as f:
-                f.write("\n".join(tool_dict["requirements"]) + "\n")
+            self._write_file(output_path / "requirements.txt", self._get_requirements())
+
+    def _write_file(self, file_path: Path, content: str) -> None:
+        """Writes content to a file with UTF-8 encoding."""
+        file_path.write_text(content, encoding="utf-8")
 
     def push_to_hub(
         self,
         repo_id: str,
         commit_message: str = "Upload tool",
-        private: Optional[bool] = None,
-        token: Optional[Union[bool, str]] = None,
+        private: bool | None = None,
+        token: bool | str | None = None,
         create_pr: bool = False,
     ) -> str:
         """
@@ -334,8 +349,25 @@ def push_to_hub(
                 The token to use as HTTP bearer authorization for remote files. If unset, will use the token generated
                 when running `huggingface-cli login` (stored in `~/.huggingface`).
             create_pr (`bool`, *optional*, defaults to `False`):
-                Whether or not to create a PR with the uploaded files or directly commit.
+                Whether to create a PR with the uploaded files or directly commit.
         """
+        # Initialize repository
+        repo_id = self._initialize_hub_repo(repo_id, token, private)
+        # Prepare files for commit
+        additions = self._prepare_hub_files()
+        # Create commit
+        return create_commit(
+            repo_id=repo_id,
+            operations=additions,
+            commit_message=commit_message,
+            token=token,
+            create_pr=create_pr,
+            repo_type="space",
+        )
+
+    @staticmethod
+    def _initialize_hub_repo(repo_id: str, token: bool | str | None, private: bool | None) -> str:
+        """Initialize repository on Hugging Face Hub."""
         repo_url = create_repo(
             repo_id=repo_id,
             token=token,
@@ -344,27 +376,56 @@ def push_to_hub(
             repo_type="space",
             space_sdk="gradio",
         )
-        repo_id = repo_url.repo_id
-        metadata_update(repo_id, {"tags": ["smolagents", "tool"]}, repo_type="space", token=token)
-
-        with tempfile.TemporaryDirectory() as work_dir:
-            # Save all files.
-            self.save(work_dir)
-            logger.info(f"Uploading the following files to {repo_id}: {','.join(os.listdir(work_dir))}")
-            return upload_folder(
-                repo_id=repo_id,
-                commit_message=commit_message,
-                folder_path=work_dir,
-                token=token,
-                create_pr=create_pr,
-                repo_type="space",
-            )
+        metadata_update(repo_url.repo_id, {"tags": ["smolagents", "tool"]}, repo_type="space", token=token)
+        return repo_url.repo_id
+
+    def _prepare_hub_files(self) -> list:
+        """Prepare files for Hub commit."""
+        additions = [
+            # Add tool code
+            CommitOperationAdd(
+                path_in_repo="tool.py",
+                path_or_fileobj=self._get_tool_code().encode(),
+            ),
+            # Add Gradio app
+            CommitOperationAdd(
+                path_in_repo="app.py",
+                path_or_fileobj=self._get_gradio_app_code().encode(),
+            ),
+            # Add requirements
+            CommitOperationAdd(
+                path_in_repo="requirements.txt",
+                path_or_fileobj=self._get_requirements().encode(),
+            ),
+        ]
+        return additions
+
+    def _get_tool_code(self) -> str:
+        """Get the tool's code."""
+        return self.to_dict()["code"]
+
+    def _get_gradio_app_code(self, tool_module_name: str = "tool") -> str:
+        """Get the Gradio app code."""
+        class_name = self.__class__.__name__
+        return textwrap.dedent(
+            f"""\
+            from smolagents import launch_gradio_demo
+            from {tool_module_name} import {class_name}
+
+            tool = {class_name}()
+            launch_gradio_demo(tool)
+            """
+        )
+
+    def _get_requirements(self) -> str:
+        """Get the requirements."""
+        return "\n".join(self.to_dict()["requirements"])
 
     @classmethod
     def from_hub(
         cls,
         repo_id: str,
-        token: Optional[str] = None,
+        token: str | None = None,
         trust_remote_code: bool = False,
         **kwargs,
     ):
@@ -381,7 +442,7 @@ def from_hub(
 
         Args:
             repo_id (`str`):
-                The name of the repo on the Hub where your tool is defined.
+                The name of the Space repo on the Hub where your tool is defined.
             token (`str`, *optional*):
                 The token to identify you on hf.co. If unset, will use the token generated when running
                 `huggingface-cli login` (stored in `~/.huggingface`).
@@ -444,8 +505,8 @@ def from_space(
         space_id: str,
         name: str,
         description: str,
-        api_name: Optional[str] = None,
-        token: Optional[str] = None,
+        api_name: str | None = None,
+        token: str | None = None,
     ):
         """
         Creates a [`Tool`] from a Space given its id on the Hub.
@@ -493,8 +554,8 @@ def __init__(
                 space_id: str,
                 name: str,
                 description: str,
-                api_name: Optional[str] = None,
-                token: Optional[str] = None,
+                api_name: str | None = None,
+                token: str | None = None,
             ):
                 self.name = name
                 self.description = description
@@ -535,11 +596,9 @@ def __init__(
 
             def sanitize_argument_for_prediction(self, arg):
                 from gradio_client.utils import is_http_url_like
+                from PIL.Image import Image
 
-                if _is_pillow_available():
-                    from PIL.Image import Image
-
-                if _is_pillow_available() and isinstance(arg, Image):
+                if isinstance(arg, Image):
                     temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
                     arg.save(temp_file.name)
                     arg = temp_file.name
@@ -641,6 +700,7 @@ def launch_gradio_demo(tool: Tool):
         raise ImportError("Gradio should be installed in order to launch a gradio demo.")
 
     TYPE_TO_COMPONENT_CLASS_MAPPING = {
+        "boolean": gr.Checkbox,
         "image": gr.Image,
         "audio": gr.Audio,
         "string": gr.Textbox,
@@ -659,8 +719,8 @@ def tool_forward(*args, **kwargs):
         new_component = input_gradio_component_class(label=input_name)
         gradio_inputs.append(new_component)
 
-    output_gradio_componentclass = TYPE_TO_COMPONENT_CLASS_MAPPING[tool.output_type]
-    gradio_output = output_gradio_componentclass(label="Output")
+    output_gradio_component_class = TYPE_TO_COMPONENT_CLASS_MAPPING[tool.output_type]
+    gradio_output = output_gradio_component_class(label="Output")
 
     gr.Interface(
         fn=tool_forward,
@@ -674,8 +734,8 @@ def tool_forward(*args, **kwargs):
 
 def load_tool(
     repo_id,
-    model_repo_id: Optional[str] = None,
-    token: Optional[str] = None,
+    model_repo_id: str | None = None,
+    token: str | None = None,
     trust_remote_code: bool = False,
     **kwargs,
 ):
@@ -692,7 +752,7 @@ def load_tool(
 
     Args:
         repo_id (`str`):
-            Repo ID of a tool on the Hub.
+            Space repo ID of a tool on the Hub.
         model_repo_id (`str`, *optional*):
             Use this argument to use a different model than the default one for the tool you selected.
         token (`str`, *optional*):
@@ -738,14 +798,14 @@ class ToolCollection:
     For example and usage, see: [`ToolCollection.from_hub`] and [`ToolCollection.from_mcp`]
     """
 
-    def __init__(self, tools: List[Tool]):
+    def __init__(self, tools: list[Tool]):
         self.tools = tools
 
     @classmethod
     def from_hub(
         cls,
         collection_slug: str,
-        token: Optional[str] = None,
+        token: str | None = None,
         trust_remote_code: bool = False,
     ) -> "ToolCollection":
         """Loads a tool collection from the Hub.
@@ -783,20 +843,32 @@ def from_hub(
 
     @classmethod
     @contextmanager
-    def from_mcp(cls, server_parameters) -> "ToolCollection":
+    def from_mcp(
+        cls, server_parameters: "mcp.StdioServerParameters" | dict, trust_remote_code: bool = False
+    ) -> "ToolCollection":
         """Automatically load a tool collection from an MCP server.
 
+        This method supports both SSE and Stdio MCP servers. Look at the `server_parameters`
+        argument for more details on how to connect to an SSE or Stdio MCP server.
+
         Note: a separate thread will be spawned to run an asyncio event loop handling
         the MCP server.
 
         Args:
-            server_parameters (mcp.StdioServerParameters): The server parameters to use to
-            connect to the MCP server.
+            server_parameters (`mcp.StdioServerParameters` or `dict`):
+                The server parameters to use to connect to the MCP server. If a dict is
+                provided, it is assumed to be the parameters of `mcp.client.sse.sse_client`.
+            trust_remote_code (`bool`, *optional*, defaults to `False`):
+                Whether to trust the execution of code from tools defined on the MCP server.
+                This option should only be set to `True` if you trust the MCP server,
+                and understand the risks associated with running remote code on your local machine.
+                If set to `False`, loading tools from MCP will fail.
+
 
         Returns:
             ToolCollection: A tool collection instance.
 
-        Example:
+        Example with a Stdio MCP server:
         ```py
         >>> from smolagents import ToolCollection, CodeAgent
         >>> from mcp import StdioServerParameters
@@ -807,11 +879,23 @@ def from_mcp(cls, server_parameters) -> "ToolCollection":
         >>>     env={"UV_PYTHON": "3.12", **os.environ},
         >>> )
 
-        >>> with ToolCollection.from_mcp(server_parameters) as tool_collection:
+        >>> with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as tool_collection:
+        >>>     agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True)
+        >>>     agent.run("Please find a remedy for hangover.")
+        ```
+
+        Example with an SSE MCP server:
+        ```py
+        >>> with ToolCollection.from_mcp({"url": "http://127.0.0.1:8000/sse"}, trust_remote_code=True) as tool_collection:
         >>>     agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True)
         >>>     agent.run("Please find a remedy for hangover.")
         ```
         """
+        if not trust_remote_code:
+            raise ValueError(
+                "Loading tools from MCP requires you to acknowledge you trust the MCP server, "
+                "as it will execute code on your local machine: pass `trust_remote_code=True`."
+            )
         try:
             from mcpadapt.core import MCPAdapt
             from mcpadapt.smolagents_adapter import SmolAgentsAdapter
@@ -826,45 +910,73 @@ def from_mcp(cls, server_parameters) -> "ToolCollection":
 
 def tool(tool_function: Callable) -> Tool:
     """
-    Converts a function into an instance of a Tool subclass.
+    Convert a function into an instance of a dynamically created Tool subclass.
 
     Args:
-        tool_function: Your function. Should have type hints for each input and a type hint for the output.
-        Should also have a docstring description including an 'Args:' part where each argument is described.
+        tool_function (`Callable`): Function to convert into a Tool subclass.
+            Should have type hints for each input and a type hint for the output.
+            Should also have a docstring including the description of the function
+            and an 'Args:' part where each argument is described.
     """
     tool_json_schema = get_json_schema(tool_function)["function"]
     if "return" not in tool_json_schema:
         raise TypeHintParsingException("Tool return type not found: make sure your function has a return type hint!")
 
     class SimpleTool(Tool):
-        def __init__(
-            self,
-            name: str,
-            description: str,
-            inputs: Dict[str, Dict[str, str]],
-            output_type: str,
-            function: Callable,
-        ):
-            self.name = name
-            self.description = description
-            self.inputs = inputs
-            self.output_type = output_type
-            self.forward = function
+        def __init__(self):
             self.is_initialized = True
 
-    simple_tool = SimpleTool(
-        name=tool_json_schema["name"],
-        description=tool_json_schema["description"],
-        inputs=tool_json_schema["parameters"]["properties"],
-        output_type=tool_json_schema["return"]["type"],
-        function=tool_function,
+    # Set the class attributes
+    SimpleTool.name = tool_json_schema["name"]
+    SimpleTool.description = tool_json_schema["description"]
+    SimpleTool.inputs = tool_json_schema["parameters"]["properties"]
+    SimpleTool.output_type = tool_json_schema["return"]["type"]
+
+    @wraps(tool_function)
+    def wrapped_function(*args, **kwargs):
+        return tool_function(*args, **kwargs)
+
+    # Bind the wrapped function to the forward method
+    SimpleTool.forward = staticmethod(wrapped_function)
+
+    # Get the signature parameters of the tool function
+    sig = inspect.signature(tool_function)
+    # - Add "self" as first parameter to tool_function signature
+    new_sig = sig.replace(
+        parameters=[inspect.Parameter("self", inspect.Parameter.POSITIONAL_OR_KEYWORD)] + list(sig.parameters.values())
     )
-    original_signature = inspect.signature(tool_function)
-    new_parameters = [inspect.Parameter("self", inspect.Parameter.POSITIONAL_ONLY)] + list(
-        original_signature.parameters.values()
+    # - Set the signature of the forward method
+    SimpleTool.forward.__signature__ = new_sig
+
+    # Create and attach the source code of the dynamically created tool class and forward method
+    # - Get the source code of tool_function
+    tool_source = inspect.getsource(tool_function)
+    # - Remove the tool decorator and function definition line
+    tool_source_body = "\n".join(tool_source.split("\n")[2:])
+    # - Dedent
+    tool_source_body = textwrap.dedent(tool_source_body)
+    # - Create the forward method source, including def line and indentation
+    forward_method_source = f"def forward{str(new_sig)}:\n{textwrap.indent(tool_source_body, '    ')}"
+    # - Create the class source
+    class_source = (
+        textwrap.dedent(f"""
+        class SimpleTool(Tool):
+            name: str = "{tool_json_schema["name"]}"
+            description: str = {json.dumps(textwrap.dedent(tool_json_schema["description"]).strip())}
+            inputs: dict[str, dict[str, str]] = {tool_json_schema["parameters"]["properties"]}
+            output_type: str = "{tool_json_schema["return"]["type"]}"
+
+            def __init__(self):
+                self.is_initialized = True
+
+        """)
+        + textwrap.indent(forward_method_source, "    ")  # indent for class method
     )
-    new_signature = original_signature.replace(parameters=new_parameters)
-    simple_tool.forward.__signature__ = new_signature
+    # - Store the source code on both class and method for inspection
+    SimpleTool.__source__ = class_source
+    SimpleTool.forward.__source__ = forward_method_source
+
+    simple_tool = SimpleTool()
     return simple_tool
 
 
@@ -927,7 +1039,7 @@ def __init__(
         token=None,
         **hub_kwargs,
     ):
-        if not is_torch_available() or not _is_package_available("accelerate"):
+        if not _is_package_available("accelerate") or not _is_package_available("torch"):
             raise ModuleNotFoundError(
                 "Please install 'transformers' extra to use a PipelineTool: `pip install 'smolagents[transformers]'`"
             )
@@ -1009,15 +1121,15 @@ def decode(self, outputs):
         """
         return self.post_processor(outputs)
 
-    def __call__(self, *args, **kwargs):
+    def __call__(self, *args, sanitize_inputs_outputs: bool = False, **kwargs):
         import torch
         from accelerate.utils import send_to_device
 
-        args, kwargs = handle_agent_input_types(*args, **kwargs)
-
         if not self.is_initialized:
             self.setup()
 
+        if sanitize_inputs_outputs:
+            args, kwargs = handle_agent_input_types(*args, **kwargs)
         encoded_inputs = self.encode(*args, **kwargs)
 
         tensor_inputs = {k: v for k, v in encoded_inputs.items() if isinstance(v, torch.Tensor)}
@@ -1027,8 +1139,35 @@ def __call__(self, *args, **kwargs):
         outputs = self.forward({**encoded_inputs, **non_tensor_inputs})
         outputs = send_to_device(outputs, "cpu")
         decoded_outputs = self.decode(outputs)
+        if sanitize_inputs_outputs:
+            decoded_outputs = handle_agent_output_types(decoded_outputs, self.output_type)
+        return decoded_outputs
+
+
+def get_tools_definition_code(tools: dict[str, Tool]) -> str:
+    tool_codes = []
+    for tool in tools.values():
+        validate_tool_attributes(tool.__class__, check_imports=False)
+        tool_code = instance_to_source(tool, base_cls=Tool)
+        tool_code = tool_code.replace("from smolagents.tools import Tool", "")
+        tool_code += f"\n\n{tool.name} = {tool.__class__.__name__}()\n"
+        tool_codes.append(tool_code)
+
+    tool_definition_code = "\n".join([f"import {module}" for module in BASE_BUILTIN_MODULES])
+    tool_definition_code += textwrap.dedent(
+        """
+    from typing import Any
+
+    class Tool:
+        def __call__(self, *args, **kwargs):
+            return self.forward(*args, **kwargs)
 
-        return handle_agent_output_types(decoded_outputs, self.output_type)
+        def forward(self, *args, **kwargs):
+            pass # to be implemented in child class
+    """
+    )
+    tool_definition_code += "\n\n".join(tool_codes)
+    return tool_definition_code
 
 
 __all__ = [
diff --git a/src/smolagents/utils.py b/src/smolagents/utils.py
index 3f7219b61..49b212dd7 100644
--- a/src/smolagents/utils.py
+++ b/src/smolagents/utils.py
@@ -20,13 +20,15 @@
 import importlib.util
 import inspect
 import json
+import keyword
 import os
 import re
-import textwrap
 import types
 from functools import lru_cache
 from io import BytesIO
-from typing import TYPE_CHECKING, Any, Dict, Tuple, Union
+from pathlib import Path
+from textwrap import dedent
+from typing import TYPE_CHECKING, Any
 
 
 if TYPE_CHECKING:
@@ -45,11 +47,6 @@ def _is_package_available(package_name: str) -> bool:
         return False
 
 
-@lru_cache
-def _is_pillow_available():
-    return importlib.util.find_spec("PIL") is not None
-
-
 BASE_BUILTIN_MODULES = [
     "collections",
     "datetime",
@@ -65,15 +62,28 @@ def _is_pillow_available():
 ]
 
 
+def escape_code_brackets(text: str) -> str:
+    """Escapes square brackets in code segments while preserving Rich styling tags."""
+
+    def replace_bracketed_content(match):
+        content = match.group(1)
+        cleaned = re.sub(
+            r"bold|red|green|blue|yellow|magenta|cyan|white|black|italic|dim|\s|#[0-9a-fA-F]{6}", "", content
+        )
+        return f"\\[{content}\\]" if cleaned.strip() else f"[{content}]"
+
+    return re.sub(r"\[([^\]]*)\]", replace_bracketed_content, text)
+
+
 class AgentError(Exception):
     """Base class for other agent-related exceptions"""
 
     def __init__(self, message, logger: "AgentLogger"):
         super().__init__(message)
         self.message = message
-        logger.log(f"[bold red]{message}[/bold red]", level="ERROR")
+        logger.log_error(message)
 
-    def dict(self) -> Dict[str, str]:
+    def dict(self) -> dict[str, str]:
         return {"type": self.__class__.__name__, "message": str(self.message)}
 
 
@@ -95,6 +105,18 @@ class AgentMaxStepsError(AgentError):
     pass
 
 
+class AgentToolCallError(AgentExecutionError):
+    """Exception raised for errors when incorrect arguments are passed to the tool"""
+
+    pass
+
+
+class AgentToolExecutionError(AgentExecutionError):
+    """Exception raised for errors when executing a tool"""
+
+    pass
+
+
 class AgentGenerationError(AgentError):
     """Exception raised for errors in generation in the agent"""
 
@@ -127,13 +149,16 @@ def make_json_serializable(obj: Any) -> Any:
         return str(obj)
 
 
-def parse_json_blob(json_blob: str) -> Dict[str, str]:
+def parse_json_blob(json_blob: str) -> tuple[dict[str, str], str]:
+    "Extracts the JSON blob from the input and returns the JSON data and the text preceding the JSON blob."
     try:
         first_accolade_index = json_blob.find("{")
         last_accolade_index = [a.start() for a in list(re.finditer("}", json_blob))][-1]
-        json_blob = json_blob[first_accolade_index : last_accolade_index + 1].replace('\\"', "'")
-        json_data = json.loads(json_blob, strict=False)
-        return json_data
+        json_data = json_blob[first_accolade_index : last_accolade_index + 1]
+        json_data = json.loads(json_data, strict=False)
+        return json_data, json_blob[:first_accolade_index]
+    except IndexError:
+        raise ValueError("The model output does not contain any JSON blob.")
     except json.JSONDecodeError as e:
         place = e.pos
         if json_blob[place - 1 : place + 2] == "},\n":
@@ -145,70 +170,63 @@ def parse_json_blob(json_blob: str) -> Dict[str, str]:
             f"JSON blob was: {json_blob}, decoding failed on that specific part of the blob:\n"
             f"'{json_blob[place - 4 : place + 5]}'."
         )
-    except Exception as e:
-        raise ValueError(f"Error in parsing the JSON blob: {e}")
-
-
-def parse_code_blobs(code_blob: str) -> str:
-    """Parses the LLM's output to get any code blob inside. Will return the code directly if it's code."""
-    pattern = r"```(?:py|python)?\n(.*?)\n```"
-    matches = re.findall(pattern, code_blob, re.DOTALL)
-    if len(matches) == 0:
-        try:  # Maybe the LLM outputted a code blob directly
-            ast.parse(code_blob)
-            return code_blob
-        except SyntaxError:
-            pass
-
-        if "final" in code_blob and "answer" in code_blob:
-            raise ValueError(
-                f"""
-Your code snippet is invalid, because the regex pattern {pattern} was not found in it.
-Here is your code snippet:
-{code_blob}
-It seems like you're trying to return the final answer, you can do it as follows:
-Code:
-```py
-final_answer("YOUR FINAL ANSWER HERE")
-```<end_code>""".strip()
-            )
+
+
+def parse_code_blobs(text: str) -> str:
+    """Extract code blocks from the LLM's output.
+
+    If a valid code block is passed, it returns it directly.
+
+    Args:
+        text (`str`): LLM's output text to parse.
+
+    Returns:
+        `str`: Extracted code block.
+
+    Raises:
+        ValueError: If no valid code block is found in the text.
+    """
+    pattern = r"```(?:py|python)?\s*\n(.*?)\n```"
+    matches = re.findall(pattern, text, re.DOTALL)
+    if matches:
+        return "\n\n".join(match.strip() for match in matches)
+    # Maybe the LLM outputted a code blob directly
+    try:
+        ast.parse(text)
+        return text
+    except SyntaxError:
+        pass
+
+    if "final" in text and "answer" in text:
         raise ValueError(
-            f"""
-Your code snippet is invalid, because the regex pattern {pattern} was not found in it.
-Here is your code snippet:
-{code_blob}
-Make sure to include code with the correct pattern, for instance:
-Thoughts: Your thoughts
-Code:
-```py
-# Your python code here
-```<end_code>""".strip()
+            dedent(
+                f"""
+                Your code snippet is invalid, because the regex pattern {pattern} was not found in it.
+                Here is your code snippet:
+                {text}
+                It seems like you're trying to return the final answer, you can do it as follows:
+                Code:
+                ```py
+                final_answer("YOUR FINAL ANSWER HERE")
+                ```<end_code>
+                """
+            ).strip()
         )
-    return "\n\n".join(match.strip() for match in matches)
-
-
-def parse_json_tool_call(json_blob: str) -> Tuple[str, Union[str, None]]:
-    json_blob = json_blob.replace("```json", "").replace("```", "")
-    tool_call = parse_json_blob(json_blob)
-    tool_name_key, tool_arguments_key = None, None
-    for possible_tool_name_key in ["action", "tool_name", "tool", "name", "function"]:
-        if possible_tool_name_key in tool_call:
-            tool_name_key = possible_tool_name_key
-    for possible_tool_arguments_key in [
-        "action_input",
-        "tool_arguments",
-        "tool_args",
-        "parameters",
-    ]:
-        if possible_tool_arguments_key in tool_call:
-            tool_arguments_key = possible_tool_arguments_key
-    if tool_name_key is not None:
-        if tool_arguments_key is not None:
-            return tool_call[tool_name_key], tool_call[tool_arguments_key]
-        else:
-            return tool_call[tool_name_key], None
-    error_msg = "No tool name key found in tool call!" + f" Tool call: {json_blob}"
-    raise AgentParsingError(error_msg)
+    raise ValueError(
+        dedent(
+            f"""
+            Your code snippet is invalid, because the regex pattern {pattern} was not found in it.
+            Here is your code snippet:
+            {text}
+            Make sure to include code with the correct pattern, for instance:
+            Thoughts: Your thoughts
+            Code:
+            ```py
+            # Your python code here
+            ```<end_code>
+            """
+        ).strip()
+    )
 
 
 MAX_LENGTH_TRUNCATE_CONTENT = 20000
@@ -316,8 +334,14 @@ def instance_to_source(instance, base_cls=None):
         name: func
         for name, func in cls.__dict__.items()
         if callable(func)
-        and not (
-            base_cls and hasattr(base_cls, name) and getattr(base_cls, name).__code__.co_code == func.__code__.co_code
+        and (
+            not base_cls
+            or not hasattr(base_cls, name)
+            or (
+                isinstance(func, staticmethod)
+                or isinstance(func, classmethod)
+                or (getattr(base_cls, name).__code__.co_code != func.__code__.co_code)
+            )
         )
     }
 
@@ -382,7 +406,9 @@ def get_source(obj) -> str:
 
     inspect_error = None
     try:
-        return textwrap.dedent(inspect.getsource(obj)).strip()
+        # Handle dynamically created classes
+        source = getattr(obj, "__source__", None) or inspect.getsource(obj)
+        return dedent(source).strip()
     except OSError as e:
         # let's keep track of the exception to raise it if all further methods fail
         inspect_error = e
@@ -399,7 +425,7 @@ def get_source(obj) -> str:
         tree = ast.parse(all_cells)
         for node in ast.walk(tree):
             if isinstance(node, (ast.ClassDef, ast.FunctionDef)) and node.name == obj.__name__:
-                return textwrap.dedent("\n".join(all_cells.split("\n")[node.lineno - 1 : node.end_lineno])).strip()
+                return dedent("\n".join(all_cells.split("\n")[node.lineno - 1 : node.end_lineno])).strip()
         raise ValueError(f"Could not find source code for {obj.__name__} in IPython history")
     except ImportError:
         # IPython is not available, let's just raise the original inspect error
@@ -419,8 +445,12 @@ def make_image_url(base64_image):
     return f"data:image/png;base64,{base64_image}"
 
 
-def make_init_file(folder: str):
+def make_init_file(folder: str | Path):
     os.makedirs(folder, exist_ok=True)
     # Create __init__
     with open(os.path.join(folder, "__init__.py"), "w"):
         pass
+
+
+def is_valid_name(name: str) -> bool:  # True only for a str that is an identifier and not a reserved keyword
+    return isinstance(name, str) and name.isidentifier() and not keyword.iskeyword(name)
diff --git a/src/smolagents/vision_web_browser.py b/src/smolagents/vision_web_browser.py
index 46a07f99d..8886ec97e 100644
--- a/src/smolagents/vision_web_browser.py
+++ b/src/smolagents/vision_web_browser.py
@@ -3,8 +3,8 @@
 from time import sleep
 
 import helium
+import PIL.Image
 from dotenv import load_dotenv
-from PIL import Image
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
@@ -37,7 +37,7 @@ def parse_arguments():
         "--model-type",
         type=str,
         default="LiteLLMModel",
-        help="The model type to use (e.g., OpenAIServerModel, LiteLLMModel, TransformersModel, HfApiModel)",
+        help="The model type to use (e.g., OpenAIServerModel, LiteLLMModel, TransformersModel, InferenceClientModel)",
     )
     parser.add_argument(
         "--model-id",
@@ -45,6 +45,12 @@ def parse_arguments():
         default="gpt-4o",
         help="The model ID to use for the specified model type",
     )
+    parser.add_argument(
+        "--provider",
+        type=str,
+        default=None,
+        help="The inference provider to use for the model",
+    )
     return parser.parse_args()
 
 
@@ -57,7 +63,7 @@ def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
             if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2:
                 previous_memory_step.observations_images = None
         png_bytes = driver.get_screenshot_as_png()
-        image = Image.open(BytesIO(png_bytes))
+        image = PIL.Image.open(BytesIO(png_bytes))
         print(f"Captured a browser screenshot: {image.size} pixels")
         memory_step.observations_images = [image.copy()]  # Create a copy to ensure it persists, important!
 
@@ -187,23 +193,26 @@ def initialize_agent(model):
 """
 
 
-def main():
+def run_webagent(prompt: str, model_type: str, model_id: str, provider: str | None) -> None:
     # Load environment variables
     load_dotenv()
 
-    # Parse command line arguments
-    args = parse_arguments()
-
     # Initialize the model based on the provided arguments
-    model = load_model(args.model_type, args.model_id)
+    model = load_model(model_type, model_id, provider=provider, api_base=None, api_key=None)
 
     global driver
     driver = initialize_driver()
     agent = initialize_agent(model)
 
     # Run the agent with the provided prompt
-    agent.python_executor("from helium import *", agent.state)
-    agent.run(args.prompt + helium_instructions)
+    agent.python_executor("from helium import *")  # presumably exposes helium names to agent code; confirm
+    agent.run(prompt + helium_instructions)
+
+
+def main() -> None:
+    # Parse command line arguments
+    args = parse_arguments()
+    run_webagent(args.prompt, args.model_type, args.model_id, args.provider)
 
 
 if __name__ == "__main__":
diff --git a/tests/conftest.py b/tests/conftest.py
index a3896e2db..cca27193a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -6,6 +6,9 @@
 from smolagents.monitoring import LogLevel
 
 
+# Import fixture modules as plugins
+pytest_plugins = ["tests.fixtures.agents", "tests.fixtures.tools"]
+
 original_multi_step_agent_init = MultiStepAgent.__init__
 
 
diff --git a/tests/fixtures/000000039769.png b/tests/data/000000039769.png
similarity index 100%
rename from tests/fixtures/000000039769.png
rename to tests/data/000000039769.png
diff --git a/tests/fixtures/agents.py b/tests/fixtures/agents.py
new file mode 100644
index 000000000..450d7016b
--- /dev/null
+++ b/tests/fixtures/agents.py
@@ -0,0 +1,97 @@
+import pytest
+
+
+AGENT_DICTS = {
+    "v1.9": {
+        "tools": [],
+        "model": {
+            "class": "InferenceClientModel",
+            "data": {
+                "last_input_token_count": None,
+                "last_output_token_count": None,
+                "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct",
+                "provider": None,
+            },
+        },
+        "managed_agents": {},
+        "prompt_templates": {
+            "system_prompt": "dummy system prompt",
+            "planning": {
+                "initial_facts": "dummy planning initial facts",
+                "initial_plan": "dummy planning initial plan",
+                "update_facts_pre_messages": "dummy planning update facts pre messages",
+                "update_facts_post_messages": "dummy planning update facts post messages",
+                "update_plan_pre_messages": "dummy planning update plan pre messages",
+                "update_plan_post_messages": "dummy planning update plan post messages",
+            },
+            "managed_agent": {
+                "task": "dummy managed agent task",
+                "report": "dummy managed agent report",
+            },
+            "final_answer": {
+                "pre_messages": "dummy final answer pre messages",
+                "post_messages": "dummy final answer post messages",
+            },
+        },
+        "max_steps": 10,
+        "verbosity_level": 2,
+        "grammar": None,
+        "planning_interval": 2,
+        "name": "test_agent",
+        "description": "dummy description",
+        "requirements": ["smolagents"],
+        "authorized_imports": ["pandas"],
+    },
+    # Added: executor_type, executor_kwargs, max_print_outputs_length
+    "v1.10": {
+        "tools": [],
+        "model": {
+            "class": "InferenceClientModel",
+            "data": {
+                "last_input_token_count": None,
+                "last_output_token_count": None,
+                "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct",
+                "provider": None,
+            },
+        },
+        "managed_agents": {},
+        "prompt_templates": {
+            "system_prompt": "dummy system prompt",
+            "planning": {
+                "initial_facts": "dummy planning initial facts",
+                "initial_plan": "dummy planning initial plan",
+                "update_facts_pre_messages": "dummy planning update facts pre messages",
+                "update_facts_post_messages": "dummy planning update facts post messages",
+                "update_plan_pre_messages": "dummy planning update plan pre messages",
+                "update_plan_post_messages": "dummy planning update plan post messages",
+            },
+            "managed_agent": {
+                "task": "dummy managed agent task",
+                "report": "dummy managed agent report",
+            },
+            "final_answer": {
+                "pre_messages": "dummy final answer pre messages",
+                "post_messages": "dummy final answer post messages",
+            },
+        },
+        "max_steps": 10,
+        "verbosity_level": 2,
+        "grammar": None,
+        "planning_interval": 2,
+        "name": "test_agent",
+        "description": "dummy description",
+        "requirements": ["smolagents"],
+        "authorized_imports": ["pandas"],
+        "executor_type": "local",
+        "executor_kwargs": {},
+        "max_print_outputs_length": None,
+    },
+}
+
+
+@pytest.fixture
+def get_agent_dict():
+    def _get_agent_dict(agent_dict_key):
+        return AGENT_DICTS[agent_dict_key]  # the shared module-level dict itself, not a copy
+
+    return _get_agent_dict
diff --git a/tests/fixtures/tools.py b/tests/fixtures/tools.py
new file mode 100644
index 000000000..dae7ea576
--- /dev/null
+++ b/tests/fixtures/tools.py
@@ -0,0 +1,87 @@
+import pytest
+
+from smolagents.tools import Tool, tool
+
+
+@pytest.fixture
+def example_tool():
+    @tool
+    def valid_tool_function(input: str) -> str:
+        """A valid tool function.
+
+        Args:
+            input (str): Input string.
+        """
+        return input.upper()
+
+    return valid_tool_function
+
+
+@pytest.fixture
+def boolean_default_tool_class():
+    class BooleanDefaultTool(Tool):
+        name = "boolean_default_tool"
+        description = "A tool with a boolean default parameter"
+        inputs = {
+            "text": {"type": "string", "description": "Input text"},
+            "flag": {"type": "boolean", "description": "Boolean flag with default value", "nullable": True},
+        }
+        output_type = "string"
+
+        def forward(self, text: str, flag: bool = False) -> str:
+            return f"Text: {text}, Flag: {flag}"
+
+    return BooleanDefaultTool()  # NOTE: an instance is returned, despite the "_class" fixture name
+
+
+@pytest.fixture
+def boolean_default_tool_function():
+    @tool
+    def boolean_default_tool(text: str, flag: bool = False) -> str:
+        """
+        A tool with a boolean default parameter.
+
+        Args:
+            text: Input text
+            flag: Boolean flag with default value
+        """
+        return f"Text: {text}, Flag: {flag}"
+
+    return boolean_default_tool
+
+
+@pytest.fixture
+def optional_input_tool_class():
+    class OptionalInputTool(Tool):
+        name = "optional_input_tool"
+        description = "A tool with an optional input parameter"
+        inputs = {
+            "required_text": {"type": "string", "description": "Required input text"},
+            "optional_text": {"type": "string", "description": "Optional input text", "nullable": True},
+        }
+        output_type = "string"
+
+        def forward(self, required_text: str, optional_text: str | None = None) -> str:
+            if optional_text:
+                return f"{required_text} + {optional_text}"
+            return required_text
+
+    return OptionalInputTool()  # NOTE: an instance is returned, despite the "_class" fixture name
+
+
+@pytest.fixture
+def optional_input_tool_function():
+    @tool
+    def optional_input_tool(required_text: str, optional_text: str | None = None) -> str:
+        """
+        A tool with an optional input parameter.
+
+        Args:
+            required_text: Required input text
+            optional_text: Optional input text
+        """
+        if optional_text:
+            return f"{required_text} + {optional_text}"
+        return required_text
+
+    return optional_input_tool
diff --git a/tests/test_agents.py b/tests/test_agents.py
index 376cc0869..826c2f2e4 100644
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@@ -12,18 +12,28 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import io
 import os
 import tempfile
 import unittest
 import uuid
+from contextlib import nullcontext as does_not_raise
 from pathlib import Path
-from unittest.mock import MagicMock
+from typing import Any
+from unittest.mock import MagicMock, patch
 
 import pytest
-from transformers.testing_utils import get_tests_dir
+from huggingface_hub import (
+    ChatCompletionOutputFunctionDefinition,
+    ChatCompletionOutputMessage,
+    ChatCompletionOutputToolCall,
+)
+from rich.console import Console
 
+from smolagents import EMPTY_PROMPT_TEMPLATES
 from smolagents.agent_types import AgentImage, AgentText
 from smolagents.agents import (
+    AgentError,
     AgentMaxStepsError,
     CodeAgent,
     MultiStepAgent,
@@ -31,18 +41,20 @@
     ToolCallingAgent,
     populate_template,
 )
-from smolagents.default_tools import DuckDuckGoSearchTool, PythonInterpreterTool, VisitWebpageTool
-from smolagents.memory import PlanningStep
+from smolagents.default_tools import DuckDuckGoSearchTool, FinalAnswerTool, PythonInterpreterTool, VisitWebpageTool
+from smolagents.memory import ActionStep, PlanningStep
 from smolagents.models import (
     ChatMessage,
     ChatMessageToolCall,
     ChatMessageToolCallDefinition,
-    HfApiModel,
+    InferenceClientModel,
     MessageRole,
+    Model,
     TransformersModel,
 )
-from smolagents.tools import tool
-from smolagents.utils import BASE_BUILTIN_MODULES
+from smolagents.monitoring import AgentLogger, LogLevel
+from smolagents.tools import Tool, tool
+from smolagents.utils import BASE_BUILTIN_MODULES, AgentExecutionError, AgentGenerationError, AgentToolCallError
 
 
 def get_new_path(suffix="") -> str:
@@ -50,8 +62,15 @@ def get_new_path(suffix="") -> str:
     return os.path.join(directory, str(uuid.uuid4()) + suffix)
 
 
-class FakeToolCallModel:
-    def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None):
+@pytest.fixture
+def agent_logger():
+    # In-memory recording console: tests read the captured log back with console.export_text().
+    console = Console(record=True, no_color=True, force_terminal=False, file=io.StringIO())
+    return AgentLogger(LogLevel.DEBUG, console=console)
+
+
+class FakeToolCallModel(Model):
+    def generate(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None):
         if len(messages) < 3:
             return ChatMessage(
                 role="assistant",
@@ -80,8 +99,8 @@ def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, gramm
             )
 
 
-class FakeToolCallModelImage:
-    def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None):
+class FakeToolCallModelImage(Model):
+    def generate(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None):
         if len(messages) < 3:
             return ChatMessage(
                 role="assistant",
@@ -111,8 +130,8 @@ def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, gramm
             )
 
 
-class FakeToolCallModelVL:
-    def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None):
+class FakeToolCallModelVL(Model):
+    def generate(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None):
         if len(messages) < 3:
             return ChatMessage(
                 role="assistant",
@@ -145,38 +164,40 @@ def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, gramm
             )
 
 
-def fake_code_model(messages, stop_sequences=None, grammar=None) -> str:
-    prompt = str(messages)
-    if "special_marker" not in prompt:
-        return ChatMessage(
-            role="assistant",
-            content="""
+class FakeCodeModel(Model):
+    def generate(self, messages, stop_sequences=None, grammar=None):
+        prompt = str(messages)
+        if "special_marker" not in prompt:
+            return ChatMessage(
+                role="assistant",
+                content="""
 Thought: I should multiply 2 by 3.6452. special_marker
 Code:
 ```py
 result = 2**3.6452
 ```<end_code>
 """,
-        )
-    else:  # We're at step 2
-        return ChatMessage(
-            role="assistant",
-            content="""
+            )
+        else:  # We're at step 2
+            return ChatMessage(
+                role="assistant",
+                content="""
 Thought: I can now answer the initial question
 Code:
 ```py
 final_answer(7.2904)
 ```<end_code>
 """,
-        )
+            )
 
 
-def fake_code_model_error(messages, stop_sequences=None) -> str:
-    prompt = str(messages)
-    if "special_marker" not in prompt:
-        return ChatMessage(
-            role="assistant",
-            content="""
+class FakeCodeModelError(Model):
+    def generate(self, messages, stop_sequences=None):
+        prompt = str(messages)
+        if "special_marker" not in prompt:
+            return ChatMessage(
+                role="assistant",
+                content="""
 Thought: I should multiply 2 by 3.6452. special_marker
 Code:
 ```py
@@ -187,26 +208,27 @@ def error_function():
 error_function()
 ```<end_code>
 """,
-        )
-    else:  # We're at step 2
-        return ChatMessage(
-            role="assistant",
-            content="""
+            )
+        else:  # We're at step 2
+            return ChatMessage(
+                role="assistant",
+                content="""
 Thought: I faced an error in the previous step.
 Code:
 ```py
 final_answer("got an error")
 ```<end_code>
 """,
-        )
+            )
 
 
-def fake_code_model_syntax_error(messages, stop_sequences=None) -> str:
-    prompt = str(messages)
-    if "special_marker" not in prompt:
-        return ChatMessage(
-            role="assistant",
-            content="""
+class FakeCodeModelSyntaxError(Model):
+    def generate(self, messages, stop_sequences=None):
+        prompt = str(messages)
+        if "special_marker" not in prompt:
+            return ChatMessage(
+                role="assistant",
+                content="""
 Thought: I should multiply 2 by 3.6452. special_marker
 Code:
 ```py
@@ -216,24 +238,25 @@ def fake_code_model_syntax_error(messages, stop_sequences=None) -> str:
 print("Ok, calculation done!")
 ```<end_code>
 """,
-        )
-    else:  # We're at step 2
-        return ChatMessage(
-            role="assistant",
-            content="""
+            )
+        else:  # We're at step 2
+            return ChatMessage(
+                role="assistant",
+                content="""
 Thought: I can now answer the initial question
 Code:
 ```py
 final_answer("got an error")
 ```<end_code>
 """,
-        )
+            )
 
 
-def fake_code_model_import(messages, stop_sequences=None) -> str:
-    return ChatMessage(
-        role="assistant",
-        content="""
+class FakeCodeModelImport(Model):
+    def generate(self, messages, stop_sequences=None):
+        return ChatMessage(
+            role="assistant",
+            content="""
 Thought: I can answer the question
 Code:
 ```py
@@ -241,15 +264,16 @@ def fake_code_model_import(messages, stop_sequences=None) -> str:
 final_answer("got an error")
 ```<end_code>
 """,
-    )
+        )
 
 
-def fake_code_functiondef(messages, stop_sequences=None) -> str:
-    prompt = str(messages)
-    if "special_marker" not in prompt:
-        return ChatMessage(
-            role="assistant",
-            content="""
+class FakeCodeModelFunctionDef(Model):
+    def generate(self, messages, stop_sequences=None):
+        prompt = str(messages)
+        if "special_marker" not in prompt:
+            return ChatMessage(
+                role="assistant",
+                content="""
 Thought: Let's define the function. special_marker
 Code:
 ```py
@@ -258,12 +282,12 @@ def fake_code_functiondef(messages, stop_sequences=None) -> str:
 def moving_average(x, w):
     return np.convolve(x, np.ones(w), 'valid') / w
 ```<end_code>
-""",
-        )
-    else:  # We're at step 2
-        return ChatMessage(
-            role="assistant",
-            content="""
+    """,
+            )
+        else:  # We're at step 2
+            return ChatMessage(
+                role="assistant",
+                content="""
 Thought: I can now answer the initial question
 Code:
 ```py
@@ -272,13 +296,14 @@ def moving_average(x, w):
 final_answer(res)
 ```<end_code>
 """,
-        )
+            )
 
 
-def fake_code_model_single_step(messages, stop_sequences=None, grammar=None) -> str:
-    return ChatMessage(
-        role="assistant",
-        content="""
+class FakeCodeModelSingleStep(Model):
+    def generate(self, messages, stop_sequences=None, grammar=None):
+        return ChatMessage(
+            role="assistant",
+            content="""
 Thought: I should multiply 2 by 3.6452. special_marker
 Code:
 ```py
@@ -286,13 +311,14 @@ def fake_code_model_single_step(messages, stop_sequences=None, grammar=None) ->
 final_answer(result)
 ```
 """,
-    )
+        )
 
 
-def fake_code_model_no_return(messages, stop_sequences=None, grammar=None) -> str:
-    return ChatMessage(
-        role="assistant",
-        content="""
+class FakeCodeModelNoReturn(Model):
+    def generate(self, messages, stop_sequences=None, grammar=None):
+        return ChatMessage(
+            role="assistant",
+            content="""
 Thought: I should multiply 2 by 3.6452. special_marker
 Code:
 ```py
@@ -300,10 +326,10 @@ def fake_code_model_no_return(messages, stop_sequences=None, grammar=None) -> st
 print(result)
 ```
 """,
-    )
+        )
 
 
-class AgentTests(unittest.TestCase):
+class TestAgent:
     def test_fake_toolcalling_agent(self):
         agent = ToolCallingAgent(tools=[PythonInterpreterTool()], model=FakeToolCallModel())
         output = agent.run("What is 2 multiplied by 3.6452?")
@@ -311,32 +337,35 @@ def test_fake_toolcalling_agent(self):
         assert "7.2904" in output
         assert agent.memory.steps[0].task == "What is 2 multiplied by 3.6452?"
         assert "7.2904" in agent.memory.steps[1].observations
-        assert agent.memory.steps[2].model_output is None
+        assert agent.memory.steps[2].model_output == "Called Tool: 'final_answer' with arguments: {'answer': '7.2904'}"
 
-    def test_toolcalling_agent_handles_image_tool_outputs(self):
-        from PIL import Image
+    def test_toolcalling_agent_handles_image_tool_outputs(self, shared_datadir):
+        import PIL.Image
 
         @tool
-        def fake_image_generation_tool(prompt: str) -> Image.Image:
+        def fake_image_generation_tool(prompt: str) -> PIL.Image.Image:
             """Tool that generates an image.
 
             Args:
                 prompt: The prompt
             """
-            return Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png")
+
+            import PIL.Image
+
+            return PIL.Image.open(shared_datadir / "000000039769.png")
 
         agent = ToolCallingAgent(tools=[fake_image_generation_tool], model=FakeToolCallModelImage())
         output = agent.run("Make me an image.")
         assert isinstance(output, AgentImage)
-        assert isinstance(agent.state["image.png"], Image.Image)
+        assert isinstance(agent.state["image.png"], PIL.Image.Image)
 
-    def test_toolcalling_agent_handles_image_inputs(self):
-        from PIL import Image
+    def test_toolcalling_agent_handles_image_inputs(self, shared_datadir):
+        import PIL.Image
 
-        image = Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png")  # dummy input
+        image = PIL.Image.open(shared_datadir / "000000039769.png")  # dummy input
 
         @tool
-        def fake_image_understanding_tool(prompt: str, image: Image.Image) -> str:
+        def fake_image_understanding_tool(prompt: str, image: PIL.Image.Image) -> str:
             """Tool that creates a caption for an image.
 
             Args:
@@ -350,7 +379,7 @@ def fake_image_understanding_tool(prompt: str, image: Image.Image) -> str:
         assert output == "The image is a cat."
 
     def test_fake_code_agent(self):
-        agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model)
+        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModel())
         output = agent.run("What is 2 multiplied by 3.6452?")
         assert isinstance(output, float)
         assert output == 7.2904
@@ -360,16 +389,15 @@ def test_fake_code_agent(self):
         ]
 
     def test_additional_args_added_to_task(self):
-        agent = CodeAgent(tools=[], model=fake_code_model)
+        agent = CodeAgent(tools=[], model=FakeCodeModel())
         agent.run(
             "What is 2 multiplied by 3.6452?",
             additional_args={"instruction": "Remember this."},
         )
         assert "Remember this" in agent.task
-        assert "Remember this" in str(agent.input_messages)
 
     def test_reset_conversations(self):
-        agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model)
+        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModel())
         output = agent.run("What is 2 multiplied by 3.6452?", reset=True)
         assert output == 7.2904
         assert len(agent.memory.steps) == 3
@@ -382,33 +410,13 @@ def test_reset_conversations(self):
         assert output == 7.2904
         assert len(agent.memory.steps) == 3
 
-    def test_code_agent_code_errors_show_offending_line_and_error(self):
-        agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model_error)
-        output = agent.run("What is 2 multiplied by 3.6452?")
-        assert isinstance(output, AgentText)
-        assert output == "got an error"
-        assert "Code execution failed at line 'error_function()'" in str(agent.memory.steps[1].error)
-        assert "ValueError" in str(agent.memory.steps)
-
-    def test_code_agent_code_error_saves_previous_print_outputs(self):
-        agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model_error)
-        agent.run("What is 2 multiplied by 3.6452?")
-        assert "Flag!" in str(agent.memory.steps[1].observations)
-
-    def test_code_agent_syntax_error_show_offending_lines(self):
-        agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model_syntax_error)
-        output = agent.run("What is 2 multiplied by 3.6452?")
-        assert isinstance(output, AgentText)
-        assert output == "got an error"
-        assert '    print("Failing due to unexpected indent")' in str(agent.memory.steps)
-
     def test_setup_agent_with_empty_toolbox(self):
         ToolCallingAgent(model=FakeToolCallModel(), tools=[])
 
     def test_fails_max_steps(self):
         agent = CodeAgent(
             tools=[PythonInterpreterTool()],
-            model=fake_code_model_no_return,  # use this callable because it never ends
+            model=FakeCodeModelNoReturn(),  # use this callable because it never ends
             max_steps=5,
         )
         answer = agent.run("What is 2 multiplied by 3.6452?")
@@ -416,49 +424,60 @@ def test_fails_max_steps(self):
         assert type(agent.memory.steps[-1].error) is AgentMaxStepsError
         assert isinstance(answer, str)
 
+        agent = CodeAgent(
+            tools=[PythonInterpreterTool()],
+            model=FakeCodeModelNoReturn(),  # use this callable because it never ends
+            max_steps=5,
+        )
+        answer = agent.run("What is 2 multiplied by 3.6452?", max_steps=3)
+        assert len(agent.memory.steps) == 5  # Task step + 3 action steps + Final answer
+        assert type(agent.memory.steps[-1].error) is AgentMaxStepsError
+        assert isinstance(answer, str)
+
     def test_tool_descriptions_get_baked_in_system_prompt(self):
         tool = PythonInterpreterTool()
         tool.name = "fake_tool_name"
         tool.description = "fake_tool_description"
-        agent = CodeAgent(tools=[tool], model=fake_code_model)
+        agent = CodeAgent(tools=[tool], model=FakeCodeModel())
         agent.run("Empty task")
-        assert tool.name in agent.system_prompt
-        assert tool.description in agent.system_prompt
+        assert agent.system_prompt is not None
+        assert f"def {tool.name}(" in agent.system_prompt
+        assert f'"""{tool.description}' in agent.system_prompt
 
     def test_module_imports_get_baked_in_system_prompt(self):
-        agent = CodeAgent(tools=[], model=fake_code_model)
+        agent = CodeAgent(tools=[], model=FakeCodeModel())
         agent.run("Empty task")
         for module in BASE_BUILTIN_MODULES:
             assert module in agent.system_prompt
 
     def test_init_agent_with_different_toolsets(self):
         toolset_1 = []
-        agent = CodeAgent(tools=toolset_1, model=fake_code_model)
+        agent = CodeAgent(tools=toolset_1, model=FakeCodeModel())
         assert len(agent.tools) == 1  # when no tools are provided, only the final_answer tool is added by default
 
         toolset_2 = [PythonInterpreterTool(), PythonInterpreterTool()]
         with pytest.raises(ValueError) as e:
-            agent = CodeAgent(tools=toolset_2, model=fake_code_model)
+            agent = CodeAgent(tools=toolset_2, model=FakeCodeModel())
         assert "Each tool or managed_agent should have a unique name!" in str(e)
 
         with pytest.raises(ValueError) as e:
             agent.name = "python_interpreter"
             agent.description = "empty"
-            CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model, managed_agents=[agent])
+            CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModel(), managed_agents=[agent])
         assert "Each tool or managed_agent should have a unique name!" in str(e)
 
         # check that python_interpreter base tool does not get added to CodeAgent
-        agent = CodeAgent(tools=[], model=fake_code_model, add_base_tools=True)
+        agent = CodeAgent(tools=[], model=FakeCodeModel(), add_base_tools=True)
         assert len(agent.tools) == 3  # added final_answer tool + search + visit_webpage
 
         # check that python_interpreter base tool gets added to ToolCallingAgent
-        agent = ToolCallingAgent(tools=[], model=fake_code_model, add_base_tools=True)
+        agent = ToolCallingAgent(tools=[], model=FakeCodeModel(), add_base_tools=True)
         assert len(agent.tools) == 4  # added final_answer tool + search + visit_webpage
 
     def test_function_persistence_across_steps(self):
         agent = CodeAgent(
             tools=[],
-            model=fake_code_functiondef,
+            model=FakeCodeModelFunctionDef(),
             max_steps=2,
             additional_authorized_imports=["numpy"],
         )
@@ -466,58 +485,64 @@ def test_function_persistence_across_steps(self):
         assert res[0] == 0.5
 
     def test_init_managed_agent(self):
-        agent = CodeAgent(tools=[], model=fake_code_functiondef, name="managed_agent", description="Empty")
+        agent = CodeAgent(tools=[], model=FakeCodeModelFunctionDef(), name="managed_agent", description="Empty")
         assert agent.name == "managed_agent"
         assert agent.description == "Empty"
 
     def test_agent_description_gets_correctly_inserted_in_system_prompt(self):
-        managed_agent = CodeAgent(tools=[], model=fake_code_functiondef, name="managed_agent", description="Empty")
+        managed_agent = CodeAgent(
+            tools=[], model=FakeCodeModelFunctionDef(), name="managed_agent", description="Empty"
+        )
         manager_agent = CodeAgent(
             tools=[],
-            model=fake_code_functiondef,
+            model=FakeCodeModelFunctionDef(),
             managed_agents=[managed_agent],
         )
         assert "You can also give tasks to team members." not in managed_agent.system_prompt
         assert "{{managed_agents_descriptions}}" not in managed_agent.system_prompt
         assert "You can also give tasks to team members." in manager_agent.system_prompt
 
-    def test_code_agent_missing_import_triggers_advice_in_error_log(self):
-        # Set explicit verbosity level to 1 to override the default verbosity level of -1 set in CI fixture
-        agent = CodeAgent(tools=[], model=fake_code_model_import, verbosity_level=1)
-
-        with agent.logger.console.capture() as capture:
-            agent.run("Count to 3")
-        str_output = capture.get()
-        assert "`additional_authorized_imports`" in str_output.replace("\n", "")
-
-    def test_replay_shows_logs(self):
+    def test_replay_shows_logs(self, agent_logger):
         agent = CodeAgent(
-            tools=[], model=fake_code_model_import, verbosity_level=0, additional_authorized_imports=["numpy"]
+            tools=[],
+            model=FakeCodeModelImport(),
+            verbosity_level=0,
+            additional_authorized_imports=["numpy"],
+            logger=agent_logger,
         )
         agent.run("Count to 3")
 
-        with agent.logger.console.capture() as capture:
-            agent.replay()
-        str_output = capture.get().replace("\n", "")
+        str_output = agent_logger.console.export_text()
+
         assert "New run" in str_output
-        assert "Agent output:" in str_output
         assert 'final_answer("got' in str_output
         assert "```<end_code>" in str_output
 
+        agent = ToolCallingAgent(tools=[PythonInterpreterTool()], model=FakeToolCallModel(), verbosity_level=0)
+        agent.logger = agent_logger
+
+        agent.run("What is 2 multiplied by 3.6452?")
+        agent.replay()
+
+        str_output = agent_logger.console.export_text()
+        assert "Called Tool" in str_output
+        assert "arguments" in str_output
+
     def test_code_nontrivial_final_answer_works(self):
-        def fake_code_model_final_answer(messages, stop_sequences=None, grammar=None):
-            return ChatMessage(
-                role="assistant",
-                content="""Code:
+        class FakeCodeModelFinalAnswer(Model):
+            def generate(self, messages, stop_sequences=None, grammar=None):
+                return ChatMessage(
+                    role="assistant",
+                    content="""Code:
 ```py
 def nested_answer():
     final_answer("Correct!")
 
 nested_answer()
 ```<end_code>""",
-            )
+                )
 
-        agent = CodeAgent(tools=[], model=fake_code_model_final_answer)
+        agent = CodeAgent(tools=[], model=FakeCodeModelFinalAnswer())
 
         output = agent.run("Count to 3")
         assert output == "Correct!"
@@ -541,9 +566,10 @@ def weather_api(location: str, celsius: bool = False) -> str:
             device_map="auto",
             do_sample=False,
         )
-        agent = ToolCallingAgent(model=model, tools=[weather_api], max_steps=1)
-        agent.run("What's the weather in Paris?")
-        assert agent.memory.steps[0].task == "What's the weather in Paris?"
+        agent = ToolCallingAgent(model=model, tools=[weather_api], max_steps=1, verbosity_level=10)
+        task = "What is the weather in Paris? "
+        agent.run(task)
+        assert agent.memory.steps[0].task == task
         assert agent.memory.steps[1].tool_calls[0].name == "weather_api"
         step_memory_dict = agent.memory.get_succinct_steps()[1]
         assert step_memory_dict["model_output_message"].tool_calls[0].function.name == "weather_api"
@@ -554,31 +580,109 @@ def test_final_answer_checks(self):
         def check_always_fails(final_answer, agent_memory):
             assert False, "Error raised in check"
 
-        agent = CodeAgent(model=fake_code_model, tools=[], final_answer_checks=[check_always_fails])
+        agent = CodeAgent(model=FakeCodeModel(), tools=[], final_answer_checks=[check_always_fails])
         agent.run("Dummy task.")
         assert "Error raised in check" in str(agent.write_memory_to_messages())
 
+    def test_generation_errors_are_raised(self):  # model failures must reach the caller
+        class FakeCodeModel(Model):  # stub model whose generate() always fails
+            def generate(self, messages, stop_sequences=None, grammar=None):
+                assert False, "Generation failed"
+
+        agent = CodeAgent(model=FakeCodeModel(), tools=[])
+        with pytest.raises(AgentGenerationError) as e:
+            agent.run("Dummy task.")
+        assert len(agent.memory.steps) == 2  # presumably the task step plus the failed action step
+        assert "Generation failed" in str(e.value)  # check the exception itself, not the ExceptionInfo wrapper
+
+
+class CustomFinalAnswerTool(FinalAnswerTool):  # FinalAnswerTool subclass with a recognizable output
+    def forward(self, answer) -> str:
+        return answer + "CUSTOM"  # suffix lets tests tell this tool produced the answer
+
+
+class MockTool(Tool):  # minimal Tool stand-in distinguished only by its name
+    def __init__(self, name):  # NOTE(review): Tool.__init__ is not called here - confirm this is intended
+        self.name = name
+        self.description = "Mock tool description"
+        self.inputs = {}  # declares no input arguments
+        self.output_type = "string"
+
+    def forward(self):
+        return "Mock tool output"  # fixed output regardless of the tool's name
+
+
+class MockAgent:  # plain object exposing only name, tools and description
+    def __init__(self, name, tools, description="Mock agent description"):
+        self.name = name
+        self.tools = {t.name: t for t in tools}  # keyed by tool name; a repeated name keeps the last tool
+        self.description = description
+
+
+class DummyMultiStepAgent(MultiStepAgent):  # subclass the tests instantiate in place of MultiStepAgent
+    def step(self, memory_step: ActionStep) -> None | Any:
+        return super().step(memory_step)  # defers entirely to the parent implementation
+
+    def initialize_system_prompt(self):
+        pass  # no-op; returns None
+
 
 class TestMultiStepAgent:
     def test_instantiation_disables_logging_to_terminal(self):
         fake_model = MagicMock()
-        agent = MultiStepAgent(tools=[], model=fake_model)
+        agent = DummyMultiStepAgent(tools=[], model=fake_model)
         assert agent.logger.level == -1, "logging to terminal should be disabled for testing using a fixture"
 
     def test_instantiation_with_prompt_templates(self, prompt_templates):
-        agent = MultiStepAgent(tools=[], model=MagicMock(), prompt_templates=prompt_templates)
+        agent = DummyMultiStepAgent(tools=[], model=MagicMock(), prompt_templates=prompt_templates)
         assert agent.prompt_templates == prompt_templates
         assert agent.prompt_templates["system_prompt"] == "This is a test system prompt."
         assert "managed_agent" in agent.prompt_templates
         assert agent.prompt_templates["managed_agent"]["task"] == "Task for {{name}}: {{task}}"
         assert agent.prompt_templates["managed_agent"]["report"] == "Report for {{name}}: {{final_answer}}"
 
+    @pytest.mark.parametrize(
+        "tools, expected_final_answer_tool",
+        [([], FinalAnswerTool), ([CustomFinalAnswerTool()], CustomFinalAnswerTool)],
+    )
+    def test_instantiation_with_final_answer_tool(self, tools, expected_final_answer_tool):
+        agent = DummyMultiStepAgent(tools=tools, model=MagicMock())
+        assert "final_answer" in agent.tools
+        assert isinstance(agent.tools["final_answer"], expected_final_answer_tool)
+
+    def test_logs_display_thoughts_even_if_error(self):
+        class FakeJsonModelNoCall(Model):
+            def generate(self, messages, stop_sequences=None, tools_to_call_from=None):
+                return ChatMessage(
+                    role="assistant",
+                    content="""I don't want to call tools today""",
+                    tool_calls=None,
+                    raw="""I don't want to call tools today""",
+                )
+
+        agent_toolcalling = ToolCallingAgent(model=FakeJsonModelNoCall(), tools=[], max_steps=1, verbosity_level=10)
+        with agent_toolcalling.logger.console.capture() as capture:
+            agent_toolcalling.run("Dummy task")
+        assert "don't" in capture.get() and "want" in capture.get()
+
+        class FakeCodeModelNoCall(Model):
+            def generate(self, messages, stop_sequences=None):
+                return ChatMessage(
+                    role="assistant",
+                    content="""I don't want to write an action today""",
+                )
+
+        agent_code = CodeAgent(model=FakeCodeModelNoCall(), tools=[], max_steps=1, verbosity_level=10)
+        with agent_code.logger.console.capture() as capture:
+            agent_code.run("Dummy task")
+        assert "don't" in capture.get() and "want" in capture.get()
+
     def test_step_number(self):
         fake_model = MagicMock()
         fake_model.last_input_token_count = 10
         fake_model.last_output_token_count = 20
         max_steps = 2
-        agent = MultiStepAgent(tools=[], model=fake_model, max_steps=max_steps)
+        agent = DummyMultiStepAgent(tools=[], model=fake_model, max_steps=max_steps)
         assert hasattr(agent, "step_number"), "step_number attribute should be defined"
         assert agent.step_number == 0, "step_number should be initialized to 0"
         agent.run("Test task")
@@ -591,20 +695,12 @@ def test_step_number(self):
             (
                 1,
                 [
-                    [{"role": MessageRole.USER, "content": [{"type": "text", "text": "INITIAL_FACTS_USER_PROMPT"}]}],
                     [{"role": MessageRole.USER, "content": [{"type": "text", "text": "INITIAL_PLAN_USER_PROMPT"}]}],
                 ],
             ),
             (
                 2,
                 [
-                    [
-                        {
-                            "role": MessageRole.SYSTEM,
-                            "content": [{"type": "text", "text": "UPDATE_FACTS_SYSTEM_PROMPT"}],
-                        },
-                        {"role": MessageRole.USER, "content": [{"type": "text", "text": "UPDATE_FACTS_USER_PROMPT"}]},
-                    ],
                     [
                         {
                             "role": MessageRole.SYSTEM,
@@ -623,22 +719,17 @@ def test_planning_step(self, step, expected_messages_list):
             model=fake_model,
         )
         task = "Test task"
-        agent.planning_step(task, is_first_step=(step == 1), step=step)
+        planning_step = agent._generate_planning_step(task, is_first_step=(step == 1), step=step)
         expected_message_texts = {
-            "INITIAL_FACTS_USER_PROMPT": populate_template(
-                agent.prompt_templates["planning"]["initial_facts"], variables=dict(task=task)
-            ),
             "INITIAL_PLAN_USER_PROMPT": populate_template(
                 agent.prompt_templates["planning"]["initial_plan"],
                 variables=dict(
                     task=task,
                     tools=agent.tools,
                     managed_agents=agent.managed_agents,
-                    answer_facts=agent.memory.steps[0].model_output_message_facts.content,
+                    answer_facts=planning_step.model_output_message.content,
                 ),
             ),
-            "UPDATE_FACTS_SYSTEM_PROMPT": agent.prompt_templates["planning"]["update_facts_pre_messages"],
-            "UPDATE_FACTS_USER_PROMPT": agent.prompt_templates["planning"]["update_facts_post_messages"],
             "UPDATE_PLAN_SYSTEM_PROMPT": populate_template(
                 agent.prompt_templates["planning"]["update_plan_pre_messages"], variables=dict(task=task)
             ),
@@ -648,7 +739,7 @@ def test_planning_step(self, step, expected_messages_list):
                     task=task,
                     tools=agent.tools,
                     managed_agents=agent.managed_agents,
-                    facts_update=agent.memory.steps[0].model_output_message_facts.content,
+                    facts_update=planning_step.model_output_message.content,
                     remaining_steps=agent.max_steps - step,
                 ),
             ),
@@ -657,8 +748,6 @@ def test_planning_step(self, step, expected_messages_list):
             for expected_message in expected_messages:
                 for expected_content in expected_message["content"]:
                     expected_content["text"] = expected_message_texts[expected_content["text"]]
-        assert len(agent.memory.steps) == 1
-        planning_step = agent.memory.steps[0]
         assert isinstance(planning_step, PlanningStep)
         expected_model_input_messages = expected_messages_list[0]
         model_input_messages = planning_step.model_input_messages
@@ -675,7 +764,7 @@ def test_planning_step(self, step, expected_messages_list):
             for content, expected_content in zip(message["content"], expected_message["content"]):
                 assert content == expected_content
         # Test calls to model
-        assert len(fake_model.call_args_list) == 2
+        assert len(fake_model.call_args_list) == 1
         for call_args, expected_messages in zip(fake_model.call_args_list, expected_messages_list):
             assert len(call_args.args) == 1
             messages = call_args.args[0]
@@ -760,6 +849,196 @@ def test_provide_final_answer(self, images, expected_messages_list):
                 for content, expected_content in zip(message["content"], expected_message["content"]):
                     assert content == expected_content
 
+    def test_interrupt(self):  # interrupt() called from a step callback must abort the run with AgentError
+        fake_model = MagicMock()
+        fake_model.return_value.content = "Model output."
+        fake_model.last_input_token_count = None
+
+        def interrupt_callback(memory_step, agent):  # step callback that requests the interrupt
+            agent.interrupt()
+
+        agent = CodeAgent(
+            tools=[],
+            model=fake_model,
+            step_callbacks=[interrupt_callback],
+        )
+        with pytest.raises(AgentError) as e:
+            agent.run("Test task")
+        assert "Agent interrupted" in str(e.value)  # check the exception itself, not the ExceptionInfo wrapper
+
+    @pytest.mark.parametrize(
+        "tools, managed_agents, name, expectation",
+        [
+            # Valid case: no duplicates
+            (
+                [MockTool("tool1"), MockTool("tool2")],
+                [MockAgent("agent1", [MockTool("tool3")])],
+                "test_agent",
+                does_not_raise(),
+            ),
+            # Invalid case: duplicate tool names
+            ([MockTool("tool1"), MockTool("tool1")], [], "test_agent", pytest.raises(ValueError)),
+            # Invalid case: tool name same as managed agent name
+            (
+                [MockTool("tool1")],
+                [MockAgent("tool1", [MockTool("final_answer")])],
+                "test_agent",
+                pytest.raises(ValueError),
+            ),
+            # Valid case: tool name same as managed agent's tool name
+            ([MockTool("tool1")], [MockAgent("agent1", [MockTool("tool1")])], "test_agent", does_not_raise()),
+            # Invalid case: agent name same as one of its own tool names
+            ([MockTool("tool1")], [], "tool1", pytest.raises(ValueError)),
+            # Valid case: duplicate tool names across managed agents
+            (
+                [MockTool("tool1")],
+                [
+                    MockAgent("agent1", [MockTool("tool2"), MockTool("final_answer")]),
+                    MockAgent("agent2", [MockTool("tool2"), MockTool("final_answer")]),
+                ],
+                "test_agent",
+                does_not_raise(),
+            ),
+        ],
+    )
+    def test_validate_tools_and_managed_agents(self, tools, managed_agents, name, expectation):
+        fake_model = MagicMock()
+        with expectation:
+            DummyMultiStepAgent(
+                tools=tools,
+                model=fake_model,
+                name=name,
+                managed_agents=managed_agents,
+            )
+
+    def test_from_dict(self):
+        # Create a test agent dictionary
+        agent_dict = {
+            "model": {"class": "TransformersModel", "data": {"model_id": "test/model"}},
+            "tools": [
+                {
+                    "name": "valid_tool_function",
+                    "code": 'from smolagents import Tool\nfrom typing import Any, Optional\n\nclass SimpleTool(Tool):\n    name = "valid_tool_function"\n    description = "A valid tool function."\n    inputs = {"input":{"type":"string","description":"Input string."}}\n    output_type = "string"\n\n    def forward(self, input: str) -> str:\n        """A valid tool function.\n\n        Args:\n            input (str): Input string.\n        """\n        return input.upper()',
+                    "requirements": {"smolagents"},
+                }
+            ],
+            "managed_agents": {},
+            "prompt_templates": EMPTY_PROMPT_TEMPLATES,
+            "max_steps": 15,
+            "verbosity_level": 2,
+            "grammar": {"test": "grammar"},
+            "planning_interval": 3,
+            "name": "test_agent",
+            "description": "Test agent description",
+        }
+
+        # Call from_dict
+        with patch("smolagents.models.TransformersModel") as mock_model_class:
+            mock_model_instance = mock_model_class.from_dict.return_value
+            agent = DummyMultiStepAgent.from_dict(agent_dict)
+
+        # Verify the agent was created correctly
+        assert agent.model == mock_model_instance
+        assert mock_model_class.from_dict.call_args.args[0] == {"model_id": "test/model"}
+        assert agent.max_steps == 15
+        assert agent.logger.level == 2
+        assert agent.grammar == {"test": "grammar"}
+        assert agent.planning_interval == 3
+        assert agent.name == "test_agent"
+        assert agent.description == "Test agent description"
+        # Verify the tool was created correctly
+        assert sorted(agent.tools.keys()) == ["final_answer", "valid_tool_function"]
+        assert agent.tools["valid_tool_function"].name == "valid_tool_function"
+        assert agent.tools["valid_tool_function"].description == "A valid tool function."
+        assert agent.tools["valid_tool_function"].inputs == {
+            "input": {"type": "string", "description": "Input string."}
+        }
+        assert agent.tools["valid_tool_function"].output_type == "string"
+        assert agent.tools["valid_tool_function"]("test") == "TEST"
+
+        # Test overriding with kwargs
+        with patch("smolagents.models.TransformersModel") as mock_model_class:
+            agent = DummyMultiStepAgent.from_dict(agent_dict, max_steps=30)
+        assert agent.max_steps == 30
+
+
+class TestToolCallingAgent(unittest.TestCase):
+    @patch("huggingface_hub.InferenceClient")
+    def test_toolcalling_agent_api(self, mock_inference_client):
+        mock_client = mock_inference_client.return_value
+        mock_response = mock_client.chat_completion.return_value
+        mock_response.choices[0].message = ChatCompletionOutputMessage(
+            role="assistant", content='{"name": "weather_api", "arguments": {"location": "Paris", "date": "today"}}'
+        )
+        mock_response.usage.prompt_tokens = 10
+        mock_response.usage.completion_tokens = 20
+
+        model = InferenceClientModel(model_id="test-model")
+
+        from smolagents import tool
+
+        @tool
+        def weather_api(location: str, date: str) -> str:
+            """
+            Gets the weather in the next days at given location.
+            Args:
+                location: the location
+                date: the date
+            """
+            return f"The weather in {location} on date:{date} is sunny."
+
+        agent = ToolCallingAgent(model=model, tools=[weather_api], max_steps=1)
+        agent.run("What's the weather in Paris?")
+        assert agent.memory.steps[0].task == "What's the weather in Paris?"
+        assert agent.memory.steps[1].tool_calls[0].name == "weather_api"
+        assert agent.memory.steps[1].tool_calls[0].arguments == {"location": "Paris", "date": "today"}
+        assert agent.memory.steps[1].observations == "The weather in Paris on date:today is sunny."
+
+        mock_response.choices[0].message = ChatCompletionOutputMessage(
+            role="assistant",
+            content=None,
+            tool_calls=[
+                ChatCompletionOutputToolCall(
+                    function=ChatCompletionOutputFunctionDefinition(
+                        name="weather_api", arguments='{"location": "Paris", "date": "today"}'
+                    ),
+                    id="call_0",
+                    type="function",
+                )
+            ],
+        )
+
+        agent.run("What's the weather in Paris?")
+        assert agent.memory.steps[0].task == "What's the weather in Paris?"
+        assert agent.memory.steps[1].tool_calls[0].name == "weather_api"
+        assert agent.memory.steps[1].tool_calls[0].arguments == {"location": "Paris", "date": "today"}
+        assert agent.memory.steps[1].observations == "The weather in Paris on date:today is sunny."
+
+    @patch("huggingface_hub.InferenceClient")
+    def test_toolcalling_agent_api_misformatted_output(self, mock_inference_client):
+        """Test that even misformatted json blobs don't interrupt the run for a ToolCallingAgent."""
+        mock_client = mock_inference_client.return_value
+        mock_response = mock_client.chat_completion.return_value
+        mock_response.choices[0].message = ChatCompletionOutputMessage(
+            role="assistant", content='{"name": weather_api", "arguments": {"location": "Paris", "date": "today"}}'
+        )
+
+        mock_response.usage.prompt_tokens = 10
+        mock_response.usage.completion_tokens = 20
+
+        model = InferenceClientModel(model_id="test-model")
+
+        logger = AgentLogger(console=Console(markup=False, no_color=True))
+
+        agent = ToolCallingAgent(model=model, tools=[], max_steps=2, verbosity_level=1, logger=logger)
+        with agent.logger.console.capture() as capture:
+            agent.run("What's the weather in Paris?")
+        assert agent.memory.steps[0].task == "What's the weather in Paris?"
+        assert agent.memory.steps[1].tool_calls is None
+        assert "The JSON blob you used is invalid" in agent.memory.steps[1].error.message
+        assert "Error while parsing" in capture.get()
+        assert len(agent.memory.steps) == 4
+
 
 class TestCodeAgent:
     @pytest.mark.parametrize("provide_run_summary", [False, True])
@@ -780,10 +1059,188 @@ def test_call_with_provide_run_summary(self, provide_run_summary):
             )
         assert result == expected_summary
 
+    def test_errors_logging(self):
+        class FakeCodeModel(Model):
+            def generate(self, messages, stop_sequences=None, grammar=None):
+                return ChatMessage(role="assistant", content="Code:\n```py\nsecret=3;['1', '2'][secret]\n```")
+
+        agent = CodeAgent(tools=[], model=FakeCodeModel(), verbosity_level=1)
+
+        with agent.logger.console.capture() as capture:
+            agent.run("Test request")
+        assert "secret\\\\" in repr(capture.get())
+
+    def test_missing_import_triggers_advice_in_error_log(self):
+        # Set explicit verbosity level to 1 to override the default verbosity level of -1 set in CI fixture
+        agent = CodeAgent(tools=[], model=FakeCodeModelImport(), verbosity_level=1)
+
+        with agent.logger.console.capture() as capture:
+            agent.run("Count to 3")
+        str_output = capture.get()
+        assert "`additional_authorized_imports`" in str_output.replace("\n", "")
 
-class MultiAgentsTests(unittest.TestCase):
-    def test_multiagents_save(self):
-        model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=2096, temperature=0.5)
+    def test_errors_show_offending_line_and_error(self):
+        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModelError())
+        output = agent.run("What is 2 multiplied by 3.6452?")
+        assert isinstance(output, AgentText)
+        assert output == "got an error"
+        assert "Code execution failed at line 'error_function()'" in str(agent.memory.steps[1].error)
+        assert "ValueError" in str(agent.memory.steps)
+
+    def test_error_saves_previous_print_outputs(self):
+        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModelError(), verbosity_level=10)
+        agent.run("What is 2 multiplied by 3.6452?")
+        assert "Flag!" in str(agent.memory.steps[1].observations)
+
+    def test_syntax_error_show_offending_lines(self):
+        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModelSyntaxError())
+        output = agent.run("What is 2 multiplied by 3.6452?")
+        assert isinstance(output, AgentText)
+        assert output == "got an error"
+        assert '    print("Failing due to unexpected indent")' in str(agent.memory.steps)
+
+    def test_end_code_appending(self):
+        # Checking original output message
+        orig_output = FakeCodeModelNoReturn().generate([])
+        assert not orig_output.content.endswith("<end_code>")
+
+        # Checking the step output
+        agent = CodeAgent(
+            tools=[PythonInterpreterTool()],
+            model=FakeCodeModelNoReturn(),
+            max_steps=1,
+        )
+        answer = agent.run("What is 2 multiplied by 3.6452?")
+        assert answer
+
+        memory_steps = agent.memory.steps
+        actions_steps = [s for s in memory_steps if isinstance(s, ActionStep)]
+
+        outputs = [s.model_output for s in actions_steps if s.model_output]
+        assert outputs
+        assert all(o.endswith("<end_code>") for o in outputs)
+
+        messages = [s.model_output_message for s in actions_steps if s.model_output_message]
+        assert messages
+        assert all(m.content.endswith("<end_code>") for m in messages)
+
+    def test_change_tools_after_init(self):
+        from smolagents import tool
+
+        @tool
+        def fake_tool_1() -> str:
+            """Fake tool"""
+            return "1"
+
+        @tool
+        def fake_tool_2() -> str:
+            """Fake tool"""
+            return "2"
+
+        class FakeCodeModel(Model):
+            def generate(self, messages, stop_sequences=None, grammar=None):
+                return ChatMessage(role="assistant", content="Code:\n```py\nfinal_answer(fake_tool_1())\n```")
+
+        agent = CodeAgent(tools=[fake_tool_1], model=FakeCodeModel())
+
+        agent.tools["final_answer"] = CustomFinalAnswerTool()
+        agent.tools["fake_tool_1"] = fake_tool_2
+
+        answer = agent.run("Fake task.")
+        assert answer == "2CUSTOM"
+
+    @pytest.mark.parametrize("agent_dict_version", ["v1.9", "v1.10"])
+    def test_from_folder(self, agent_dict_version, get_agent_dict):
+        agent_dict = get_agent_dict(agent_dict_version)
+        with (
+            patch("smolagents.agents.Path") as mock_path,
+            patch("smolagents.models.InferenceClientModel") as mock_model,
+        ):
+            import json
+
+            mock_path.return_value.__truediv__.return_value.read_text.return_value = json.dumps(agent_dict)
+            mock_model.from_dict.return_value.model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+            agent = CodeAgent.from_folder("ignored_dummy_folder")
+        assert isinstance(agent, CodeAgent)
+        assert agent.name == "test_agent"
+        assert agent.description == "dummy description"
+        assert agent.max_steps == 10
+        assert agent.planning_interval == 2
+        assert agent.grammar is None
+        assert agent.additional_authorized_imports == ["pandas"]
+        assert "pandas" in agent.authorized_imports
+        assert agent.executor_type == "local"
+        assert agent.executor_kwargs == {}
+        assert agent.max_print_outputs_length is None
+        assert agent.managed_agents == {}
+        assert set(agent.tools.keys()) == {"final_answer"}
+        assert agent.model == mock_model.from_dict.return_value
+        assert mock_model.from_dict.call_args.args[0]["model_id"] == "Qwen/Qwen2.5-Coder-32B-Instruct"
+        assert agent.model.model_id == "Qwen/Qwen2.5-Coder-32B-Instruct"
+        assert agent.logger.level == 2
+        assert agent.prompt_templates["system_prompt"] == "dummy system prompt"
+
+    def test_from_dict(self):
+        # Create a test agent dictionary
+        agent_dict = {
+            "model": {"class": "InferenceClientModel", "data": {"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"}},
+            "tools": [
+                {
+                    "name": "valid_tool_function",
+                    "code": 'from smolagents import Tool\nfrom typing import Any, Optional\n\nclass SimpleTool(Tool):\n    name = "valid_tool_function"\n    description = "A valid tool function."\n    inputs = {"input":{"type":"string","description":"Input string."}}\n    output_type = "string"\n\n    def forward(self, input: str) -> str:\n        """A valid tool function.\n\n        Args:\n            input (str): Input string.\n        """\n        return input.upper()',
+                    "requirements": {"smolagents"},
+                }
+            ],
+            "managed_agents": {},
+            "prompt_templates": EMPTY_PROMPT_TEMPLATES,
+            "max_steps": 15,
+            "verbosity_level": 2,
+            "grammar": None,
+            "planning_interval": 3,
+            "name": "test_code_agent",
+            "description": "Test code agent description",
+            "authorized_imports": ["pandas", "numpy"],
+            "executor_type": "local",
+            "executor_kwargs": {"max_workers": 2},
+            "max_print_outputs_length": 1000,
+        }
+
+        # Call from_dict
+        with patch("smolagents.models.InferenceClientModel") as mock_model_class:
+            mock_model_instance = mock_model_class.from_dict.return_value
+            agent = CodeAgent.from_dict(agent_dict)
+
+        # Verify the agent was created correctly with CodeAgent-specific parameters
+        assert agent.model == mock_model_instance
+        assert agent.additional_authorized_imports == ["pandas", "numpy"]
+        assert agent.executor_type == "local"
+        assert agent.executor_kwargs == {"max_workers": 2}
+        assert agent.max_print_outputs_length == 1000
+
+        # Test with missing optional parameters
+        minimal_agent_dict = {
+            "model": {"class": "InferenceClientModel", "data": {"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"}},
+            "tools": [],
+            "managed_agents": {},
+        }
+
+        with patch("smolagents.models.InferenceClientModel"):
+            agent = CodeAgent.from_dict(minimal_agent_dict)
+        # Verify defaults are used
+        assert agent.max_steps == 20  # default from MultiStepAgent.__init__
+
+        # Test overriding with kwargs
+        with patch("smolagents.models.InferenceClientModel"):
+            agent = CodeAgent.from_dict(
+                agent_dict, additional_authorized_imports=["matplotlib"], executor_kwargs={"max_workers": 4}
+            )
+        assert agent.additional_authorized_imports == ["matplotlib"]
+        assert agent.executor_kwargs == {"max_workers": 4}
+
+
+class TestMultiAgents:
+    def test_multiagents_save(self, tmp_path):
+        model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=2096, temperature=0.5)
 
         web_agent = ToolCallingAgent(
             model=model,
@@ -799,8 +1256,10 @@ def test_multiagents_save(self):
             additional_authorized_imports=["pandas", "datetime"],
             managed_agents=[web_agent, code_agent],
             max_print_outputs_length=1000,
+            executor_type="local",
+            executor_kwargs={"max_workers": 2},
         )
-        agent.save("agent_export")
+        agent.save(tmp_path)
 
         expected_structure = {
             "managed_agents": {
@@ -829,24 +1288,25 @@ def verify_structure(current_path: Path, structure: dict):
                         assert file_path.exists(), f"File {file_path} does not exist"
                         assert file_path.is_file(), f"{file_path} is not a file"
 
-        verify_structure(Path("agent_export"), expected_structure)
+        verify_structure(tmp_path, expected_structure)
 
         # Test that re-loaded agents work as expected.
-        agent2 = CodeAgent.from_folder("agent_export", planning_interval=5)
+        agent2 = CodeAgent.from_folder(tmp_path, planning_interval=5)
         assert agent2.planning_interval == 5  # Check that kwargs are used
         assert set(agent2.authorized_imports) == set(["pandas", "datetime"] + BASE_BUILTIN_MODULES)
         assert agent2.max_print_outputs_length == 1000
-        assert agent2.use_e2b_executor is False
+        assert agent2.executor_type == "local"
+        assert agent2.executor_kwargs == {"max_workers": 2}
         assert (
             agent2.managed_agents["web_agent"].tools["web_search"].max_results == 10
         )  # For now tool init parameters are forgotten
         assert agent2.model.kwargs["temperature"] == pytest.approx(0.5)
 
     def test_multiagents(self):
-        class FakeModelMultiagentsManagerAgent:
+        class FakeModelMultiagentsManagerAgent(Model):
             model_id = "fake_model"
 
-            def __call__(
+            def generate(
                 self,
                 messages,
                 stop_sequences=None,
@@ -911,10 +1371,10 @@ def __call__(
 
         manager_model = FakeModelMultiagentsManagerAgent()
 
-        class FakeModelMultiagentsManagedAgent:
+        class FakeModelMultiagentsManagedAgent(Model):
             model_id = "fake_model"
 
-            def __call__(
+            def generate(
                 self,
                 messages,
                 tools_to_call_from=None,
@@ -923,7 +1383,7 @@ def __call__(
             ):
                 return ChatMessage(
                     role="assistant",
-                    content="",
+                    content="Here is the secret content: FLAG1",
                     tool_calls=[
                         ChatMessageToolCall(
                             id="call_0",
@@ -944,6 +1404,7 @@ def __call__(
             max_steps=10,
             name="search_agent",
             description="Runs web searches for you. Give it your request as an argument. Make the request as detailed as needed, you can ask for thorough reports",
+            verbosity_level=2,
         )
 
         manager_code_agent = CodeAgent(
@@ -962,11 +1423,15 @@ def __call__(
             managed_agents=[web_agent],
         )
 
-        report = manager_toolcalling_agent.run("Fake question.")
+        with web_agent.logger.console.capture() as capture:
+            report = manager_toolcalling_agent.run("Fake question.")
         assert report == "Final report."
+        assert "FLAG1" in capture.get()  # Check that managed agent's output is properly logged
 
         # Test that visualization works
-        manager_code_agent.visualize()
+        with manager_toolcalling_agent.logger.console.capture() as capture:
+            manager_toolcalling_agent.visualize()
+        assert "├──" in capture.get()
 
 
 @pytest.fixture
@@ -974,4 +1439,55 @@ def prompt_templates():
     return {
         "system_prompt": "This is a test system prompt.",
         "managed_agent": {"task": "Task for {{name}}: {{task}}", "report": "Report for {{name}}: {{final_answer}}"},
+        "planning": {
+            "initial_plan": "The plan.",
+            "update_plan_pre_messages": "custom",
+            "update_plan_post_messages": "custom",
+        },
+        "final_answer": {"pre_messages": "custom", "post_messages": "custom"},
     }
+
+
+@pytest.mark.parametrize(
+    "arguments",
+    [
+        {},
+        {"arg": "bar"},
+        {None: None},
+        [1, 2, 3],
+    ],
+)
+def test_tool_calling_agents_raises_tool_call_error_being_invoked_with_wrong_arguments(arguments):
+    @tool
+    def _sample_tool(prompt: str) -> str:
+        """Tool that returns same string
+
+        Args:
+            prompt: The string to return
+        Returns:
+            The same string
+        """
+
+        return prompt
+
+    agent = ToolCallingAgent(model=FakeToolCallModel(), tools=[_sample_tool])
+    with pytest.raises(AgentToolCallError):
+        agent.execute_tool_call(_sample_tool.name, arguments)
+
+
+def test_tool_calling_agents_raises_agent_execution_error_when_tool_raises():
+    @tool
+    def _sample_tool(_: str) -> float:
+        """Tool that fails
+
+        Args:
+            _: The pointless string
+        Returns:
+            Some number
+        """
+
+        return 1 / 0
+
+    agent = ToolCallingAgent(model=FakeToolCallModel(), tools=[_sample_tool])
+    with pytest.raises(AgentExecutionError):
+        agent.execute_tool_call(_sample_tool.name, "sample")
diff --git a/tests/test_all_docs.py b/tests/test_all_docs.py
index 0786e9138..0784af413 100644
--- a/tests/test_all_docs.py
+++ b/tests/test_all_docs.py
@@ -21,7 +21,6 @@
 import tempfile
 import traceback
 from pathlib import Path
-from typing import List
 
 import pytest
 from dotenv import load_dotenv
@@ -33,7 +32,7 @@ class SubprocessCallException(Exception):
     pass
 
 
-def run_command(command: List[str], return_stdout=False, env=None):
+def run_command(command: list[str], return_stdout=False, env=None):
     """
     Runs command with subprocess.check_output and returns stdout if requested.
     Properly captures and handles errors during command execution.
@@ -61,14 +60,14 @@ class DocCodeExtractor:
     """Handles extraction and validation of Python code from markdown files."""
 
     @staticmethod
-    def extract_python_code(content: str) -> List[str]:
+    def extract_python_code(content: str) -> list[str]:
         """Extract Python code blocks from markdown content."""
         pattern = r"```(?:python|py)\n(.*?)\n```"
         matches = re.finditer(pattern, content, re.DOTALL)
         return [match.group(1).strip() for match in matches]
 
     @staticmethod
-    def create_test_script(code_blocks: List[str], tmp_dir: str) -> Path:
+    def create_test_script(code_blocks: list[str], tmp_dir: str) -> Path:
         """Create a temporary Python script from code blocks."""
         combined_code = "\n\n".join(code_blocks)
         assert len(combined_code) > 0, "Code is empty!"
@@ -80,6 +79,7 @@ def create_test_script(code_blocks: List[str], tmp_dir: str) -> Path:
         return tmp_file
 
 
+# Skip: these tests are slow and require API keys
 @require_run_all
 class TestDocs:
     """Test case for documentation code testing."""
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 000000000..bded39665
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,112 @@
+from unittest.mock import patch
+
+import pytest
+
+from smolagents.cli import load_model
+from smolagents.local_python_executor import LocalPythonExecutor
+from smolagents.models import InferenceClientModel, LiteLLMModel, OpenAIServerModel, TransformersModel
+
+
+@pytest.fixture
+def set_env_vars(monkeypatch):
+    monkeypatch.setenv("FIREWORKS_API_KEY", "test_fireworks_api_key")
+    monkeypatch.setenv("HF_TOKEN", "test_hf_api_key")
+
+
+def test_load_model_openai_server_model(set_env_vars):
+    with patch("openai.OpenAI") as MockOpenAI:
+        model = load_model("OpenAIServerModel", "test_model_id")
+    assert isinstance(model, OpenAIServerModel)
+    assert model.model_id == "test_model_id"
+    assert MockOpenAI.call_count == 1
+    assert MockOpenAI.call_args.kwargs["base_url"] == "https://api.fireworks.ai/inference/v1"
+    assert MockOpenAI.call_args.kwargs["api_key"] == "test_fireworks_api_key"
+
+
+def test_load_model_litellm_model():
+    model = load_model("LiteLLMModel", "test_model_id", api_key="test_api_key", api_base="https://api.test.com")
+    assert isinstance(model, LiteLLMModel)
+    assert model.api_key == "test_api_key"
+    assert model.api_base == "https://api.test.com"
+    assert model.model_id == "test_model_id"
+
+
+def test_load_model_transformers_model():
+    with (
+        patch(
+            "transformers.AutoModelForImageTextToText.from_pretrained",
+            side_effect=ValueError("Unrecognized configuration class"),
+        ),
+        patch("transformers.AutoModelForCausalLM.from_pretrained"),
+        patch("transformers.AutoTokenizer.from_pretrained"),
+    ):
+        model = load_model("TransformersModel", "test_model_id")
+    assert isinstance(model, TransformersModel)
+    assert model.model_id == "test_model_id"
+
+
+def test_load_model_hf_api_model(set_env_vars):
+    with patch("huggingface_hub.InferenceClient") as huggingface_hub_InferenceClient:
+        model = load_model("InferenceClientModel", "test_model_id")
+    assert isinstance(model, InferenceClientModel)
+    assert model.model_id == "test_model_id"
+    assert huggingface_hub_InferenceClient.call_count == 1
+    assert huggingface_hub_InferenceClient.call_args.kwargs["token"] == "test_hf_api_key"
+
+
+def test_load_model_invalid_model_type():
+    with pytest.raises(ValueError, match="Unsupported model type: InvalidModel"):
+        load_model("InvalidModel", "test_model_id")
+
+
+def test_cli_main(capsys):
+    with patch("smolagents.cli.load_model") as mock_load_model:
+        mock_load_model.return_value = "mock_model"
+        with patch("smolagents.cli.CodeAgent") as mock_code_agent:
+            from smolagents.cli import run_smolagent
+
+            run_smolagent("test_prompt", [], "InferenceClientModel", "test_model_id", provider="hf-inference")
+    # load_model
+    assert len(mock_load_model.call_args_list) == 1
+    assert mock_load_model.call_args.args == ("InferenceClientModel", "test_model_id")
+    assert mock_load_model.call_args.kwargs == {"api_base": None, "api_key": None, "provider": "hf-inference"}
+    # CodeAgent
+    assert len(mock_code_agent.call_args_list) == 1
+    assert mock_code_agent.call_args.args == ()
+    assert mock_code_agent.call_args.kwargs == {
+        "tools": [],
+        "model": "mock_model",
+        "additional_authorized_imports": None,
+    }
+    # agent.run
+    assert len(mock_code_agent.return_value.run.call_args_list) == 1
+    assert mock_code_agent.return_value.run.call_args.args == ("test_prompt",)
+    # print
+    captured = capsys.readouterr()
+    assert "Running agent with these tools: []" in captured.out
+
+
+def test_vision_web_browser_main():
+    with patch("smolagents.vision_web_browser.helium"):
+        with patch("smolagents.vision_web_browser.load_model") as mock_load_model:
+            mock_load_model.return_value = "mock_model"
+            with patch("smolagents.vision_web_browser.CodeAgent") as mock_code_agent:
+                from smolagents.vision_web_browser import helium_instructions, run_webagent
+
+                run_webagent("test_prompt", "InferenceClientModel", "test_model_id", provider="hf-inference")
+    # load_model
+    assert len(mock_load_model.call_args_list) == 1
+    assert mock_load_model.call_args.args == ("InferenceClientModel", "test_model_id")
+    # CodeAgent
+    assert len(mock_code_agent.call_args_list) == 1
+    assert mock_code_agent.call_args.args == ()
+    assert len(mock_code_agent.call_args.kwargs["tools"]) == 4
+    assert mock_code_agent.call_args.kwargs["model"] == "mock_model"
+    assert mock_code_agent.call_args.kwargs["additional_authorized_imports"] == ["helium"]
+    # agent.python_executor
+    assert len(mock_code_agent.return_value.python_executor.call_args_list) == 1
+    assert mock_code_agent.return_value.python_executor.call_args.args == ("from helium import *",)
+    assert LocalPythonExecutor(["helium"])("from helium import *") == (None, "", False)
+    # agent.run
+    assert len(mock_code_agent.return_value.run.call_args_list) == 1
+    assert mock_code_agent.return_value.run.call_args.args == ("test_prompt" + helium_instructions,)
diff --git a/tests/test_default_tools.py b/tests/test_default_tools.py
index 5ff436ef3..3f3fad49c 100644
--- a/tests/test_default_tools.py
+++ b/tests/test_default_tools.py
@@ -17,7 +17,13 @@
 import pytest
 
 from smolagents.agent_types import _AGENT_TYPE_MAPPING
-from smolagents.default_tools import DuckDuckGoSearchTool, PythonInterpreterTool, SpeechToTextTool, VisitWebpageTool
+from smolagents.default_tools import (
+    DuckDuckGoSearchTool,
+    PythonInterpreterTool,
+    SpeechToTextTool,
+    VisitWebpageTool,
+    WikipediaSearchTool,
+)
 
 from .test_tools import ToolTesterMixin
 
@@ -34,24 +40,24 @@ def test_ddgs_with_kwargs(self):
         assert isinstance(result, str)
 
 
-class PythonInterpreterToolTester(unittest.TestCase, ToolTesterMixin):
-    def setUp(self):
+class TestPythonInterpreterTool(ToolTesterMixin):
+    def setup_method(self):
         self.tool = PythonInterpreterTool(authorized_imports=["numpy"])
         self.tool.setup()
 
     def test_exact_match_arg(self):
         result = self.tool("(2 / 2) * 4")
-        self.assertEqual(result, "Stdout:\n\nOutput: 4.0")
+        assert result == "Stdout:\n\nOutput: 4.0"
 
     def test_exact_match_kwarg(self):
         result = self.tool(code="(2 / 2) * 4")
-        self.assertEqual(result, "Stdout:\n\nOutput: 4.0")
+        assert result == "Stdout:\n\nOutput: 4.0"
 
     def test_agent_type_output(self):
         inputs = ["2 * 2"]
         output = self.tool(*inputs, sanitize_inputs_outputs=True)
         output_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
-        self.assertTrue(isinstance(output, output_type))
+        assert isinstance(output, output_type)
 
     def test_agent_types_inputs(self):
         inputs = ["2 * 2"]
@@ -67,7 +73,7 @@ def test_agent_types_inputs(self):
         # Should not raise an error
         output = self.tool(*inputs, sanitize_inputs_outputs=True)
         output_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
-        self.assertTrue(isinstance(output, output_type))
+        assert isinstance(output, output_type)
 
     def test_imports_work(self):
         result = self.tool("import numpy as np")
@@ -87,3 +93,32 @@ def test_new_instance(self):
         assert tool is not None
         assert tool.pre_processor_class == WhisperProcessor
         assert tool.model_class == WhisperForConditionalGeneration
+
+
+@pytest.mark.parametrize(
+    "language, content_type, extract_format, query",
+    [
+        ("en", "summary", "HTML", "Python_(programming_language)"),  # English, Summary Mode, HTML format
+        ("en", "text", "WIKI", "Python_(programming_language)"),  # English, Full Text Mode, WIKI format
+        ("es", "summary", "HTML", "Python_(lenguaje_de_programación)"),  # Spanish, Summary Mode, HTML format
+        ("es", "text", "WIKI", "Python_(lenguaje_de_programación)"),  # Spanish, Full Text Mode, WIKI format
+    ],
+)
+def test_wikipedia_search(language, content_type, extract_format, query):
+    tool = WikipediaSearchTool(
+        user_agent="TestAgent (test@example.com)",
+        language=language,
+        content_type=content_type,
+        extract_format=extract_format,
+    )
+
+    result = tool.forward(query)
+
+    assert isinstance(result, str), "Output should be a string"
+    assert "✅ **Wikipedia Page:**" in result, "Response should contain Wikipedia page title"
+    assert "🔗 **Read more:**" in result, "Response should contain Wikipedia page URL"
+
+    if content_type == "summary":
+        assert len(result.split()) < 1000, "Summary mode should return a shorter text"
+    if content_type == "text":
+        assert len(result.split()) > 1000, "Full text mode should return a longer text"
diff --git a/tests/test_e2b_executor.py b/tests/test_e2b_executor.py
deleted file mode 100644
index 5994a44be..000000000
--- a/tests/test_e2b_executor.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from unittest.mock import MagicMock, patch
-
-from smolagents.e2b_executor import E2BExecutor
-
-
-class TestE2BExecutor:
-    def test_e2b_executor_instantiation(self):
-        logger = MagicMock()
-        with patch("e2b_code_interpreter.Sandbox") as mock_sandbox:
-            mock_sandbox.return_value.commands.run.return_value.error = None
-            mock_sandbox.return_value.run_code.return_value.error = None
-            executor = E2BExecutor(additional_imports=[], tools=[], logger=logger)
-        assert isinstance(executor, E2BExecutor)
-        assert executor.logger == logger
-        assert executor.final_answer is False
-        assert executor.custom_tools == {}
-        assert executor.final_answer_pattern.pattern == r"final_answer\((.*?)\)"
-        assert executor.sbx == mock_sandbox.return_value
diff --git a/tests/test_final_answer.py b/tests/test_final_answer.py
index fcfb02a3f..b960e2fb1 100644
--- a/tests/test_final_answer.py
+++ b/tests/test_final_answer.py
@@ -13,47 +13,44 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import unittest
-from pathlib import Path
 
 import numpy as np
-from PIL import Image
-from transformers import is_torch_available
-from transformers.testing_utils import get_tests_dir, require_torch
+import PIL.Image
+import pytest
 
 from smolagents.agent_types import _AGENT_TYPE_MAPPING
 from smolagents.default_tools import FinalAnswerTool
 
 from .test_tools import ToolTesterMixin
+from .utils.markers import require_torch
 
 
-if is_torch_available():
-    import torch
-
-
-class FinalAnswerToolTester(unittest.TestCase, ToolTesterMixin):
-    def setUp(self):
+class TestFinalAnswerTool(ToolTesterMixin):
+    def setup_method(self):
         self.inputs = {"answer": "Final answer"}
         self.tool = FinalAnswerTool()
 
     def test_exact_match_arg(self):
         result = self.tool("Final answer")
-        self.assertEqual(result, "Final answer")
+        assert result == "Final answer"
 
     def test_exact_match_kwarg(self):
         result = self.tool(answer=self.inputs["answer"])
-        self.assertEqual(result, "Final answer")
-
-    def create_inputs(self):
-        inputs_text = {"answer": "Text input"}
-        inputs_image = {"answer": Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png").resize((512, 512))}
-        inputs_audio = {"answer": torch.Tensor(np.ones(3000))}
-        return {"string": inputs_text, "image": inputs_image, "audio": inputs_audio}
+        assert result == "Final answer"
 
     @require_torch
-    def test_agent_type_output(self):
-        inputs = self.create_inputs()
+    def test_agent_type_output(self, inputs):
         for input_type, input in inputs.items():
             output = self.tool(**input, sanitize_inputs_outputs=True)
             agent_type = _AGENT_TYPE_MAPPING[input_type]
-            self.assertTrue(isinstance(output, agent_type))
+            assert isinstance(output, agent_type)
+
+    @pytest.fixture
+    def inputs(self, shared_datadir):
+        import torch
+
+        return {
+            "string": {"answer": "Text input"},
+            "image": {"answer": PIL.Image.open(shared_datadir / "000000039769.png").resize((512, 512))},
+            "audio": {"answer": torch.Tensor(np.ones(3000))},
+        }
diff --git a/tests/test_function_type_hints_utils.py b/tests/test_function_type_hints_utils.py
index 3379237c6..fdb55f200 100644
--- a/tests/test_function_type_hints_utils.py
+++ b/tests/test_function_type_hints_utils.py
@@ -12,17 +12,234 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
-from typing import List, Optional, Tuple
+from typing import Any
 
 import pytest
 
-from smolagents._function_type_hints_utils import get_imports, get_json_schema
+from smolagents._function_type_hints_utils import DocstringParsingException, get_imports, get_json_schema
 
 
-class TestJsonSchema(unittest.TestCase):
-    def test_get_json_schema(self):
-        def fn(x: int, y: Optional[Tuple[str, str, float]] = None) -> None:
+@pytest.fixture
+def valid_func():
+    """A well-formed function with docstring, type hints, and return block."""
+
+    def multiply(x: int, y: float) -> float:
+        """
+        Multiplies two numbers.
+
+        Args:
+            x: The first number.
+            y: The second number.
+        Returns:
+            Product of x and y.
+        """
+        return x * y
+
+    return multiply
+
+
+@pytest.fixture
+def no_docstring_func():
+    """Function with no docstring."""
+
+    def sample(x: int):
+        return x
+
+    return sample
+
+
+@pytest.fixture
+def missing_arg_doc_func():
+    """Function with docstring but missing an argument description."""
+
+    def add(x: int, y: int):
+        """
+        Adds two numbers.
+
+        Args:
+            x: The first number.
+        """
+        return x + y
+
+    return add
+
+
+@pytest.fixture
+def bad_return_func():
+    """Function docstring with missing return description (allowed)."""
+
+    def do_nothing(x: str | None = None):
+        """
+        Does nothing.
+
+        Args:
+            x: Some optional string.
+        """
+        pass
+
+    return do_nothing
+
+
+@pytest.fixture
+def complex_types_func():
+    def process_data(items: list[str], config: dict[str, float], point: tuple[int, int]) -> dict:
+        """
+        Process some data.
+
+        Args:
+            items: List of items to process.
+            config: Configuration parameters.
+            point: A position as (x,y).
+
+        Returns:
+            Processed data result.
+        """
+        return {"result": True}
+
+    return process_data
+
+
+@pytest.fixture
+def optional_types_func():
+    def process_with_optional(required_arg: str, optional_arg: int | None = None) -> str:
+        """
+        Process with optional argument.
+
+        Args:
+            required_arg: A required string argument.
+            optional_arg: An optional integer argument.
+
+        Returns:
+            Processing result.
+        """
+        return "processed"
+
+    return process_with_optional
+
+
+@pytest.fixture
+def enum_choices_func():
+    def select_color(color: str) -> str:
+        """
+        Select a color.
+
+        Args:
+            color: The color to select (choices: ["red", "green", "blue"])
+
+        Returns:
+            Selected color.
+        """
+        return color
+
+    return select_color
+
+
+@pytest.fixture
+def union_types_func():
+    def process_union(value: int | str) -> bool | str:
+        """
+        Process a value that can be either int or string.
+
+        Args:
+            value: An integer or string value.
+
+        Returns:
+            Processing result.
+        """
+        return True if isinstance(value, int) else "string result"
+
+    return process_union
+
+
+@pytest.fixture
+def nested_types_func():
+    def process_nested_data(data: list[dict[str, Any]]) -> list[str]:
+        """
+        Process nested data structure.
+
+        Args:
+            data: List of dictionaries to process.
+
+        Returns:
+            List of processed results.
+        """
+        return ["result"]
+
+    return process_nested_data
+
+
+@pytest.fixture
+def typed_docstring_func():
+    def calculate(x: int, y: float) -> float:
+        """
+        Calculate something.
+
+        Args:
+            x (int): An integer parameter with type in docstring.
+            y (float): A float parameter with type in docstring.
+
+        Returns:
+            float: The calculated result.
+        """
+        return x * y
+
+    return calculate
+
+
+@pytest.fixture
+def mismatched_types_func():
+    def convert(value: int) -> str:
+        """
+        Convert a value.
+
+        Args:
+            value (str): A string value (type mismatch with hint).
+
+        Returns:
+            int: Converted value (type mismatch with hint).
+        """
+        return str(value)
+
+    return convert
+
+
+@pytest.fixture
+def complex_docstring_types_func():  # fixture: function whose docstring repeats its types in Args/Returns
+    def process(data: dict[str, list[int]]) -> list[dict[str, Any]]:
+        """
+        Process complex data.
+
+        Args:
+            data (Dict[str, List[int]]): Nested structure with types.
+
+        Returns:
+            List[Dict[str, Any]]: Processed results with types.
+        """
+        return [{"result": sum(values)} for values in data.values()]  # one result dict per input entry
+
+    return process
+
+
+@pytest.fixture
+def keywords_in_description_func():
+    def process(value: str) -> str:
+        """
+        Function with Args: or Returns: keywords in its description.
+
+        Args:
+            value: A string value.
+
+        Returns:
+            str: Processed value.
+        """
+        return value.upper()
+
+    return process
+
+
+class TestGetJsonSchema:
+    def test_get_json_schema_example(self):
+        def fn(x: int, y: tuple[str, str, float] | None = None) -> None:
             """
             Test function
             Args:
@@ -50,10 +267,189 @@ def fn(x: int, y: Optional[Tuple[str, str, float]] = None) -> None:
             },
             "return": {"type": "null"},
         }
-        self.assertEqual(
-            schema["function"]["parameters"]["properties"]["y"], expected_schema["parameters"]["properties"]["y"]
+        assert schema["function"]["parameters"]["properties"]["y"] == expected_schema["parameters"]["properties"]["y"]
+        assert schema["function"] == expected_schema
+
+    @pytest.mark.parametrize(
+        "fixture_name,should_fail",
+        [
+            ("valid_func", False),
+            # ('no_docstring_func', True),
+            # ('missing_arg_doc_func', True),
+            ("bad_return_func", False),
+        ],
+    )
+    def test_get_json_schema(self, request, fixture_name, should_fail):
+        func = request.getfixturevalue(fixture_name)
+        schema = get_json_schema(func)
+        assert schema["type"] == "function"
+        assert "function" in schema
+        assert "parameters" in schema["function"]
+
+    @pytest.mark.parametrize(
+        "fixture_name,should_fail",
+        [
+            # ('valid_func', False),
+            ("no_docstring_func", True),
+            ("missing_arg_doc_func", True),
+            # ('bad_return_func', False),
+        ],
+    )
+    def test_get_json_schema_raises(self, request, fixture_name, should_fail):
+        func = request.getfixturevalue(fixture_name)
+        with pytest.raises(DocstringParsingException):
+            get_json_schema(func)
+
+    @pytest.mark.parametrize(
+        "fixture_name,expected_properties",
+        [
+            ("valid_func", {"x": "integer", "y": "number"}),
+            ("bad_return_func", {"x": "string"}),
+        ],
+    )
+    def test_property_types(self, request, fixture_name, expected_properties):
+        """Test that property types are correctly mapped."""
+        func = request.getfixturevalue(fixture_name)
+        schema = get_json_schema(func)
+
+        properties = schema["function"]["parameters"]["properties"]
+        for prop_name, expected_type in expected_properties.items():
+            assert properties[prop_name]["type"] == expected_type
+
+    def test_schema_basic_structure(self, valid_func):
+        """Test that basic schema structure is correct."""
+        schema = get_json_schema(valid_func)
+        # Check schema type
+        assert schema["type"] == "function"
+        assert "function" in schema
+        # Check function schema
+        function_schema = schema["function"]
+        assert function_schema["name"] == "multiply"
+        assert "description" in function_schema
+        assert function_schema["description"] == "Multiplies two numbers."
+        # Check parameters schema
+        assert "parameters" in function_schema
+        params = function_schema["parameters"]
+        assert params["type"] == "object"
+        assert "properties" in params
+        assert "required" in params
+        assert set(params["required"]) == {"x", "y"}
+        properties = params["properties"]
+        assert properties["x"]["type"] == "integer"
+        assert properties["y"]["type"] == "number"
+        # Check return schema
+        assert "return" in function_schema
+        return_schema = function_schema["return"]
+        assert return_schema["type"] == "number"
+        assert return_schema["description"] == "Product of x and y."
+
+    def test_complex_types(self, complex_types_func):
+        """Test schema generation for complex types."""
+        schema = get_json_schema(complex_types_func)
+        properties = schema["function"]["parameters"]["properties"]
+        # Check list type
+        assert properties["items"]["type"] == "array"
+        # Check dict type
+        assert properties["config"]["type"] == "object"
+        # Check tuple type
+        assert properties["point"]["type"] == "array"
+        assert len(properties["point"]["prefixItems"]) == 2
+        assert properties["point"]["prefixItems"][0]["type"] == "integer"
+        assert properties["point"]["prefixItems"][1]["type"] == "integer"
+
+    def test_optional_types(self, optional_types_func):
+        """Test schema generation for optional arguments."""
+        schema = get_json_schema(optional_types_func)
+        params = schema["function"]["parameters"]
+        # Required argument should be in required list
+        assert "required_arg" in params["required"]
+        # Optional argument should not be in required list
+        assert "optional_arg" not in params["required"]
+        # Optional argument should be nullable
+        assert params["properties"]["optional_arg"]["nullable"] is True
+        assert params["properties"]["optional_arg"]["type"] == "integer"
+
+    def test_enum_choices(self, enum_choices_func):
+        """Test schema generation for enum choices in docstring."""
+        schema = get_json_schema(enum_choices_func)
+        color_prop = schema["function"]["parameters"]["properties"]["color"]
+        assert "enum" in color_prop
+        assert color_prop["enum"] == ["red", "green", "blue"]
+
+    def test_union_types(self, union_types_func):
+        """Test schema generation for union types."""
+        schema = get_json_schema(union_types_func)
+        value_prop = schema["function"]["parameters"]["properties"]["value"]
+        return_prop = schema["function"]["return"]
+        # Check union in parameter
+        assert len(value_prop["type"]) == 2
+        # Check union in return type
+        assert len(return_prop["type"]) == 2
+
+    def test_nested_types(self, nested_types_func):
+        """Test schema generation for nested complex types."""
+        schema = get_json_schema(nested_types_func)
+        data_prop = schema["function"]["parameters"]["properties"]["data"]
+        assert data_prop["type"] == "array"
+
+    def test_typed_docstring_parsing(self, typed_docstring_func):
+        """Test parsing of docstrings with type annotations."""
+        schema = get_json_schema(typed_docstring_func)
+        # Type hints should take precedence over docstring types
+        assert schema["function"]["parameters"]["properties"]["x"]["type"] == "integer"
+        assert schema["function"]["parameters"]["properties"]["y"]["type"] == "number"
+        # Description should be extracted correctly
+        assert (
+            schema["function"]["parameters"]["properties"]["x"]["description"]
+            == "An integer parameter with type in docstring."
         )
-        self.assertEqual(schema["function"], expected_schema)
+        assert (
+            schema["function"]["parameters"]["properties"]["y"]["description"]
+            == "A float parameter with type in docstring."
+        )
+        # Return type and description should be correct
+        assert schema["function"]["return"]["type"] == "number"
+        assert schema["function"]["return"]["description"] == "The calculated result."
+
+    def test_mismatched_docstring_types(self, mismatched_types_func):
+        """Test that type hints take precedence over docstring types when they conflict."""
+        schema = get_json_schema(mismatched_types_func)
+        # Type hints should take precedence over docstring types
+        assert schema["function"]["parameters"]["properties"]["value"]["type"] == "integer"
+        # Return type from type hint should be used, not docstring
+        assert schema["function"]["return"]["type"] == "string"
+
+    def test_complex_docstring_types(self, complex_docstring_types_func):
+        """Test parsing of complex type annotations in docstrings."""
+        schema = get_json_schema(complex_docstring_types_func)
+        # Check that complex nested type is parsed correctly from type hints
+        data_prop = schema["function"]["parameters"]["properties"]["data"]
+        assert data_prop["type"] == "object"
+        # Check return type
+        return_prop = schema["function"]["return"]
+        assert return_prop["type"] == "array"
+        # Descriptions should hold only the text, without the docstring type prefix
+        assert data_prop["description"] == "Nested structure with types."
+        assert return_prop["description"] == "Processed results with types."
+
+    @pytest.mark.parametrize(
+        "fixture_name,expected_description",
+        [
+            ("typed_docstring_func", "An integer parameter with type in docstring."),
+            ("complex_docstring_types_func", "Nested structure with types."),
+        ],
+    )
+    def test_type_in_description_handling(self, request, fixture_name, expected_description):
+        """Test that docstring type annotations are stripped from the parameter description."""
+        func = request.getfixturevalue(fixture_name)
+        schema = get_json_schema(func)
+        # First parameter description should equal the expected text
+        first_param_name = list(schema["function"]["parameters"]["properties"].keys())[0]
+        assert schema["function"]["parameters"]["properties"][first_param_name]["description"] == expected_description
+
+    def test_with_special_words_in_description_func(self, keywords_in_description_func):
+        schema = get_json_schema(keywords_in_description_func)
+        assert schema["function"]["description"] == "Function with Args: or Returns: keywords in its description."
 
 
 class TestGetCode:
@@ -114,5 +510,5 @@ class TestGetCode:
             ),
         ],
     )
-    def test_get_imports(self, code: str, expected: List[str]):
+    def test_get_imports(self, code: str, expected: list[str]):
         assert sorted(get_imports(code)) == sorted(expected)
diff --git a/tests/test_import.py b/tests/test_import.py
index aaa284d39..c977de8d2 100644
--- a/tests/test_import.py
+++ b/tests/test_import.py
@@ -1,11 +1,27 @@
+import os
 import subprocess
+import tempfile
 
 
-def test_import_smolagents_without_extras():
-    # Run the import statement in an isolated virtual environment
-    result = subprocess.run(
-        ["uv", "run", "--isolated", "--no-editable", "-"], input="import smolagents", text=True, capture_output=True
-    )
+def test_import_smolagents_without_extras(monkeypatch):
+    monkeypatch.delenv("VIRTUAL_ENV", raising=False)
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create a virtual environment
+        venv_dir = os.path.join(temp_dir, "venv")
+        subprocess.run(["uv", "venv", venv_dir], check=True)
+
+        # Install smolagents in the virtual environment
+        subprocess.run(
+            ["uv", "pip", "install", "--python", os.path.join(venv_dir, "bin", "python"), "smolagents @ ."], check=True
+        )
+
+        # Run the import test in the virtual environment
+        result = subprocess.run(
+            [os.path.join(venv_dir, "bin", "python"), "-c", "import smolagents"],
+            capture_output=True,
+            text=True,
+        )
+
     # Check if the import was successful
     assert result.returncode == 0, (
         "Import failed with error: "
diff --git a/tests/test_local_python_executor.py b/tests/test_local_python_executor.py
index 29e1ec94c..f7d43c282 100644
--- a/tests/test_local_python_executor.py
+++ b/tests/test_local_python_executor.py
@@ -16,20 +16,27 @@
 import ast
 import types
 import unittest
+from contextlib import nullcontext as does_not_raise
 from textwrap import dedent
+from unittest.mock import patch
 
 import numpy as np
 import pandas as pd
 import pytest
 
-from smolagents.default_tools import BASE_PYTHON_TOOLS
+from smolagents.default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool
 from smolagents.local_python_executor import (
+    DANGEROUS_FUNCTIONS,
+    DANGEROUS_MODULES,
     InterpreterError,
+    LocalPythonExecutor,
     PrintContainer,
-    check_module_authorized,
+    check_import_authorized,
+    evaluate_boolop,
     evaluate_condition,
     evaluate_delete,
     evaluate_python_code,
+    evaluate_subscript,
     fix_final_answer_code,
     get_safe_module,
 )
@@ -52,14 +59,14 @@ def test_evaluate_assign(self):
         state = {}
         result, _ = evaluate_python_code(code, {}, state=state)
         assert result == 3
-        self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": 2})
+        self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": {"counter": 2}})
 
         code = "x = y"
         state = {"y": 5}
         result, _ = evaluate_python_code(code, {}, state=state)
         # evaluate returns the value of the last assignment.
         assert result == 5
-        self.assertDictEqualNoPrint(state, {"x": 5, "y": 5, "_operations_count": 2})
+        self.assertDictEqualNoPrint(state, {"x": 5, "y": 5, "_operations_count": {"counter": 2}})
 
         code = "a=1;b=None"
         result, _ = evaluate_python_code(code, {}, state={})
@@ -85,26 +92,46 @@ def test_evaluate_call(self):
         state = {"x": 3}
         result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
         assert result == 5
-        self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": 3})
+        self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": {"counter": 3}})
 
         # Should not work without the tool
-        with pytest.raises(InterpreterError) as e:
+        with pytest.raises(InterpreterError, match="Forbidden function evaluation: 'add_two'"):
             evaluate_python_code(code, {}, state=state)
-        assert "tried to execute add_two" in str(e.value)
+
+    def test_evaluate_class_def(self):
+        code = dedent('''\
+            class MyClass:
+                """A class with a value."""
+
+                def __init__(self, value):
+                    self.value = value
+
+                def get_value(self):
+                    return self.value
+
+            instance = MyClass(42)
+            result = instance.get_value()
+        ''')
+        state = {}
+        result, _ = evaluate_python_code(code, {}, state=state)
+        assert result == 42
+        assert state["instance"].__doc__ == "A class with a value."
 
     def test_evaluate_constant(self):
         code = "x = 3"
         state = {}
         result, _ = evaluate_python_code(code, {}, state=state)
         assert result == 3
-        self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": 2})
+        self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": {"counter": 2}})
 
     def test_evaluate_dict(self):
         code = "test_dict = {'x': x, 'y': add_two(x)}"
         state = {"x": 3}
         result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
         self.assertDictEqual(result, {"x": 3, "y": 5})
-        self.assertDictEqualNoPrint(state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": 7})
+        self.assertDictEqualNoPrint(
+            state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": {"counter": 7}}
+        )
 
     def test_evaluate_expression(self):
         code = "x = 3\ny = 5"
@@ -112,7 +139,7 @@ def test_evaluate_expression(self):
         result, _ = evaluate_python_code(code, {}, state=state)
         # evaluate returns the value of the last assignment.
         assert result == 5
-        self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": 4})
+        self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": {"counter": 4}})
 
     def test_evaluate_f_string(self):
         code = "text = f'This is x: {x}.'"
@@ -120,14 +147,16 @@ def test_evaluate_f_string(self):
         result, _ = evaluate_python_code(code, {}, state=state)
         # evaluate returns the value of the last assignment.
         assert result == "This is x: 3."
-        self.assertDictEqualNoPrint(state, {"x": 3, "text": "This is x: 3.", "_operations_count": 6})
+        self.assertDictEqualNoPrint(state, {"x": 3, "text": "This is x: 3.", "_operations_count": {"counter": 6}})
 
     def test_evaluate_f_string_with_format(self):
         code = "text = f'This is x: {x:.2f}.'"
         state = {"x": 3.336}
         result, _ = evaluate_python_code(code, {}, state=state)
         assert result == "This is x: 3.34."
-        self.assertDictEqualNoPrint(state, {"x": 3.336, "text": "This is x: 3.34.", "_operations_count": 8})
+        self.assertDictEqualNoPrint(
+            state, {"x": 3.336, "text": "This is x: 3.34.", "_operations_count": {"counter": 8}}
+        )
 
     def test_evaluate_f_string_with_complex_format(self):
         code = "text = f'This is x: {x:>{width}.{precision}f}.'"
@@ -135,7 +164,14 @@ def test_evaluate_f_string_with_complex_format(self):
         result, _ = evaluate_python_code(code, {}, state=state)
         assert result == "This is x:       3.34."
         self.assertDictEqualNoPrint(
-            state, {"x": 3.336, "width": 10, "precision": 2, "text": "This is x:       3.34.", "_operations_count": 14}
+            state,
+            {
+                "x": 3.336,
+                "width": 10,
+                "precision": 2,
+                "text": "This is x:       3.34.",
+                "_operations_count": {"counter": 14},
+            },
         )
 
     def test_evaluate_if(self):
@@ -144,40 +180,42 @@ def test_evaluate_if(self):
         result, _ = evaluate_python_code(code, {}, state=state)
         # evaluate returns the value of the last assignment.
         assert result == 2
-        self.assertDictEqualNoPrint(state, {"x": 3, "y": 2, "_operations_count": 6})
+        self.assertDictEqualNoPrint(state, {"x": 3, "y": 2, "_operations_count": {"counter": 6}})
 
         state = {"x": 8}
         result, _ = evaluate_python_code(code, {}, state=state)
         # evaluate returns the value of the last assignment.
         assert result == 5
-        self.assertDictEqualNoPrint(state, {"x": 8, "y": 5, "_operations_count": 6})
+        self.assertDictEqualNoPrint(state, {"x": 8, "y": 5, "_operations_count": {"counter": 6}})
 
     def test_evaluate_list(self):
         code = "test_list = [x, add_two(x)]"
         state = {"x": 3}
         result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
         self.assertListEqual(result, [3, 5])
-        self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": 5})
+        self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": {"counter": 5}})
 
     def test_evaluate_name(self):
         code = "y = x"
         state = {"x": 3}
         result, _ = evaluate_python_code(code, {}, state=state)
         assert result == 3
-        self.assertDictEqualNoPrint(state, {"x": 3, "y": 3, "_operations_count": 2})
+        self.assertDictEqualNoPrint(state, {"x": 3, "y": 3, "_operations_count": {"counter": 2}})
 
     def test_evaluate_subscript(self):
         code = "test_list = [x, add_two(x)]\ntest_list[1]"
         state = {"x": 3}
         result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
         assert result == 5
-        self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": 9})
+        self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": {"counter": 9}})
 
         code = "test_dict = {'x': x, 'y': add_two(x)}\ntest_dict['y']"
         state = {"x": 3}
         result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
         assert result == 5
-        self.assertDictEqualNoPrint(state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": 11})
+        self.assertDictEqualNoPrint(
+            state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": {"counter": 11}}
+        )
 
         code = "vendor = {'revenue': 31000, 'rent': 50312}; vendor['ratio'] = round(vendor['revenue'] / vendor['rent'], 2)"
         state = {}
@@ -201,14 +239,14 @@ def test_evaluate_for(self):
         state = {}
         result, _ = evaluate_python_code(code, {"range": range}, state=state)
         assert result == 2
-        self.assertDictEqualNoPrint(state, {"x": 2, "i": 2, "_operations_count": 11})
+        self.assertDictEqualNoPrint(state, {"x": 2, "i": 2, "_operations_count": {"counter": 11}})
 
     def test_evaluate_binop(self):
         code = "y + x"
         state = {"x": 3, "y": 6}
         result, _ = evaluate_python_code(code, {}, state=state)
         assert result == 9
-        self.assertDictEqualNoPrint(state, {"x": 3, "y": 6, "_operations_count": 4})
+        self.assertDictEqualNoPrint(state, {"x": 3, "y": 6, "_operations_count": {"counter": 4}})
 
     def test_recursive_function(self):
         code = """
@@ -221,6 +259,38 @@ def recur_fibo(n):
         result, _ = evaluate_python_code(code, {}, state={})
         assert result == 8
 
+    def test_max_operations(self):
+        # Check that operation counter is not reset in functions
+        code = dedent(
+            """
+            def func(a):
+                for j in range(10):
+                    a += j
+                return a
+
+            for i in range(5):
+                func(i)
+            """
+        )
+        with patch("smolagents.local_python_executor.MAX_OPERATIONS", 100):
+            with pytest.raises(InterpreterError) as exception_info:
+                evaluate_python_code(code, {"range": range}, state={})
+        assert "Reached the max number of operations" in str(exception_info.value)
+
+    def test_operations_count(self):
+        # Check the exact operation count for defining and then calling a trivial function
+        code = dedent(
+            """
+            def func():
+                return 0
+
+            func()
+            """
+        )
+        state = {}
+        evaluate_python_code(code, {"range": range}, state=state)
+        assert state["_operations_count"]["counter"] == 5
+
     def test_evaluate_string_methods(self):
         code = "'hello'.replace('h', 'o').split('e')"
         result, _ = evaluate_python_code(code, {}, state={})
@@ -232,9 +302,12 @@ def test_evaluate_slicing(self):
         assert result == "le"
 
     def test_access_attributes(self):
-        code = "integer = 1\nobj_class = integer.__class__\nobj_class"
-        result, _ = evaluate_python_code(code, {}, state={})
-        assert result is int
+        class A:
+            attr = 2
+
+        code = "A.attr"
+        result, _ = evaluate_python_code(code, {}, state={"A": A})
+        assert result == 2
 
     def test_list_comprehension(self):
         code = "sentence = 'THESEAGULL43'\nmeaningful_sentence = '-'.join([char.lower() for char in sentence if char.isalpha()])"
@@ -312,6 +385,11 @@ def test_listcomp(self):
         result, _ = evaluate_python_code(code, {"range": range}, state={})
         assert result == [0, 1, 2]
 
+    def test_setcomp(self):
+        code = "batman_times = {entry['time'] for entry in [{'time': 10}, {'time': 19}, {'time': 20}]}"
+        result, _ = evaluate_python_code(code, {}, state={})
+        assert result == {10, 19, 20}
+
     def test_break_continue(self):
         code = "for i in range(10):\n    if i == 5:\n        break\ni"
         result, _ = evaluate_python_code(code, {"range": range}, state={})
@@ -359,17 +437,19 @@ def test_while(self):
 
         # test infinite loop
         code = "i = 0\nwhile i < 3:\n    i -= 1\ni"
-        with pytest.raises(InterpreterError) as e:
-            evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
-        assert "iterations in While loop exceeded" in str(e)
+        with patch("smolagents.local_python_executor.MAX_WHILE_ITERATIONS", 100):
+            with pytest.raises(InterpreterError, match=".*Maximum number of 100 iterations in While loop exceeded"):
+                evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
 
         # test lazy evaluation
-        code = """
-house_positions = [0, 7, 10, 15, 18, 22, 22]
-i, n, loc = 0, 7, 30
-while i < n and house_positions[i] <= loc:
-    i += 1
-"""
+        code = dedent(
+            """
+            house_positions = [0, 7, 10, 15, 18, 22, 22]
+            i, n, loc = 0, 7, 30
+            while i < n and house_positions[i] <= loc:
+                i += 1
+            """
+        )
         state = {}
         evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)
 
@@ -399,6 +479,22 @@ def test_boolops(self):
         result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": 1, "b": 2, "c": 3, "d": 4, "e": 5})
         assert result == "Sacramento"
 
+        # Short-circuit evaluation:
+        # (T and 0) or (T and T) => 0 or True => True
+        code = "result = (x > 3 and y) or (z == 10 and not y)\nresult"
+        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"x": 5, "y": 0, "z": 10})
+        assert result
+
+        # (None or "") or "Found" => "" or "Found" => "Found"
+        code = "result = (a or c) or b\nresult"
+        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": None, "b": "Found", "c": ""})
+        assert result == "Found"
+
+        # ("First" and "") or "Third" => "" or "Third" -> "Third"
+        code = "result = (a and b) or c\nresult"
+        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": "First", "b": "", "c": "Third"})
+        assert result == "Third"
+
     def test_if_conditions(self):
         code = """char='a'
 if char.isalpha():
@@ -446,22 +542,35 @@ def test_imports(self):
 
         # Test submodules are handled properly, thus not raising error
         code = "import numpy.random as rd\nrng = rd.default_rng(12345)\nrng.random()"
-        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy"])
+        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.random"])
 
         code = "from numpy.random import default_rng as d_rng\nrng = d_rng(12345)\nrng.random()"
-        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy"])
+        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.random"])
 
     def test_additional_imports(self):
         code = "import numpy as np"
         evaluate_python_code(code, authorized_imports=["numpy"], state={})
 
+        # Test that allowing 'numpy.*' allows numpy root package and its submodules
+        code = "import numpy as np\nnp.random.default_rng(123)\nnp.array([1, 2])"
+        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.*"])
+
+        # Test that allowing 'numpy.*' allows importing a submodule
+        code = "import numpy.random as rd\nrd.default_rng(12345)"
+        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.*"])
+
         code = "import numpy.random as rd"
         evaluate_python_code(code, authorized_imports=["numpy.random"], state={})
-        evaluate_python_code(code, authorized_imports=["numpy"], state={})
+        evaluate_python_code(code, authorized_imports=["numpy.*"], state={})
         evaluate_python_code(code, authorized_imports=["*"], state={})
         with pytest.raises(InterpreterError):
             evaluate_python_code(code, authorized_imports=["random"], state={})
 
+        with pytest.raises(InterpreterError):
+            evaluate_python_code(code, authorized_imports=["numpy.a"], state={})
+        with pytest.raises(InterpreterError):
+            evaluate_python_code(code, authorized_imports=["numpy.a.*"], state={})
+
     def test_multiple_comparators(self):
         code = "0 <= -1 < 4 and 0 <= -5 < 4"
         result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
@@ -937,22 +1046,6 @@ def test_fix_final_answer_code(self):
     Got:      {result}
     """
 
-    def test_dangerous_subpackage_access_blocked(self):
-        # Direct imports with dangerous patterns should fail
-        code = "import random._os"
-        with pytest.raises(InterpreterError):
-            evaluate_python_code(code)
-
-        # Import of whitelisted modules should succeed but dangerous submodules should not exist
-        code = "import random;random._os.system('echo bad command passed')"
-        with pytest.raises(InterpreterError) as e:
-            evaluate_python_code(code)
-        assert "AttributeError: module 'random' has no attribute '_os'" in str(e)
-
-        code = "import doctest;doctest.inspect.os.system('echo bad command passed')"
-        with pytest.raises(InterpreterError):
-            evaluate_python_code(code, authorized_imports=["doctest"])
-
     def test_close_matches_subscript(self):
         code = 'capitals = {"Czech Republic": "Prague", "Monaco": "Monaco", "Bhutan": "Thimphu"};capitals["Butan"]'
         with pytest.raises(Exception) as e:
@@ -973,21 +1066,18 @@ def test_dangerous_builtins_calls_are_blocked(self):
         with pytest.raises(InterpreterError):
             evaluate_python_code(dangerous_code, static_tools=BASE_PYTHON_TOOLS)
 
-    def test_dangerous_builtins_are_callable_if_explicitly_added(self):
-        dangerous_code = """
-compile = callable.__self__.compile
-eval = callable.__self__.eval
-exec = callable.__self__.exec
-
-eval("1 + 1")
-exec(compile("1 + 1", "no filename", "exec"))
-
-teval("1 + 1")
-texec(tcompile("1 + 1", "no filename", "exec"))
-        """
+    def test_final_answer_accepts_kwarg_answer(self):
+        code = "final_answer(answer=2)"
+        result, _ = evaluate_python_code(code, {"final_answer": (lambda x: 2 * x)}, state={})
+        assert result == 4
 
+    def test_dangerous_builtins_are_callable_if_explicitly_added(self):
+        dangerous_code = dedent("""
+            eval("1 + 1")
+            exec(compile("1 + 1", "no filename", "exec"))
+        """)
         evaluate_python_code(
-            dangerous_code, static_tools={"tcompile": compile, "teval": eval, "texec": exec} | BASE_PYTHON_TOOLS
+            dangerous_code, static_tools={"compile": compile, "eval": eval, "exec": exec} | BASE_PYTHON_TOOLS
         )
 
     def test_can_import_os_if_explicitly_authorized(self):
@@ -998,6 +1088,64 @@ def test_can_import_os_if_all_imports_authorized(self):
         dangerous_code = "import os; os.listdir('./')"
         evaluate_python_code(dangerous_code, authorized_imports=["*"])
 
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
+    def test_can_import_scipy_if_explicitly_authorized(self):
+        code = "import scipy"
+        evaluate_python_code(code, authorized_imports=["scipy"])
+
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
+    def test_can_import_sklearn_if_explicitly_authorized(self):
+        code = "import sklearn"
+        evaluate_python_code(code, authorized_imports=["sklearn"])
+
+    def test_function_def_recovers_source_code(self):
+        executor = LocalPythonExecutor([])
+
+        executor.send_tools({"final_answer": FinalAnswerTool()})
+
+        res, _, _ = executor(
+            dedent(
+                """
+                def target_function():
+                    return "Hello world"
+
+                final_answer(target_function)
+                """
+            )
+        )
+        assert res.__name__ == "target_function"
+        assert res.__source__ == "def target_function():\n    return 'Hello world'"
+
+
+def test_evaluate_annassign():
+    code = dedent("""\
+        # Basic annotated assignment
+        x: int = 42
+
+        # Type annotations with expressions
+        y: float = x / 2
+
+        # Type annotation without assignment
+        z: list
+
+        # Type annotation with complex value
+        names: list = ["Alice", "Bob", "Charlie"]
+
+        # Type hint shouldn't restrict values at runtime
+        s: str = 123  # Would be a type error in static checking, but valid at runtime
+
+        # Access the values
+        result = (x, y, names, s)
+    """)
+    state = {}
+    evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)
+    assert state["x"] == 42
+    assert state["y"] == 21.0
+    assert "z" not in state  # a bare annotation (no value) must not define z
+    assert state["names"] == ["Alice", "Bob", "Charlie"]
+    assert state["s"] == 123  # Type hints don't restrict at runtime
+    assert state["result"] == (42, 21.0, ["Alice", "Bob", "Charlie"], 123)
+
 
 @pytest.mark.parametrize(
     "code, expected_result",
@@ -1132,7 +1280,7 @@ def __{operator_name}__(self, other):
                 del x[2]
                 x[2]
             """),
-            "Index 2 out of bounds for list of length 2",
+            "IndexError: list index out of range",
         ),
         (
             dedent("""\
@@ -1157,6 +1305,26 @@ def test_evaluate_python_code_with_evaluate_delete(code, expected_error_message)
     assert expected_error_message in str(exception_info.value)
 
 
+@pytest.mark.parametrize("a", [1, 0])
+@pytest.mark.parametrize("b", [2, 0])
+@pytest.mark.parametrize("c", [3, 0])
+def test_evaluate_boolop_and(a, b, c):
+    boolop_ast = ast.parse("a and b and c").body[0].value
+    state = {"a": a, "b": b, "c": c}
+    result = evaluate_boolop(boolop_ast, state, {}, {}, [])
+    assert result == (a and b and c)
+
+
+@pytest.mark.parametrize("a", [1, 0])
+@pytest.mark.parametrize("b", [2, 0])
+@pytest.mark.parametrize("c", [3, 0])
+def test_evaluate_boolop_or(a, b, c):
+    boolop_ast = ast.parse("a or b or c").body[0].value
+    state = {"a": a, "b": b, "c": c}
+    result = evaluate_boolop(boolop_ast, state, {}, {}, [])
+    assert result == (a or b or c)
+
+
 @pytest.mark.parametrize(
     "code, state, expectation",
     [
@@ -1303,6 +1471,123 @@ def test_evaluate_condition_with_pandas_exceptions(condition, state, expected_ex
     assert str(expected_exception) in str(exception_info.value)
 
 
+@pytest.mark.parametrize(
+    "subscript, state, expected_result",
+    [
+        ("dct[1]", {"dct": {1: 11, 2: 22}}, 11),
+        ("dct[2]", {"dct": {1: "a", 2: "b"}}, "b"),
+        ("dct['b']", {"dct": {"a": 1, "b": 2}}, 2),
+        ("dct['a']", {"dct": {"a": "aa", "b": "bb"}}, "aa"),
+        ("dct[1, 2]", {"dct": {(1, 2): 3}}, 3),  # tuple-index
+        ("dct['a']['b']", {"dct": {"a": {"b": 1}}}, 1),  # nested
+        ("lst[0]", {"lst": [1, 2, 3]}, 1),
+        ("lst[-1]", {"lst": [1, 2, 3]}, 3),
+        ("lst[1:3]", {"lst": [1, 2, 3, 4]}, [2, 3]),
+        ("lst[:]", {"lst": [1, 2, 3]}, [1, 2, 3]),
+        ("lst[::2]", {"lst": [1, 2, 3, 4]}, [1, 3]),
+        ("lst[::-1]", {"lst": [1, 2, 3]}, [3, 2, 1]),
+        ("tup[1]", {"tup": (1, 2, 3)}, 2),
+        ("tup[-1]", {"tup": (1, 2, 3)}, 3),
+        ("tup[1:3]", {"tup": (1, 2, 3, 4)}, (2, 3)),
+        ("tup[:]", {"tup": (1, 2, 3)}, (1, 2, 3)),
+        ("tup[::2]", {"tup": (1, 2, 3, 4)}, (1, 3)),
+        ("tup[::-1]", {"tup": (1, 2, 3)}, (3, 2, 1)),
+        ("st[1]", {"st": "abc"}, "b"),  # state key must be the exact name being subscripted
+        ("st[-1]", {"st": "abc"}, "c"),
+        ("st[1:3]", {"st": "abcd"}, "bc"),
+        ("st[:]", {"st": "abc"}, "abc"),
+        ("st[::2]", {"st": "abcd"}, "ac"),
+        ("st[::-1]", {"st": "abc"}, "cba"),
+        ("arr[1]", {"arr": np.array([1, 2, 3])}, 2),
+        ("arr[1:3]", {"arr": np.array([1, 2, 3, 4])}, np.array([2, 3])),
+        ("arr[:]", {"arr": np.array([1, 2, 3])}, np.array([1, 2, 3])),
+        ("arr[::2]", {"arr": np.array([1, 2, 3, 4])}, np.array([1, 3])),
+        ("arr[::-1]", {"arr": np.array([1, 2, 3])}, np.array([3, 2, 1])),
+        ("arr[1, 2]", {"arr": np.array([[1, 2, 3], [4, 5, 6]])}, 6),
+        ("ser[1]", {"ser": pd.Series([1, 2, 3])}, 2),
+        ("ser.loc[1]", {"ser": pd.Series([1, 2, 3])}, 2),
+        ("ser.loc[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 3),
+        ("ser.iloc[1]", {"ser": pd.Series([1, 2, 3])}, 2),
+        ("ser.iloc[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 2),
+        ("ser.at[1]", {"ser": pd.Series([1, 2, 3])}, 2),
+        ("ser.at[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 3),
+        ("ser.iat[1]", {"ser": pd.Series([1, 2, 3])}, 2),
+        ("ser.iat[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 2),
+        ("ser[1:3]", {"ser": pd.Series([1, 2, 3, 4])}, pd.Series([2, 3], index=[1, 2])),
+        ("ser[:]", {"ser": pd.Series([1, 2, 3])}, pd.Series([1, 2, 3])),
+        ("ser[::2]", {"ser": pd.Series([1, 2, 3, 4])}, pd.Series([1, 3], index=[0, 2])),
+        ("ser[::-1]", {"ser": pd.Series([1, 2, 3])}, pd.Series([3, 2, 1], index=[2, 1, 0])),
+        ("df['y'][1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
+        ("df['y'][5]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 3),
+        ("df.loc[1, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
+        ("df.loc[5, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 3),
+        ("df.iloc[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
+        ("df.iloc[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 4),
+        ("df.at[1, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
+        ("df.at[5, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 3),
+        ("df.iat[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
+        ("df.iat[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 4),
+    ],
+)
+def test_evaluate_subscript(subscript, state, expected_result):
+    subscript_ast = ast.parse(subscript).body[0].value
+    result = evaluate_subscript(subscript_ast, state, {}, {}, [])
+    try:
+        assert result == expected_result
+    except ValueError:  # element-wise == on numpy/pandas results has no single truth value
+        assert (result == expected_result).all()
+
+
+@pytest.mark.parametrize(
+    "subscript, state, expected_error_message",
+    [
+        ("dct['a']", {"dct": {}}, "KeyError: 'a'"),
+        ("dct[0]", {"dct": {}}, "KeyError: 0"),
+        ("dct['c']", {"dct": {"a": 1, "b": 2}}, "KeyError: 'c'"),
+        ("dct[1, 2, 3]", {"dct": {(1, 2): 3}}, "KeyError: (1, 2, 3)"),
+        ("lst[0]", {"lst": []}, "IndexError: list index out of range"),
+        ("lst[3]", {"lst": [1, 2, 3]}, "IndexError: list index out of range"),
+        ("lst[-4]", {"lst": [1, 2, 3]}, "IndexError: list index out of range"),
+        ("value[0]", {"value": 1}, "TypeError: 'int' object is not subscriptable"),
+    ],
+)
+def test_evaluate_subscript_error(subscript, state, expected_error_message):
+    subscript_ast = ast.parse(subscript).body[0].value
+    with pytest.raises(InterpreterError, match="Could not index") as exception_info:
+        _ = evaluate_subscript(subscript_ast, state, {}, {}, [])
+    assert expected_error_message in str(exception_info.value)
+
+
+@pytest.mark.parametrize(
+    "subscriptable_class, expectation",
+    [
+        (True, 20),
+        (False, InterpreterError("TypeError: 'Custom' object is not subscriptable")),
+    ],
+)
+def test_evaluate_subscript_with_custom_class(subscriptable_class, expectation):
+    if subscriptable_class:
+
+        class Custom:
+            def __getitem__(self, key):
+                return key * 10
+    else:
+
+        class Custom:
+            pass
+
+    state = {"obj": Custom()}
+    subscript = "obj[2]"
+    subscript_ast = ast.parse(subscript).body[0].value
+    if isinstance(expectation, Exception):
+        with pytest.raises(type(expectation), match="Could not index") as exception_info:
+            evaluate_subscript(subscript_ast, state, {}, {}, [])
+        assert "TypeError: 'Custom' object is not subscriptable" in str(exception_info.value)
+    else:
+        result = evaluate_subscript(subscript_ast, state, {}, {}, [])
+        assert result == expectation
+
+
 def test_get_safe_module_handle_lazy_imports():
     class FakeModule(types.ModuleType):
         def __init__(self, name):
@@ -1382,15 +1667,497 @@ def test_len(self):
 @pytest.mark.parametrize(
     "module,authorized_imports,expected",
     [
-        ("os", ["*"], True),
+        ("os", ["other", "*"], True),  # "*" is expected to authorize everything even next to other entries
         ("AnyModule", ["*"], True),
         ("os", ["os"], True),
         ("AnyModule", ["AnyModule"], True),
         ("Module.os", ["Module"], False),
-        ("Module.os", ["Module", "os"], True),
-        ("os.path", ["os"], True),
-        ("os", ["os.path"], False),
+        ("Module.os", ["Module", "Module.os"], True),  # the submodule is listed under its full dotted path
+        ("os.path", ["os.*"], True),  # a "pkg.*" entry is expected to cover submodules of pkg
+        ("os", ["os.path"], True),  # an authorized submodule is expected to permit its parent package
     ],
 )
-def test_check_module_authorized(module: str, authorized_imports: list[str], expected: bool):
-    assert check_module_authorized(module, authorized_imports) == expected
+def test_check_import_authorized(module: str, authorized_imports: list[str], expected: bool):
+    assert check_import_authorized(module, authorized_imports) == expected  # table-driven: one case per row
+
+
+class TestLocalPythonExecutor:  # behaviour checks: initial state, calls through dicts, assignments
+    def test_state_name(self):
+        state = LocalPythonExecutor(additional_authorized_imports=[]).state
+        assert state.get("__name__") == "__main__"  # a fresh executor reports itself as __main__
+
+    @pytest.mark.parametrize(
+        "code",
+        [
+            "d = {'func': lambda x: x + 10}; func = d['func']; func(1)",
+            "d = {'func': lambda x: x + 10}; d['func'](1)",
+        ],
+    )
+    def test_call_from_dict(self, code):
+        run = LocalPythonExecutor([])
+        value, _, _ = run(code)  # the call returns three items; only the first is checked here
+        assert value == 11
+
+    @pytest.mark.parametrize(
+        "code",
+        [
+            "a = b = 1; a",
+            "a = b = 1; b",
+            "a, b = c, d = 1, 1; a",
+            "a, b = c, d = 1, 1; b",
+            "a, b = c, d = 1, 1; c",
+            "a, b = c, d = {1, 2}; a",
+            "a, b = c, d = {1, 2}; c",
+            "a, b = c, d = {1: 10, 2: 20}; a",
+            "a, b = c, d = {1: 10, 2: 20}; c",
+            "a = b = (lambda: 1)(); b",
+            "a = b = (lambda: 1)(); lambda x: 10; b",
+            "a = b = (lambda x: lambda y: x + y)(0)(1); b",
+            dedent("""
+            def foo():
+                return 1;
+            a = b = foo(); b"""),
+            dedent("""
+            def foo(*args, **kwargs):
+                return sum(args)
+            a = b = foo(1,-1,1); b"""),
+            "a, b = 1, 2; a, b = b, a; b",
+        ],
+    )
+    def test_chained_assignments(self, code):
+        run = LocalPythonExecutor([])
+        run.send_tools({})  # presumably installs the default tools (e.g. sum) — TODO confirm
+        value, _, _ = run(code)
+        assert value == 1  # every snippet above is written to evaluate to 1
+
+    def test_evaluate_assign_error(self):
+        run = LocalPythonExecutor([])
+        unbalanced = "a, b = 1, 2, 3; a"  # two targets, three values
+        with pytest.raises(InterpreterError, match=".*Cannot unpack tuple of wrong size"):
+            run(unbalanced)
+
+
+class TestLocalPythonExecutorSecurity:
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, expected_error",
+        [([], InterpreterError("Import of os is not allowed")), (["os"], None)],
+    )
+    def test_vulnerability_import(self, additional_authorized_imports, expected_error):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor("import os")  # expected to pass only when "os" is authorized
+
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, expected_error",
+        [([], InterpreterError("Import of builtins is not allowed")), (["builtins"], None)],
+    )
+    def test_vulnerability_builtins(self, additional_authorized_imports, expected_error):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor("import builtins")  # expected to pass only when "builtins" is authorized
+
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, expected_error",
+        [([], InterpreterError("Import of builtins is not allowed")), (["builtins"], None)],
+    )
+    def test_vulnerability_builtins_safe_functions(self, additional_authorized_imports, expected_error):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor("import builtins; builtins.print(1)")  # print is expected to stay usable via builtins
+
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, additional_tools, expected_error",
+        [
+            ([], [], InterpreterError("Import of builtins is not allowed")),
+            (["builtins"], [], InterpreterError("Forbidden access to function: exec")),
+            (["builtins"], ["exec"], None),
+        ],
+    )
+    def test_vulnerability_builtins_dangerous_functions(
+        self, additional_authorized_imports, additional_tools, expected_error
+    ):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        if additional_tools:
+            import builtins
+
+            executor.send_tools({"exec": builtins.exec})  # explicitly hand exec over as a tool
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor("import builtins; builtins.exec")
+
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, additional_tools, expected_error",
+        [
+            ([], [], InterpreterError("Import of os is not allowed")),
+            (["os"], [], InterpreterError("Forbidden access to function: popen")),
+            (["os"], ["popen"], None),
+        ],
+    )
+    def test_vulnerability_dangerous_functions(self, additional_authorized_imports, additional_tools, expected_error):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        if additional_tools:
+            import os
+
+            executor.send_tools({"popen": os.popen})  # explicitly hand popen over as a tool
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor("import os; os.popen")
+
+    @pytest.mark.parametrize("dangerous_function", DANGEROUS_FUNCTIONS)
+    def test_vulnerability_for_all_dangerous_functions(self, dangerous_function):
+        module_name, function_name = dangerous_function.rsplit(".", 1)
+        # Modules that cannot be imported here (e.g. posix on Windows) are skipped, not failed
+        pytest.importorskip(module_name)
+        executor = LocalPythonExecutor([module_name])
+        # Dunder names are expected to be reported with a different message than plain functions
+        is_dunder = "__" in function_name
+        target, suffix = ("dunder attribute", "") if is_dunder else ("function", ".*")
+        error_match = f".*Forbidden access to {target}: {function_name}{suffix}"
+        with pytest.raises(InterpreterError, match=error_match):
+            executor(f"import {module_name}; {dangerous_function}")
+
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, expected_error",
+        [
+            ([], InterpreterError("Import of sys is not allowed")),
+            (["sys"], InterpreterError("Forbidden access to module: os")),
+            (["sys", "os"], None),
+        ],
+    )
+    def test_vulnerability_via_sys(self, additional_authorized_imports, expected_error):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        code = dedent(
+            """
+            import sys
+            sys.modules["os"].system(":")
+            """
+        )
+        # Reaching os through sys.modules must be gated exactly like importing os directly
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor(code)
+
+    @pytest.mark.parametrize("dangerous_module", DANGEROUS_MODULES)
+    def test_vulnerability_via_sys_for_all_dangerous_modules(self, dangerous_module):
+        import sys
+
+        # "sys" is the one module authorized below, so it is excluded along with unloaded modules
+        if dangerous_module == "sys" or dangerous_module not in sys.modules:
+            pytest.skip("module not present in sys.modules")
+        executor = LocalPythonExecutor(["sys"])
+        code = dedent(
+            f"""
+            import sys
+            sys.modules["{dangerous_module}"]
+            """
+        )
+        with pytest.raises(InterpreterError) as exception_info:
+            executor(code)
+        assert f"Forbidden access to module: {dangerous_module}" in str(exception_info.value)
+
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, expected_error",
+        [(["importlib"], InterpreterError("Forbidden access to module: os")), (["importlib", "os"], None)],
+    )
+    def test_vulnerability_via_importlib(self, additional_authorized_imports, expected_error):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        code = dedent(
+            """
+            import importlib
+            importlib.import_module("os").system(":")
+            """
+        )
+        # A module obtained through importlib.import_module must still be authorized itself
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor(code)
+
+    @pytest.mark.parametrize(
+        "code, additional_authorized_imports, expected_error",
+        [
+            # os reached through an attribute of another module
+            (
+                "import queue; queue.threading._os.system(':')",
+                [],
+                InterpreterError("Forbidden access to module: threading"),
+            ),
+            (
+                "import queue; queue.threading._os.system(':')",
+                ["threading"],
+                InterpreterError("Forbidden access to module: os"),
+            ),
+            ("import random; random._os.system(':')", [], InterpreterError("Forbidden access to module: os")),
+            (
+                "import random; random.__dict__['_os'].system(':')",
+                [],
+                InterpreterError("Forbidden access to dunder attribute: __dict__"),
+            ),
+            (
+                "import doctest; doctest.inspect.os.system(':')",
+                ["doctest"],
+                InterpreterError("Forbidden access to module: inspect"),
+            ),
+            (
+                "import doctest; doctest.inspect.os.system(':')",
+                ["doctest", "inspect"],
+                InterpreterError("Forbidden access to module: os"),
+            ),
+            # subprocess reached through asyncio internals
+            (
+                "import asyncio; asyncio.base_events.events.subprocess",
+                ["asyncio"],
+                InterpreterError("Forbidden access to module: asyncio.base_events"),
+            ),
+            (
+                "import asyncio; asyncio.base_events.events.subprocess",
+                ["asyncio", "asyncio.base_events"],
+                InterpreterError("Forbidden access to module: asyncio.events"),
+            ),
+            (
+                "import asyncio; asyncio.base_events.events.subprocess",
+                ["asyncio", "asyncio.base_events", "asyncio.base_events.events"],
+                InterpreterError("Forbidden access to module: asyncio.events"),  # access path is not what is reported
+            ),
+            # sys reached through an attribute of another module
+            (
+                "import queue; queue.threading._sys.modules['os'].system(':')",
+                [],
+                InterpreterError("Forbidden access to module: threading"),
+            ),
+            (
+                "import queue; queue.threading._sys.modules['os'].system(':')",
+                ["threading"],
+                InterpreterError("Forbidden access to module: sys"),
+            ),
+            # Allowed: both the package and the accessed submodule are authorized
+            ("import pandas; pandas.io", ["pandas", "pandas.io"], None),
+        ],
+    )
+    def test_vulnerability_via_submodules(self, code, additional_authorized_imports, expected_error):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor(code)
+
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, additional_tools, expected_error",
+        [
+            ([], [], InterpreterError("Import of sys is not allowed")),
+            (["sys"], [], InterpreterError("Forbidden access to module: builtins")),
+            (
+                ["sys", "builtins"],
+                [],
+                InterpreterError("Forbidden access to function: __import__"),
+            ),
+            (["sys", "builtins"], ["__import__"], InterpreterError("Forbidden access to module: os")),
+            (["sys", "builtins", "os"], ["__import__"], None),
+        ],
+    )
+    def test_vulnerability_builtins_via_sys(self, additional_authorized_imports, additional_tools, expected_error):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        if additional_tools:
+            from builtins import __import__
+
+            executor.send_tools({"__import__": __import__})
+        code = dedent(
+            """
+            import sys
+            builtins = sys._getframe().f_builtins
+            builtins_import = builtins["__import__"]
+            os_module = builtins_import("os")
+            os_module.system(":")
+            """
+        )
+        # Each parametrized row lifts one more restriction; the next layer is then expected to block
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor(code)
+
+    @pytest.mark.parametrize("patch_builtin_import_module", [False, True])  # builtins_import.__module__ = None
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, additional_tools, expected_error",
+        [
+            ([], [], InterpreterError("Forbidden access to dunder attribute: __traceback__")),
+            (
+                ["builtins", "os"],
+                ["__import__"],
+                InterpreterError("Forbidden access to dunder attribute: __traceback__"),
+            ),
+        ],
+    )
+    def test_vulnerability_builtins_via_traceback(
+        self, patch_builtin_import_module, additional_authorized_imports, additional_tools, expected_error, monkeypatch
+    ):
+        if patch_builtin_import_module:
+            monkeypatch.setattr("builtins.__import__.__module__", None)  # inspect.getmodule(func) = None
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        if additional_tools:
+            from builtins import __import__
+
+            executor.send_tools({"__import__": __import__})
+        code = dedent(
+            """
+            try:
+                1 / 0
+            except Exception as e:
+                builtins = e.__traceback__.tb_frame.f_back.f_globals["__builtins__"]
+                builtins_import = builtins["__import__"]
+                os_module = builtins_import("os")
+                os_module.system(":")
+            """
+        )
+        # Both rows expect the __traceback__ lookup itself to be refused, whatever else is authorized
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor(code)
+
+    @pytest.mark.parametrize("patch_builtin_import_module", [False, True])  # builtins_import.__module__ = None
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, additional_tools, expected_error",
+        [
+            ([], [], InterpreterError("Forbidden access to dunder attribute: __base__")),
+            (["warnings"], [], InterpreterError("Forbidden access to dunder attribute: __base__")),
+            (
+                ["warnings", "builtins"],
+                [],
+                InterpreterError("Forbidden access to dunder attribute: __base__"),
+            ),
+            (["warnings", "builtins", "os"], [], InterpreterError("Forbidden access to dunder attribute: __base__")),
+            (
+                ["warnings", "builtins", "os"],
+                ["__import__"],
+                InterpreterError("Forbidden access to dunder attribute: __base__"),
+            ),
+        ],
+    )
+    def test_vulnerability_builtins_via_class_catch_warnings(
+        self, patch_builtin_import_module, additional_authorized_imports, additional_tools, expected_error, monkeypatch
+    ):
+        if patch_builtin_import_module:
+            monkeypatch.setattr("builtins.__import__.__module__", None)  # inspect.getmodule(func) = None
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        if additional_tools:
+            from builtins import __import__
+
+            executor.send_tools({"__import__": __import__})
+        # Every row expects the same __base__ refusal regardless of imports, tools or patching,
+        # so the expectation is built with the same idiom as the sibling tests (always bound).
+        with (
+            pytest.raises(type(expected_error), match=f".*{expected_error}")
+            if isinstance(expected_error, Exception)
+            else does_not_raise()
+        ):
+            executor(
+                dedent(
+                    """
+                    classes = {}.__class__.__base__.__subclasses__()
+                    for cls in classes:
+                        if cls.__name__ == "catch_warnings":
+                            break
+                    builtins = cls()._module.__builtins__
+                    builtins_import = builtins["__import__"]
+                    os_module = builtins_import('os')
+                    os_module.system(":")
+                    """
+                )
+            )
+
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
+    @pytest.mark.parametrize(
+        "additional_authorized_imports, expected_error",
+        [
+            ([], InterpreterError("Forbidden access to dunder attribute: __base__")),
+            (["os"], InterpreterError("Forbidden access to dunder attribute: __base__")),
+        ],
+    )
+    def test_vulnerability_load_module_via_builtin_importer(self, additional_authorized_imports, expected_error):
+        executor = LocalPythonExecutor(additional_authorized_imports)
+        code = dedent(
+            """
+            classes = {}.__class__.__base__.__subclasses__()
+            for cls in classes:
+                if cls.__name__ == "BuiltinImporter":
+                    break
+            os_module = cls().load_module("os")
+            os_module.system(":")
+            """
+        )
+        # Authorizing "os" must not help: the class-hierarchy walk is what gets refused
+        if isinstance(expected_error, Exception):
+            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
+        else:
+            expectation = does_not_raise()
+        with expectation:
+            executor(code)
+
+    def test_vulnerability_class_via_subclasses(self):
+        # Subclass: subprocess.Popen
+        executor = LocalPythonExecutor([])
+        loop_variant = dedent(
+            """
+            for cls in ().__class__.__base__.__subclasses__():
+                if 'Popen' in cls.__class__.__repr__(cls):
+                    break
+            cls(["sh", "-c", ":"]).wait()
+            """
+        )
+        comprehension_variant = dedent(
+            """
+            [c for c in ().__class__.__base__.__subclasses__() if "Popen" in c.__class__.__repr__(c)][0](
+                ["sh", "-c", ":"]
+            ).wait()
+            """
+        )
+        # Both spellings of the same escape must be rejected with the same dunder-access error
+        expected_message = "Forbidden access to dunder attribute: __base__"
+        for code in (loop_variant, comprehension_variant):
+            with pytest.raises(InterpreterError, match=expected_message):
+                executor(code)
+
+    @pytest.mark.parametrize(
+        "code, dunder_attribute",
+        [("a = (); b = a.__class__", "__class__"), ("class A:\n    attr=1\nx = A()\nx_dict = x.__dict__", "__dict__")],
+    )
+    def test_vulnerability_via_dunder_access(self, code, dunder_attribute):
+        sandbox = LocalPythonExecutor([])
+        with pytest.raises(InterpreterError, match="Forbidden access to dunder attribute: " + dunder_attribute):
+            sandbox(code)  # plain attribute syntax on a dunder name must be refused
+
+    def test_vulnerability_via_dunder_indirect_access(self):
+        snippet = "a = (); b = getattr(a, '__class__')"  # getattr would sidestep the attribute syntax check
+        sandbox = LocalPythonExecutor([])
+        with pytest.raises(InterpreterError, match="Forbidden function evaluation: 'getattr'"):
+            sandbox(snippet)
diff --git a/tests/test_mcp_client.py b/tests/test_mcp_client.py
new file mode 100644
index 000000000..30b658a70
--- /dev/null
+++ b/tests/test_mcp_client.py
@@ -0,0 +1,60 @@
+from textwrap import dedent
+
+import pytest
+from mcp import StdioServerParameters
+
+from smolagents.mcp_client import MCPClient
+
+
+@pytest.fixture
+def echo_server_script():  # source text of a FastMCP server with a single tool, echo_tool
+    return dedent(
+        '''
+        from mcp.server.fastmcp import FastMCP
+
+        mcp = FastMCP("Echo Server")
+
+        @mcp.tool()
+        def echo_tool(text: str) -> str:
+            """Echo the input text"""
+            return f"Echo: {text}"
+
+        mcp.run()
+        '''
+    )  # the tests pass this text to `python -c`
+
+
+def test_mcp_client_with_syntax(echo_server_script: str):
+    """MCPClient used as a context manager yields the echo server's single tool."""
+    params = StdioServerParameters(command="python", args=["-c", echo_server_script])
+    with MCPClient(params) as tools:  # NOTE(review): "python" is looked up on PATH; sys.executable may be safer
+        assert len(tools) == 1
+        assert tools[0].name == "echo_tool"
+        assert tools[0].forward(text="Hello, world!") == "Echo: Hello, world!"
+
+
+def test_mcp_client_try_finally_syntax(echo_server_script: str):
+    """MCPClient driven manually: get_tools() inside try, disconnect() in finally."""
+    params = StdioServerParameters(command="python", args=["-c", echo_server_script])
+    client = MCPClient(params)
+    try:
+        available = client.get_tools()
+        assert len(available) == 1
+        assert available[0].name == "echo_tool"
+        assert available[0].forward(text="Hello, world!") == "Echo: Hello, world!"
+    finally:
+        client.disconnect()  # always runs, even when an assertion above fails
+
+
+def test_multiple_servers(echo_server_script: str):
+    """MCPClient given a list of server parameters exposes the tools of every server."""
+    server_parameters = [
+        StdioServerParameters(command="python", args=["-c", echo_server_script]),
+        StdioServerParameters(command="python", args=["-c", echo_server_script]),
+    ]
+    with MCPClient(server_parameters) as tools:
+        assert len(tools) == 2
+        # Two identical servers: one echo_tool is expected from each of them
+        for echo in tools:
+            assert echo.name == "echo_tool"
+            assert echo.forward(text="Hello, world!") == "Echo: Hello, world!"
diff --git a/tests/test_memory.py b/tests/test_memory.py
index c007a185c..04c6b7f47 100644
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -70,7 +70,7 @@ def test_action_step_to_messages():
         assert "type" in content
         assert "text" in content
     message = messages[1]
-    assert message["role"] == MessageRole.ASSISTANT
+    assert message["role"] == MessageRole.TOOL_CALL
 
     assert len(message["content"]) == 1
     text_content = message["content"][0]
@@ -78,23 +78,43 @@ def test_action_step_to_messages():
     assert "type" in text_content
     assert "text" in text_content
 
-    observation_message = messages[2]
-    assert observation_message["role"] == MessageRole.TOOL_RESPONSE
-    assert "Observation:\nThis is a nice observation" in observation_message["content"][0]["text"]
-
-    image_message = messages[3]
-    image_content = image_message["content"][1]
+    image_message = messages[2]
+    image_content = image_message["content"][0]
     assert isinstance(image_content, dict)
     assert "type" in image_content
     assert "image" in image_content
 
+    observation_message = messages[3]
+    assert observation_message["role"] == MessageRole.TOOL_RESPONSE
+    assert "Observation:\nThis is a nice observation" in observation_message["content"][0]["text"]
+
+
+def test_action_step_to_messages_no_tool_calls_with_observations():
+    step = ActionStep(
+        model_input_messages=None,
+        tool_calls=None,
+        start_time=None,
+        end_time=None,
+        step_number=None,
+        error=None,
+        duration=None,
+        model_output_message=None,
+        model_output=None,
+        observations="This is an observation.",  # the only populated field
+        observations_images=None,
+        action_output=None,
+    )
+    rendered = step.to_messages()
+    assert len(rendered) == 1  # observations alone must still produce exactly one message
+    only_message = rendered[0]
+    assert only_message["role"] == MessageRole.TOOL_RESPONSE
+    assert "Observation:\nThis is an observation." in only_message["content"][0]["text"]
+
 
 def test_planning_step_to_messages():
     planning_step = PlanningStep(
         model_input_messages=[Message(role=MessageRole.USER, content="Hello")],
-        model_output_message_facts=ChatMessage(role=MessageRole.ASSISTANT, content="Facts"),
-        facts="These are facts.",
-        model_output_message_plan=ChatMessage(role=MessageRole.ASSISTANT, content="Plan"),
+        model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Plan"),
         plan="This is a plan.",
     )
     messages = planning_step.to_messages(summary_mode=False)
@@ -103,14 +123,14 @@ def test_planning_step_to_messages():
         assert isinstance(message, dict)
         assert "role" in message
         assert "content" in message
-        assert isinstance(message["role"], MessageRole)
-        assert message["role"] == MessageRole.ASSISTANT
         assert isinstance(message["content"], list)
         assert len(message["content"]) == 1
         for content in message["content"]:
             assert isinstance(content, dict)
             assert "type" in content
             assert "text" in content
+    assert messages[0]["role"] == MessageRole.ASSISTANT
+    assert messages[1]["role"] == MessageRole.USER
 
 
 def test_task_step_to_messages():
diff --git a/tests/test_models.py b/tests/test_models.py
index f663972a7..fa81ae82a 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -15,35 +15,67 @@
 import json
 import sys
 import unittest
-from pathlib import Path
-from typing import Optional
+from contextlib import ExitStack
 from unittest.mock import MagicMock, patch
 
 import pytest
-from transformers.testing_utils import get_tests_dir
+from huggingface_hub import ChatCompletionOutputMessage
 
 from smolagents.models import (
+    AmazonBedrockServerModel,
+    AzureOpenAIServerModel,
     ChatMessage,
+    ChatMessageToolCall,
     HfApiModel,
+    InferenceClientModel,
     LiteLLMModel,
+    LiteLLMRouterModel,
     MessageRole,
     MLXModel,
+    Model,
     OpenAIServerModel,
     TransformersModel,
     get_clean_message_list,
+    get_tool_call_from_text,
     get_tool_json_schema,
     parse_json_if_needed,
-    parse_tool_args_if_needed,
+    supports_stop_parameter,
 )
 from smolagents.tools import tool
 
 from .utils.markers import require_run_all
 
 
-class ModelTests(unittest.TestCase):
+class TestModel:
+    @pytest.mark.parametrize(
+        "model_id, stop_sequences, should_contain_stop",
+        [
+            ("regular-model", ["stop1", "stop2"], True),  # Regular model should include stop
+            ("openai/o3", ["stop1", "stop2"], False),  # o3 model should not include stop
+            ("openai/o4-mini", ["stop1", "stop2"], False),  # o4-mini model should not include stop
+            ("something/else/o3", ["stop1", "stop2"], False),  # Path ending with o3 should not include stop
+            ("something/else/o4-mini", ["stop1", "stop2"], False),  # Path ending with o4-mini should not include stop
+            ("o3", ["stop1", "stop2"], False),  # Exact o3 model should not include stop
+            ("o4-mini", ["stop1", "stop2"], False),  # Exact o4-mini model should not include stop
+            ("regular-model", None, False),  # None stop_sequences should not add stop parameter
+        ],
+    )
+    def test_prepare_completion_kwargs_stop_sequences(self, model_id, stop_sequences, should_contain_stop):
+        model = Model()
+        model.model_id = model_id  # set after construction, then read back by the call below
+        messages = [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]
+        prepared = model._prepare_completion_kwargs(messages=messages, stop_sequences=stop_sequences)
+        # "stop" must be forwarded unchanged when expected,
+        # and must be absent from the kwargs altogether otherwise
+        if not should_contain_stop:
+            assert "stop" not in prepared
+        else:
+            assert "stop" in prepared
+            assert prepared["stop"] == stop_sequences
+
     def test_get_json_schema_has_nullable_args(self):
         @tool
-        def get_weather(location: str, celsius: Optional[bool] = False) -> str:
+        def get_weather(location: str, celsius: bool | None = False) -> str:
             """
             Get weather in the next days at given location.
             Secretly this tool does not care about the location, it hates the weather everywhere.
@@ -81,7 +113,8 @@ def test_get_mlx_message_tricky_stop_sequence(self):
         # check stop_sequence capture when output has trailing chars
         assert model(messages, stop_sequences=[stop_sequence]).content == "I'm ready to help you"
 
-    def test_transformers_message_no_tool(self):
+    def test_transformers_message_no_tool(self, monkeypatch):
+        monkeypatch.setattr("huggingface_hub.constants.HF_HUB_DOWNLOAD_TIMEOUT", 30)  # instead of 10
         model = TransformersModel(
             model_id="HuggingFaceTB/SmolLM2-135M-Instruct",
             max_new_tokens=5,
@@ -89,27 +122,35 @@ def test_transformers_message_no_tool(self):
             do_sample=False,
         )
         messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
-        output = model(messages, stop_sequences=["great"]).content
+        output = model.generate(messages, stop_sequences=["great"]).content
         assert output == "assistant\nHello"
 
-    def test_transformers_message_vl_no_tool(self):
-        from PIL import Image
+        output = model.generate_stream(messages, stop_sequences=["great"])
+        output_str = ""
+        for el in output:
+            output_str += el.content
+        assert output_str == "assistant\nHello"
+
+    def test_transformers_message_vl_no_tool(self, shared_datadir, monkeypatch):
+        monkeypatch.setattr("huggingface_hub.constants.HF_HUB_DOWNLOAD_TIMEOUT", 30)  # instead of 10
+        import PIL.Image
 
-        img = Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png")
+        img = PIL.Image.open(shared_datadir / "000000039769.png")
         model = TransformersModel(
             model_id="llava-hf/llava-interleave-qwen-0.5b-hf",
-            max_new_tokens=5,
+            max_new_tokens=4,
             device_map="cpu",
             do_sample=False,
         )
         messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}, {"type": "image", "image": img}]}]
-        output = model(messages, stop_sequences=["great"]).content
-        assert output == "Hello! How can"
+        output = model.generate(messages, stop_sequences=["great"]).content
+        assert output == "I am"
 
-    def test_parse_tool_args_if_needed(self):
-        original_message = ChatMessage(role="user", content=[{"type": "text", "text": "Hello!"}])
-        parsed_message = parse_tool_args_if_needed(original_message)
-        assert parsed_message == original_message
+        output = model.generate_stream(messages, stop_sequences=["great"])
+        output_str = ""
+        for el in output:
+            output_str += el.content
+        assert output_str == "I am"
 
     def test_parse_json_if_needed(self):
         args = "abc"
@@ -129,11 +170,13 @@ def test_parse_json_if_needed(self):
         assert parsed_args == 3
 
 
-class TestHfApiModel:
+class TestInferenceClientModel:
     def test_call_with_custom_role_conversions(self):
         custom_role_conversions = {MessageRole.USER: MessageRole.SYSTEM}
-        model = HfApiModel(model_id="test-model", custom_role_conversions=custom_role_conversions)
+        model = InferenceClientModel(model_id="test-model", custom_role_conversions=custom_role_conversions)
         model.client = MagicMock()
+        mock_response = model.client.chat_completion.return_value
+        mock_response.choices[0].message = ChatCompletionOutputMessage(role="assistant")
         messages = [{"role": "user", "content": "Test message"}]
         _ = model(messages)
         # Verify that the role conversion was applied
@@ -141,24 +184,73 @@ def test_call_with_custom_role_conversions(self):
             "role conversion should be applied"
         )
 
+    def test_init_model_with_tokens(self):
+        model = InferenceClientModel(model_id="test-model", token="abc")
+        assert model.client.token == "abc"
+
+        model = InferenceClientModel(model_id="test-model", api_key="abc")
+        assert model.client.token == "abc"
+
+        with pytest.raises(ValueError, match="Received both `token` and `api_key` arguments."):
+            InferenceClientModel(model_id="test-model", token="abc", api_key="def")
+
     @require_run_all
     def test_get_hfapi_message_no_tool(self):
-        model = HfApiModel(model="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10)
+        model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10)
         messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
         model(messages, stop_sequences=["great"])
 
     @require_run_all
     def test_get_hfapi_message_no_tool_external_provider(self):
-        model = HfApiModel(model="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10)
+        model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10)
         messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
         model(messages, stop_sequences=["great"])
 
 
+class TestHfApiModel:
+    def test_init_model_with_tokens(self):
+        model = HfApiModel(model_id="test-model", token="abc")
+        assert model.client.token == "abc"
+
+        model = HfApiModel(model_id="test-model", api_key="abc")
+        assert model.client.token == "abc"
+
+        with pytest.raises(ValueError) as e:
+            _ = HfApiModel(model_id="test-model", token="abc", api_key="def")
+        assert "Received both `token` and `api_key` arguments." in str(e)
+
+    @require_run_all
+    def test_get_hfapi_message_no_tool(self):
+        model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10)
+        messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
+        model.generate(messages, stop_sequences=["great"])
+
+    @require_run_all
+    def test_get_hfapi_message_no_tool_external_provider(self):
+        model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10)
+        messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
+        model.generate(messages, stop_sequences=["great"])
+
+    @require_run_all
+    def test_get_hfapi_message_stream_no_tool(self):
+        model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10)
+        messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
+        for el in model.generate_stream(messages, stop_sequences=["great"]):
+            assert el.content is not None
+
+    @require_run_all
+    def test_get_hfapi_message_stream_no_tool_external_provider(self):
+        model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10)
+        messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
+        for el in model.generate_stream(messages, stop_sequences=["great"]):
+            assert el.content is not None
+
+
 class TestLiteLLMModel:
     @pytest.mark.parametrize(
         "model_id, error_flag",
         [
-            ("groq/llama-3.3-70b", "Missing API Key"),
+            ("groq/llama-3.3-70b", "Invalid API Key"),
             ("cerebras/llama-3.3-70b", "The api_key client option must be set"),
             ("mistral/mistral-tiny", "The api_key client option must be set"),
         ],
@@ -168,7 +260,12 @@ def test_call_different_providers_without_key(self, model_id, error_flag):
         messages = [{"role": "user", "content": [{"type": "text", "text": "Test message"}]}]
         with pytest.raises(Exception) as e:
             # This should raise 401 error because of missing API key, not fail for any "bad format" reason
-            model(messages)
+            model.generate(messages)
+        assert error_flag in str(e)
+        with pytest.raises(Exception) as e:
+            # This should raise 401 error because of missing API key, not fail for any "bad format" reason
+            for el in model.generate_stream(messages):
+                assert el.content is not None
         assert error_flag in str(e)
 
     def test_passing_flatten_messages(self):
@@ -179,6 +276,41 @@ def test_passing_flatten_messages(self):
         assert model.flatten_messages_as_text
 
 
+class TestLiteLLMRouterModel:
+    @pytest.mark.parametrize(
+        "model_id, expected",
+        [
+            ("llama-3.3-70b", False),
+            ("llama-3.3-70b", True),
+            ("mistral-tiny", True),
+        ],
+    )
+    def test_flatten_messages_as_text(self, model_id, expected):
+        model_list = [
+            {"model_name": "llama-3.3-70b", "litellm_params": {"model": "groq/llama-3.3-70b"}},
+            {"model_name": "llama-3.3-70b", "litellm_params": {"model": "cerebras/llama-3.3-70b"}},
+            {"model_name": "mistral-tiny", "litellm_params": {"model": "mistral/mistral-tiny"}},
+        ]
+        model = LiteLLMRouterModel(model_id=model_id, model_list=model_list, flatten_messages_as_text=expected)
+        assert model.flatten_messages_as_text is expected
+
+    def test_create_client(self):
+        model_list = [
+            {"model_name": "llama-3.3-70b", "litellm_params": {"model": "groq/llama-3.3-70b"}},
+            {"model_name": "llama-3.3-70b", "litellm_params": {"model": "cerebras/llama-3.3-70b"}},
+        ]
+        with patch("litellm.Router") as mock_router:
+            router_model = LiteLLMRouterModel(
+                model_id="model-group-1", model_list=model_list, client_kwargs={"routing_strategy": "simple-shuffle"}
+            )
+            # Ensure that the Router constructor was called with the expected keyword arguments
+            mock_router.assert_called_once()
+            assert mock_router.call_count == 1
+            assert mock_router.call_args.kwargs["model_list"] == model_list
+            assert mock_router.call_args.kwargs["routing_strategy"] == "simple-shuffle"
+            assert router_model.client == mock_router.return_value
+
+
 class TestOpenAIServerModel:
     def test_client_kwargs_passed_correctly(self):
         model_id = "gpt-3.5-turbo"
@@ -189,7 +321,7 @@ def test_client_kwargs_passed_correctly(self):
         client_kwargs = {"max_retries": 5}
 
         with patch("openai.OpenAI") as MockOpenAI:
-            _ = OpenAIServerModel(
+            model = OpenAIServerModel(
                 model_id=model_id,
                 api_base=api_base,
                 api_key=api_key,
@@ -197,10 +329,103 @@ def test_client_kwargs_passed_correctly(self):
                 project=project,
                 client_kwargs=client_kwargs,
             )
-            MockOpenAI.assert_called_once_with(
-                base_url=api_base, api_key=api_key, organization=organization, project=project, max_retries=5
+        MockOpenAI.assert_called_once_with(
+            base_url=api_base, api_key=api_key, organization=organization, project=project, max_retries=5
+        )
+        assert model.client == MockOpenAI.return_value
+
+
+class TestAmazonBedrockServerModel:
+    def test_client_for_bedrock(self):
+        model_id = "us.amazon.nova-pro-v1:0"
+
+        with patch("boto3.client") as MockBoto3:
+            model = AmazonBedrockServerModel(
+                model_id=model_id,
             )
 
+        assert model.client == MockBoto3.return_value
+
+
+class TestAzureOpenAIServerModel:
+    def test_client_kwargs_passed_correctly(self):
+        model_id = "gpt-3.5-turbo"
+        api_key = "test_api_key"
+        api_version = "2023-12-01-preview"
+        azure_endpoint = "https://example-resource.azure.openai.com/"
+        organization = "test_org"
+        project = "test_project"
+        client_kwargs = {"max_retries": 5}
+
+        with patch("openai.OpenAI") as MockOpenAI, patch("openai.AzureOpenAI") as MockAzureOpenAI:
+            model = AzureOpenAIServerModel(
+                model_id=model_id,
+                api_key=api_key,
+                api_version=api_version,
+                azure_endpoint=azure_endpoint,
+                organization=organization,
+                project=project,
+                client_kwargs=client_kwargs,
+            )
+        assert MockOpenAI.call_count == 0
+        MockAzureOpenAI.assert_called_once_with(
+            base_url=None,
+            api_key=api_key,
+            api_version=api_version,
+            azure_endpoint=azure_endpoint,
+            organization=organization,
+            project=project,
+            max_retries=5,
+        )
+        assert model.client == MockAzureOpenAI.return_value
+
+
+class TestTransformersModel:
+    @pytest.mark.parametrize(
+        "patching",
+        [
+            [
+                (
+                    "transformers.AutoModelForImageTextToText.from_pretrained",
+                    {"side_effect": ValueError("Unrecognized configuration class")},
+                ),
+                ("transformers.AutoModelForCausalLM.from_pretrained", {}),
+                ("transformers.AutoTokenizer.from_pretrained", {}),
+            ],
+            [
+                ("transformers.AutoModelForImageTextToText.from_pretrained", {}),
+                ("transformers.AutoProcessor.from_pretrained", {}),
+            ],
+        ],
+    )
+    def test_init(self, patching):
+        with ExitStack() as stack:
+            mocks = {target: stack.enter_context(patch(target, **kwargs)) for target, kwargs in patching}
+            model = TransformersModel(
+                model_id="test-model", device_map="cpu", torch_dtype="float16", trust_remote_code=True
+            )
+        assert model.model_id == "test-model"
+        if "transformers.AutoTokenizer.from_pretrained" in mocks:
+            assert model.model == mocks["transformers.AutoModelForCausalLM.from_pretrained"].return_value
+            assert mocks["transformers.AutoModelForCausalLM.from_pretrained"].call_args.kwargs == {
+                "device_map": "cpu",
+                "torch_dtype": "float16",
+                "trust_remote_code": True,
+            }
+            assert model.tokenizer == mocks["transformers.AutoTokenizer.from_pretrained"].return_value
+            assert mocks["transformers.AutoTokenizer.from_pretrained"].call_args.args == ("test-model",)
+            assert mocks["transformers.AutoTokenizer.from_pretrained"].call_args.kwargs == {"trust_remote_code": True}
+        elif "transformers.AutoProcessor.from_pretrained" in mocks:
+            assert model.model == mocks["transformers.AutoModelForImageTextToText.from_pretrained"].return_value
+            assert mocks["transformers.AutoModelForImageTextToText.from_pretrained"].call_args.kwargs == {
+                "device_map": "cpu",
+                "torch_dtype": "float16",
+                "trust_remote_code": True,
+            }
+            assert model.processor == mocks["transformers.AutoProcessor.from_pretrained"].return_value
+            assert mocks["transformers.AutoProcessor.from_pretrained"].call_args.args == ("test-model",)
+            assert mocks["transformers.AutoProcessor.from_pretrained"].call_args.kwargs == {"trust_remote_code": True}
+
 
 def test_get_clean_message_list_basic():
     messages = [
@@ -277,4 +502,144 @@ def test_get_clean_message_list_flatten_messages_as_text():
     result = get_clean_message_list(messages, flatten_messages_as_text=True)
     assert len(result) == 1
     assert result[0]["role"] == "user"
-    assert result[0]["content"] == "Hello!How are you?"
+    assert result[0]["content"] == "Hello!\nHow are you?"
+
+
+@pytest.mark.parametrize(
+    "model_class, model_kwargs, patching, expected_flatten_messages_as_text",
+    [
+        (AzureOpenAIServerModel, {}, ("openai.AzureOpenAI", {}), False),
+        (InferenceClientModel, {}, ("huggingface_hub.InferenceClient", {}), False),
+        (LiteLLMModel, {}, None, False),
+        (LiteLLMModel, {"model_id": "ollama"}, None, True),
+        (LiteLLMModel, {"model_id": "groq"}, None, True),
+        (LiteLLMModel, {"model_id": "cerebras"}, None, True),
+        (MLXModel, {}, ("mlx_lm.load", {"return_value": (MagicMock(), MagicMock())}), True),
+        (OpenAIServerModel, {}, ("openai.OpenAI", {}), False),
+        (OpenAIServerModel, {"flatten_messages_as_text": True}, ("openai.OpenAI", {}), True),
+        (
+            TransformersModel,
+            {},
+            [
+                (
+                    "transformers.AutoModelForImageTextToText.from_pretrained",
+                    {"side_effect": ValueError("Unrecognized configuration class")},
+                ),
+                ("transformers.AutoModelForCausalLM.from_pretrained", {}),
+                ("transformers.AutoTokenizer.from_pretrained", {}),
+            ],
+            True,
+        ),
+        (
+            TransformersModel,
+            {},
+            [
+                ("transformers.AutoModelForImageTextToText.from_pretrained", {}),
+                ("transformers.AutoProcessor.from_pretrained", {}),
+            ],
+            False,
+        ),
+    ],
+)
+def test_flatten_messages_as_text_for_all_models(
+    model_class, model_kwargs, patching, expected_flatten_messages_as_text
+):
+    with ExitStack() as stack:
+        if isinstance(patching, list):
+            for target, kwargs in patching:
+                stack.enter_context(patch(target, **kwargs))
+        elif patching:
+            target, kwargs = patching
+            stack.enter_context(patch(target, **kwargs))
+
+        model = model_class(**{"model_id": "test-model", **model_kwargs})
+    assert model.flatten_messages_as_text is expected_flatten_messages_as_text, f"{model_class.__name__} failed"
+
+
+@pytest.mark.parametrize(
+    "model_id,expected",
+    [
+        # Unsupported base models
+        ("o3", False),
+        ("o4-mini", False),
+        # Unsupported versioned models
+        ("o3-2025-04-16", False),
+        ("o4-mini-2025-04-16", False),
+        # Unsupported models with path prefixes
+        ("openai/o3", False),
+        ("openai/o4-mini", False),
+        ("openai/o3-2025-04-16", False),
+        ("openai/o4-mini-2025-04-16", False),
+        # Supported models
+        ("o3-mini", True),  # Different from o3
+        ("o3-mini-2025-01-31", True),  # Different from o3
+        ("o4", True),  # Different from o4-mini
+        ("o4-turbo", True),  # Different from o4-mini
+        ("gpt-4", True),
+        ("claude-3-5-sonnet", True),
+        ("mistral-large", True),
+        # Supported models with path prefixes
+        ("openai/gpt-4", True),
+        ("anthropic/claude-3-5-sonnet", True),
+        ("mistralai/mistral-large", True),
+        # Edge cases
+        ("", True),  # Empty string doesn't match pattern
+        ("o3x", True),  # Not exactly o3
+        ("o3_mini", True),  # Not o3-mini format
+        ("prefix-o3", True),  # o3 not at start
+    ],
+)
+def test_supports_stop_parameter(model_id, expected):
+    """Test the supports_stop_parameter function with various model IDs"""
+    assert supports_stop_parameter(model_id) == expected, f"Failed for model_id: {model_id}"
+
+
+class TestGetToolCallFromText:
+    @pytest.fixture(autouse=True)
+    def mock_uuid4(self):
+        with patch("uuid.uuid4", return_value="test-uuid"):
+            yield
+
+    def test_get_tool_call_from_text_basic(self):
+        text = '{"name": "weather_tool", "arguments": "New York"}'
+        result = get_tool_call_from_text(text, "name", "arguments")
+        assert isinstance(result, ChatMessageToolCall)
+        assert result.id == "test-uuid"
+        assert result.type == "function"
+        assert result.function.name == "weather_tool"
+        assert result.function.arguments == "New York"
+
+    def test_get_tool_call_from_text_name_key_missing(self):
+        text = '{"action": "weather_tool", "arguments": "New York"}'
+        with pytest.raises(ValueError) as exc_info:
+            get_tool_call_from_text(text, "name", "arguments")
+        error_msg = str(exc_info.value)
+        assert "Key tool_name_key='name' not found" in error_msg
+        assert "'action', 'arguments'" in error_msg
+
+    def test_get_tool_call_from_text_json_object_args(self):
+        text = '{"name": "weather_tool", "arguments": {"city": "New York"}}'
+        result = get_tool_call_from_text(text, "name", "arguments")
+        assert result.function.arguments == {"city": "New York"}
+
+    def test_get_tool_call_from_text_json_string_args(self):
+        text = '{"name": "weather_tool", "arguments": "{\\"city\\": \\"New York\\"}"}'
+        result = get_tool_call_from_text(text, "name", "arguments")
+        assert result.function.arguments == {"city": "New York"}
+
+    def test_get_tool_call_from_text_missing_args(self):
+        text = '{"name": "weather_tool"}'
+        result = get_tool_call_from_text(text, "name", "arguments")
+        assert result.function.arguments is None
+
+    def test_get_tool_call_from_text_custom_keys(self):
+        text = '{"tool": "weather_tool", "params": "New York"}'
+        result = get_tool_call_from_text(text, "tool", "params")
+        assert result.function.name == "weather_tool"
+        assert result.function.arguments == "New York"
+
+    def test_get_tool_call_from_text_numeric_args(self):
+        text = '{"name": "calculator", "arguments": 42}'
+        result = get_tool_call_from_text(text, "name", "arguments")
+        assert result.function.name == "calculator"
+        assert result.function.arguments == 42
diff --git a/tests/test_monitoring.py b/tests/test_monitoring.py
index 7483214b1..41bbc8b8e 100644
--- a/tests/test_monitoring.py
+++ b/tests/test_monitoring.py
@@ -15,8 +15,9 @@
 
 import unittest
 
+import pytest
+
 from smolagents import (
-    AgentError,
     AgentImage,
     CodeAgent,
     ToolCallingAgent,
@@ -26,16 +27,16 @@
     ChatMessage,
     ChatMessageToolCall,
     ChatMessageToolCallDefinition,
+    Model,
 )
-from smolagents.monitoring import AgentLogger, LogLevel
 
 
-class FakeLLMModel:
+class FakeLLMModel(Model):
     def __init__(self):
         self.last_input_token_count = 10
         self.last_output_token_count = 20
 
-    def __call__(self, prompt, tools_to_call_from=None, **kwargs):
+    def generate(self, prompt, tools_to_call_from=None, **kwargs):
         if tools_to_call_from is not None:
             return ChatMessage(
                 role="assistant",
@@ -84,12 +85,12 @@ def test_toolcalling_agent_metrics(self):
         self.assertEqual(agent.monitor.total_output_token_count, 20)
 
     def test_code_agent_metrics_max_steps(self):
-        class FakeLLMModelMalformedAnswer:
+        class FakeLLMModelMalformedAnswer(Model):
             def __init__(self):
                 self.last_input_token_count = 10
                 self.last_output_token_count = 20
 
-            def __call__(self, prompt, **kwargs):
+            def generate(self, prompt, **kwargs):
                 return ChatMessage(role="assistant", content="Malformed answer")
 
         agent = CodeAgent(
@@ -104,12 +105,12 @@ def __call__(self, prompt, **kwargs):
         self.assertEqual(agent.monitor.total_output_token_count, 40)
 
     def test_code_agent_metrics_generation_error(self):
-        class FakeLLMModelGenerationException:
+        class FakeLLMModelGenerationException(Model):
             def __init__(self):
                 self.last_input_token_count = 10
                 self.last_output_token_count = 20
 
-            def __call__(self, prompt, **kwargs):
+            def generate(self, prompt, **kwargs):
                 self.last_input_token_count = 10
                 self.last_output_token_count = 0
                 raise Exception("Cannot generate")
@@ -119,9 +120,10 @@ def __call__(self, prompt, **kwargs):
             model=FakeLLMModelGenerationException(),
             max_steps=1,
         )
-        agent.run("Fake task")
+        with pytest.raises(Exception):
+            agent.run("Fake task")
 
-        self.assertEqual(agent.monitor.total_input_token_count, 20)  # Should have done two monitoring callbacks
+        self.assertEqual(agent.monitor.total_input_token_count, 10)  # Should have done one monitoring callback
         self.assertEqual(agent.monitor.total_output_token_count, 0)
 
     def test_streaming_agent_text_output(self):
@@ -129,12 +131,16 @@ def test_streaming_agent_text_output(self):
             tools=[],
             model=FakeLLMModel(),
             max_steps=1,
+            planning_interval=2,
         )
 
         # Use stream_to_gradio to capture the output
         outputs = list(stream_to_gradio(agent, task="Test task"))
 
-        self.assertEqual(len(outputs), 7)
+        self.assertEqual(len(outputs), 11)
+        plan_message = outputs[1]
+        self.assertEqual(plan_message.role, "assistant")
+        self.assertIn("Code:", plan_message.content)
         final_message = outputs[-1]
         self.assertEqual(final_message.role, "assistant")
         self.assertIn("This is the final answer.", final_message.content)
@@ -155,7 +161,7 @@ def test_streaming_agent_image_output(self):
             )
         )
 
-        self.assertEqual(len(outputs), 5)
+        self.assertEqual(len(outputs), 6)
         final_message = outputs[-1]
         self.assertEqual(final_message.role, "assistant")
         self.assertIsInstance(final_message.content, dict)
@@ -163,21 +169,20 @@ def test_streaming_agent_image_output(self):
         self.assertEqual(final_message.content["mime_type"], "image/png")
 
     def test_streaming_with_agent_error(self):
-        logger = AgentLogger(level=LogLevel.INFO)
-
-        def dummy_model(prompt, **kwargs):
-            raise AgentError("Simulated agent error", logger)
+        class DummyModel(Model):
+            def generate(self, prompt, **kwargs):
+                return ChatMessage(role="assistant", content="Malformed call")
 
         agent = CodeAgent(
             tools=[],
-            model=dummy_model,
+            model=DummyModel(),
             max_steps=1,
         )
 
         # Use stream_to_gradio to capture the output
         outputs = list(stream_to_gradio(agent, task="Test task"))
 
-        self.assertEqual(len(outputs), 9)
+        self.assertEqual(len(outputs), 13)
         final_message = outputs[-1]
         self.assertEqual(final_message.role, "assistant")
-        self.assertIn("Simulated agent error", final_message.content)
+        self.assertIn("Malformed call", final_message.content)
diff --git a/tests/test_remote_executors.py b/tests/test_remote_executors.py
new file mode 100644
index 000000000..f7fe05ed2
--- /dev/null
+++ b/tests/test_remote_executors.py
@@ -0,0 +1,105 @@
+import io
+from textwrap import dedent
+from unittest.mock import MagicMock, patch
+
+import docker
+import PIL.Image
+import pytest
+from rich.console import Console
+
+from smolagents.monitoring import AgentLogger, LogLevel
+from smolagents.remote_executors import DockerExecutor, E2BExecutor
+from smolagents.utils import AgentError
+
+from .utils.markers import require_run_all
+
+
+class TestE2BExecutorMock:
+    def test_e2b_executor_instantiation(self):
+        logger = MagicMock()
+        with patch("e2b_code_interpreter.Sandbox") as mock_sandbox:
+            mock_sandbox.return_value.commands.run.return_value.error = None
+            mock_sandbox.return_value.run_code.return_value.error = None
+            executor = E2BExecutor(
+                additional_imports=[], logger=logger, api_key="dummy-api-key", template="dummy-template-id", timeout=60
+            )
+        assert isinstance(executor, E2BExecutor)
+        assert executor.logger == logger
+        assert executor.final_answer_pattern.pattern == r"^final_answer\((.*)\)$"
+        assert executor.sandbox == mock_sandbox.return_value
+        assert mock_sandbox.call_count == 1
+        assert mock_sandbox.call_args.kwargs == {
+            "api_key": "dummy-api-key",
+            "template": "dummy-template-id",
+            "timeout": 60,
+        }
+
+
+@pytest.fixture
+def docker_executor():
+    executor = DockerExecutor(
+        additional_imports=["pillow", "numpy"],
+        logger=AgentLogger(LogLevel.INFO, Console(force_terminal=False, file=io.StringIO())),
+    )
+    yield executor
+    executor.delete()
+
+
+@require_run_all
+class TestDockerExecutor:
+    @pytest.fixture(autouse=True)
+    def set_executor(self, docker_executor):
+        self.executor = docker_executor
+
+    def test_initialization(self):
+        """Check if DockerExecutor initializes without errors"""
+        assert self.executor.container is not None, "Container should be initialized"
+
+    def test_state_persistence(self):
+        """Test that variables and imports from one snippet persist in the next"""
+        code_action = "import numpy as np; a = 2"
+        self.executor(code_action)
+
+        code_action = "print(np.sqrt(a))"
+        result, logs, final_answer = self.executor(code_action)
+        assert "1.41421" in logs
+
+    def test_execute_output(self):
+        """Test execution that returns a string"""
+        code_action = 'final_answer("This is the final answer")'
+        result, logs, final_answer = self.executor(code_action)
+        assert result == "This is the final answer", "Result should be 'This is the final answer'"
+
+    def test_execute_multiline_output(self):
+        """Test multi-line execution that returns a string"""
+        code_action = 'result = "This is the final answer"\nfinal_answer(result)'
+        result, logs, final_answer = self.executor(code_action)
+        assert result == "This is the final answer", "Result should be 'This is the final answer'"
+
+    def test_execute_image_output(self):
+        """Test execution that returns a base64 image"""
+        code_action = dedent("""
+            import base64
+            from PIL import Image
+            from io import BytesIO
+            image = Image.new("RGB", (10, 10), (255, 0, 0))
+            final_answer(image)
+        """)
+        result, logs, final_answer = self.executor(code_action)
+        assert isinstance(result, PIL.Image.Image), "Result should be a PIL Image"
+
+    def test_syntax_error_handling(self):
+        """Test handling of syntax errors"""
+        code_action = 'print("Missing Parenthesis'  # Syntax error
+        with pytest.raises(AgentError) as exception_info:
+            self.executor(code_action)
+        assert "SyntaxError" in str(exception_info.value), "Should raise a syntax error"
+
+    def test_cleanup_on_deletion(self):
+        """Test if Docker container stops and removes on deletion"""
+        container_id = self.executor.container.id
+        self.executor.delete()  # Trigger cleanup
+
+        client = docker.from_env()
+        containers = [c.id for c in client.containers.list(all=True)]
+        assert container_id not in containers, "Container should be removed"
diff --git a/tests/test_search.py b/tests/test_search.py
index c146c6a67..7ed66636c 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import unittest
 
 from smolagents import DuckDuckGoSearchTool
 
@@ -21,8 +20,8 @@
 from .utils.markers import require_run_all
 
 
-class DuckDuckGoSearchToolTester(unittest.TestCase, ToolTesterMixin):
-    def setUp(self):
+class TestDuckDuckGoSearchTool(ToolTesterMixin):
+    def setup_method(self):
         self.tool = DuckDuckGoSearchTool()
         self.tool.setup()
 
diff --git a/tests/test_tool_validation.py b/tests/test_tool_validation.py
index f3a94ded2..a1ce170e7 100644
--- a/tests/test_tool_validation.py
+++ b/tests/test_tool_validation.py
@@ -1,8 +1,11 @@
+import ast
+from textwrap import dedent
+
 import pytest
 
 from smolagents.default_tools import DuckDuckGoSearchTool, GoogleSearchTool, SpeechToTextTool, VisitWebpageTool
-from smolagents.tool_validation import validate_tool_attributes
-from smolagents.tools import Tool
+from smolagents.tool_validation import MethodChecker, validate_tool_attributes
+from smolagents.tools import Tool, tool
 
 
 UNDEFINED_VARIABLE = "undefined_variable"
@@ -29,8 +32,32 @@ def forward(self, input: str) -> str:
         return input.upper()
 
 
-def test_validate_tool_attributes_valid():
-    assert validate_tool_attributes(ValidTool) is None
+@tool
+def valid_tool_function(input: str) -> str:
+    """A valid tool function.
+
+    Args:
+        input (str): Input string.
+    """
+    return input.upper()
+
+
+@pytest.mark.parametrize("tool_class", [ValidTool, valid_tool_function.__class__])
+def test_validate_tool_attributes_valid(tool_class):
+    assert validate_tool_attributes(tool_class) is None
+
+
+class InvalidToolName(Tool):
+    name = "invalid tool name"
+    description = "Tool with invalid name"
+    inputs = {"input": {"type": "string", "description": "input"}}
+    output_type = "string"
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input: str) -> str:
+        return input
 
 
 class InvalidToolComplexAttrs(Tool):
@@ -88,6 +115,10 @@ def forward(self, input: str) -> str:
 @pytest.mark.parametrize(
     "tool_class, expected_error",
     [
+        (
+            InvalidToolName,
+            "Class attribute 'name' must be a valid Python identifier and not a reserved keyword, found 'invalid tool name'",
+        ),
         (InvalidToolComplexAttrs, "Complex attributes should be defined in __init__, not as class attributes"),
         (InvalidToolRequiredParams, "Parameters in __init__ must have default values, found required parameters"),
         (
@@ -100,3 +131,51 @@ def forward(self, input: str) -> str:
 def test_validate_tool_attributes_exceptions(tool_class, expected_error):
     with pytest.raises(ValueError, match=expected_error):
         validate_tool_attributes(tool_class)
+
+
+class MultipleAssignmentsTool(Tool):
+    name = "multiple_assignments_tool"
+    description = "Tool with multiple assignments"
+    inputs = {"input": {"type": "string", "description": "input"}}
+    output_type = "string"
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input: str) -> str:
+        a, b = "1", "2"
+        return a + b
+
+
+def test_validate_tool_attributes_multiple_assignments():
+    validate_tool_attributes(MultipleAssignmentsTool)
+
+
+@tool
+def tool_function_with_multiple_assignments(input: str) -> str:
+    """A valid tool function.
+
+    Args:
+        input (str): Input string.
+    """
+    a, b = "1", "2"
+    return input.upper() + a + b
+
+
+@pytest.mark.parametrize("tool_instance", [MultipleAssignmentsTool(), tool_function_with_multiple_assignments])
+def test_tool_to_dict_validation_with_multiple_assignments(tool_instance):
+    tool_instance.to_dict()
+
+
+class TestMethodChecker:
+    def test_multiple_assignments(self):
+        source_code = dedent(
+            """
+            def forward(self) -> str:
+                a, b = "1", "2"
+                return a + b
+            """
+        )
+        method_checker = MethodChecker(set())
+        method_checker.visit(ast.parse(source_code))
+        assert method_checker.errors == []
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 4ac48e07d..f82c08753 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -12,93 +12,76 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import inspect
 import os
-import tempfile
-import unittest
-from pathlib import Path
 from textwrap import dedent
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Literal
 from unittest.mock import MagicMock, patch
 
 import mcp
 import numpy as np
+import PIL.Image
 import pytest
-import torch
-from transformers import is_torch_available, is_vision_available
-from transformers.testing_utils import get_tests_dir
 
-from smolagents.agent_types import _AGENT_TYPE_MAPPING, AgentAudio, AgentImage, AgentText
-from smolagents.tools import AUTHORIZED_TYPES, Tool, ToolCollection, tool
+from smolagents.agent_types import _AGENT_TYPE_MAPPING
+from smolagents.tools import AUTHORIZED_TYPES, Tool, ToolCollection, launch_gradio_demo, tool
 
-
-if is_torch_available():
-    import torch
-
-if is_vision_available():
-    from PIL import Image
-
-
-def create_inputs(tool_inputs: Dict[str, Dict[Union[str, type], str]]):
-    inputs = {}
-
-    for input_name, input_desc in tool_inputs.items():
-        input_type = input_desc["type"]
-
-        if input_type == "string":
-            inputs[input_name] = "Text input"
-        elif input_type == "image":
-            inputs[input_name] = Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png").resize((512, 512))
-        elif input_type == "audio":
-            inputs[input_name] = np.ones(3000)
-        else:
-            raise ValueError(f"Invalid type requested: {input_type}")
-
-    return inputs
-
-
-def output_type(output):
-    if isinstance(output, (str, AgentText)):
-        return "string"
-    elif isinstance(output, (Image.Image, AgentImage)):
-        return "image"
-    elif isinstance(output, (torch.Tensor, AgentAudio)):
-        return "audio"
-    else:
-        raise TypeError(f"Invalid output: {output}")
+from .utils.markers import require_run_all
 
 
 class ToolTesterMixin:
     def test_inputs_output(self):
-        self.assertTrue(hasattr(self.tool, "inputs"))
-        self.assertTrue(hasattr(self.tool, "output_type"))
+        assert hasattr(self.tool, "inputs")
+        assert hasattr(self.tool, "output_type")
 
         inputs = self.tool.inputs
-        self.assertTrue(isinstance(inputs, dict))
+        assert isinstance(inputs, dict)
 
         for _, input_spec in inputs.items():
-            self.assertTrue("type" in input_spec)
-            self.assertTrue("description" in input_spec)
-            self.assertTrue(input_spec["type"] in AUTHORIZED_TYPES)
-            self.assertTrue(isinstance(input_spec["description"], str))
+            assert "type" in input_spec
+            assert "description" in input_spec
+            assert input_spec["type"] in AUTHORIZED_TYPES
+            assert isinstance(input_spec["description"], str)
 
         output_type = self.tool.output_type
-        self.assertTrue(output_type in AUTHORIZED_TYPES)
+        assert output_type in AUTHORIZED_TYPES
 
     def test_common_attributes(self):
-        self.assertTrue(hasattr(self.tool, "description"))
-        self.assertTrue(hasattr(self.tool, "name"))
-        self.assertTrue(hasattr(self.tool, "inputs"))
-        self.assertTrue(hasattr(self.tool, "output_type"))
+        assert hasattr(self.tool, "description")
+        assert hasattr(self.tool, "name")
+        assert hasattr(self.tool, "inputs")
+        assert hasattr(self.tool, "output_type")
 
-    def test_agent_type_output(self):
+    def test_agent_type_output(self, create_inputs):
         inputs = create_inputs(self.tool.inputs)
         output = self.tool(**inputs, sanitize_inputs_outputs=True)
         if self.tool.output_type != "any":
             agent_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
-            self.assertTrue(isinstance(output, agent_type))
+            assert isinstance(output, agent_type)
+
+    @pytest.fixture
+    def create_inputs(self, shared_datadir):
+        def _create_inputs(tool_inputs: dict[str, dict[str | type, str]]) -> dict[str, Any]:
+            inputs = {}
+
+            for input_name, input_desc in tool_inputs.items():
+                input_type = input_desc["type"]
 
+                if input_type == "string":
+                    inputs[input_name] = "Text input"
+                elif input_type == "image":
+                    inputs[input_name] = PIL.Image.open(shared_datadir / "000000039769.png").resize((512, 512))
+                elif input_type == "audio":
+                    inputs[input_name] = np.ones(3000)
+                else:
+                    raise ValueError(f"Invalid type requested: {input_type}")
 
-class ToolTests(unittest.TestCase):
+            return inputs
+
+        return _create_inputs
+
+
+class TestTool:
     def test_tool_init_with_decorator(self):
         @tool
         def coolfunc(a: str, b: int) -> float:
@@ -163,7 +146,7 @@ def coolfunc(a: str, b: int) -> int:
             assert coolfunc.output_type == "number"
         assert "docstring has no description for the argument" in str(e)
 
-    def test_saving_tool_raises_error_imports_outside_function(self):
+    def test_saving_tool_raises_error_imports_outside_function(self, tmp_path):
         with pytest.raises(Exception) as e:
             import numpy as np
 
@@ -174,7 +157,7 @@ def get_current_time() -> str:
                 """
                 return str(np.random.random())
 
-            get_current_time.save("output")
+            get_current_time.save(tmp_path)
 
         assert "np" in str(e)
 
@@ -191,7 +174,7 @@ def forward(self):
                     return str(np.random.random())
 
             get_current_time = GetCurrentTimeTool()
-            get_current_time.save("output")
+            get_current_time.save(tmp_path)
 
         assert "np" in str(e)
 
@@ -243,7 +226,7 @@ class PassTool(Tool):
             inputs = {"string_input": {"type": "string", "description": "input description"}}
             output_type = "string"
 
-            def __init__(self, url: Optional[str] = "none"):
+            def __init__(self, url: str | None = "none"):
                 super().__init__(self)
                 self.url = url
 
@@ -253,7 +236,7 @@ def forward(self, string_input: str) -> str:
         fail_tool = PassTool()
         fail_tool.to_dict()
 
-    def test_saving_tool_allows_no_imports_from_outside_methods(self):
+    def test_saving_tool_allows_no_imports_from_outside_methods(self, tmp_path):
         # Test that using imports from outside functions fails
         import numpy as np
 
@@ -272,7 +255,7 @@ def forward(self, string_input):
 
         fail_tool = FailTool()
         with pytest.raises(Exception) as e:
-            fail_tool.save("output")
+            fail_tool.save(tmp_path)
         assert "'np' is undefined" in str(e)
 
         # Test that putting these imports inside functions works
@@ -292,7 +275,7 @@ def forward(self, string_input):
                 return self.useless_method() + string_input
 
         success_tool = SuccessTool()
-        success_tool.save("output")
+        success_tool.save(tmp_path)
 
     def test_tool_missing_class_attributes_raises_error(self):
         with pytest.raises(Exception) as e:
@@ -308,7 +291,7 @@ class GetWeatherTool(Tool):
                     },
                 }
 
-                def forward(self, location: str, celsius: Optional[bool] = False) -> str:
+                def forward(self, location: str, celsius: bool | None = False) -> str:
                     return "The weather is UNGODLY with torrential rains and temperatures below -10°C"
 
             GetWeatherTool()
@@ -316,7 +299,7 @@ def forward(self, location: str, celsius: Optional[bool] = False) -> str:
 
     def test_tool_from_decorator_optional_args(self):
         @tool
-        def get_weather(location: str, celsius: Optional[bool] = False) -> str:
+        def get_weather(location: str, celsius: bool | None = False) -> str:
             """
             Get weather in the next days at given location.
             Secretly this tool does not care about the location, it hates the weather everywhere.
@@ -346,7 +329,7 @@ class GetWeatherTool(Tool):
                 }
                 output_type = "string"
 
-                def forward(self, location: str, celsius: Optional[bool] = False) -> str:
+                def forward(self, location: str, celsius: bool | None = False) -> str:
                     return "The weather is UNGODLY with torrential rains and temperatures below -10°C"
 
             GetWeatherTool()
@@ -407,7 +390,7 @@ def get_weather(location: str, celsius: bool = False) -> str:
 
         assert get_weather.inputs["celsius"]["nullable"]
 
-    def test_tool_supports_any_none(self):
+    def test_tool_supports_any_none(self, tmp_path):
         @tool
         def get_weather(location: Any) -> None:
             """
@@ -418,14 +401,13 @@ def get_weather(location: Any) -> None:
             """
             return
 
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            get_weather.save(tmp_dir)
+        get_weather.save(tmp_path)
         assert get_weather.inputs["location"]["type"] == "any"
         assert get_weather.output_type == "null"
 
     def test_tool_supports_array(self):
         @tool
-        def get_weather(locations: List[str], months: Optional[Tuple[str, str]] = None) -> Dict[str, float]:
+        def get_weather(locations: list[str], months: tuple[str, str] | None = None) -> dict[str, float]:
             """
             Get weather in the next days at given locations.
 
@@ -438,7 +420,50 @@ def get_weather(locations: List[str], months: Optional[Tuple[str, str]] = None)
         assert get_weather.inputs["locations"]["type"] == "array"
         assert get_weather.inputs["months"]["type"] == "array"
 
-    def test_saving_tool_produces_valid_pyhon_code_with_multiline_description(self):
+    def test_tool_supports_string_literal(self):
+        @tool
+        def get_weather(unit: Literal["celsius", "fahrenheit"] = "celsius") -> None:
+            """
+            Get weather in the next days at given location.
+
+            Args:
+                unit: The unit of temperature
+            """
+            return
+
+        assert get_weather.inputs["unit"]["type"] == "string"
+        assert get_weather.inputs["unit"]["enum"] == ["celsius", "fahrenheit"]
+
+    def test_tool_supports_numeric_literal(self):
+        @tool
+        def get_choice(choice: Literal[1, 2, 3]) -> None:
+            """
+            Get choice based on the provided numeric literal.
+
+            Args:
+                choice: The numeric choice to be made.
+            """
+            return
+
+        assert get_choice.inputs["choice"]["type"] == "integer"
+        assert get_choice.inputs["choice"]["enum"] == [1, 2, 3]
+
+    def test_tool_supports_nullable_literal(self):
+        @tool
+        def get_choice(choice: Literal[1, 2, 3, None]) -> None:
+            """
+            Get choice based on the provided value.
+
+            Args:
+                choice: The numeric choice to be made.
+            """
+            return
+
+        assert get_choice.inputs["choice"]["type"] == "integer"
+        assert get_choice.inputs["choice"]["nullable"] is True
+        assert get_choice.inputs["choice"]["enum"] == [1, 2, 3]
+
+    def test_saving_tool_produces_valid_pyhon_code_with_multiline_description(self, tmp_path):
         @tool
         def get_weather(location: Any) -> None:
             """
@@ -450,33 +475,81 @@ def get_weather(location: Any) -> None:
             """
             return
 
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            get_weather.save(tmp_dir)
-            with open(os.path.join(tmp_dir, "tool.py"), "r", encoding="utf-8") as f:
-                source_code = f.read()
-                compile(source_code, f.name, "exec")
+        get_weather.save(tmp_path)
+        with open(os.path.join(tmp_path, "tool.py"), "r", encoding="utf-8") as f:
+            source_code = f.read()
+            compile(source_code, f.name, "exec")
+
+    @pytest.mark.parametrize("fixture_name", ["boolean_default_tool_class", "boolean_default_tool_function"])
+    def test_to_dict_boolean_default_input(self, fixture_name, request):
+        """A boolean input with a default value must be reflected in the code generated by to_dict()."""
+        tool_under_test = request.getfixturevalue(fixture_name)
+        generated_code = tool_under_test.to_dict()["code"]
+        # The generated code keeps the annotated parameter together with its default
+        assert "flag: bool = False" in generated_code
+        # The generated code carries a nullable marker for the defaulted parameter
+        assert "'nullable': True" in generated_code
+
+    @pytest.mark.parametrize("fixture_name", ["optional_input_tool_class", "optional_input_tool_function"])
+    def test_to_dict_optional_input(self, fixture_name, request):
+        """An optional (nullable) input must be reflected in the code generated by to_dict()."""
+        tool_under_test = request.getfixturevalue(fixture_name)
+        generated_code = tool_under_test.to_dict()["code"]
+        # The generated code keeps the `str | None` annotation and its None default
+        assert "optional_text: str | None = None" in generated_code
+        # The generated code carries a nullable marker for the optional parameter
+        assert "'nullable': True" in generated_code
+
+    def test_from_dict_roundtrip(self, example_tool):
+        """A tool serialized with to_dict() and rebuilt with Tool.from_dict() keeps its metadata and behavior."""
+        # Serialize the tool, then rebuild a new instance from the serialized form
+        serialized = example_tool.to_dict()
+        rebuilt = Tool.from_dict(serialized)
+        # Every descriptive attribute must match the one on the original tool
+        for attr in ("name", "description", "inputs", "output_type"):
+            assert getattr(rebuilt, attr) == getattr(example_tool, attr)
+        # Calling the rebuilt tool is expected to return the upper-cased input,
+        # which is what this assertion encodes for the example_tool fixture
+        sample = "Hello, world!"
+        expected = sample.upper()
+        assert rebuilt(sample) == expected
+
+    def test_tool_from_dict_invalid(self):
+        """Tool.from_dict must raise ValueError for a dictionary that has no 'code' key."""
+        with pytest.raises(ValueError) as e:
+            Tool.from_dict({"name": "invalid_tool"})
+        assert "must contain 'code' key" in str(e.value)  # check the message, not the ExceptionInfo repr
+
+    def test_tool_decorator_preserves_original_function(self):
+        # Define a test function with type hints and docstring
+        def test_function(items: list[str]) -> str:
+            """Join a list of strings.
+            Args:
+                items: A list of strings to join
+            Returns:
+                The joined string
+            """
+            return ", ".join(items)
 
-    def test_saving_tool_produces_valid_python_code_with_complex_name(self):
-        # Test one cannot save tool with additional args in init
-        class FailTool(Tool):
-            name = 'spe"\rcific'
-            description = """test \n\r
-            description"""
-            inputs = {"string_input": {"type": "string", "description": "input description"}}
-            output_type = "string"
+        # Store original function signature, name, and docstring
+        original_signature = inspect.signature(test_function)
+        original_name = test_function.__name__
+        original_docstring = test_function.__doc__
 
-            def __init__(self):
-                super().__init__(self)
+        # Create a tool from the function
+        test_tool = tool(test_function)
 
-            def forward(self, string_input):
-                return "foo"
+        # Check that the original function is unchanged
+        assert original_signature == inspect.signature(test_function)
+        assert original_name == test_function.__name__
+        assert original_docstring == test_function.__doc__
 
-        fail_tool = FailTool()
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            fail_tool.save(tmp_dir)
-            with open(os.path.join(tmp_dir, "tool.py"), "r", encoding="utf-8") as f:
-                source_code = f.read()
-                compile(source_code, f.name, "exec")
+        # Verify that the tool's forward method has a different signature (it has 'self')
+        tool_forward_sig = inspect.signature(test_tool.forward)
+        assert list(tool_forward_sig.parameters.keys())[0] == "self"
+
+        # Original function should not have 'self' parameter
+        assert "self" not in original_signature.parameters
 
 
 @pytest.fixture
@@ -500,12 +573,13 @@ def mock_smolagents_adapter():
 
 class TestToolCollection:
     def test_from_mcp(self, mock_server_parameters, mock_mcp_adapt, mock_smolagents_adapter):
-        with ToolCollection.from_mcp(mock_server_parameters) as tool_collection:
+        with ToolCollection.from_mcp(mock_server_parameters, trust_remote_code=True) as tool_collection:
             assert isinstance(tool_collection, ToolCollection)
             assert len(tool_collection.tools) == 2
             assert "tool1" in tool_collection.tools
             assert "tool2" in tool_collection.tools
 
+    @require_run_all
     def test_integration_from_mcp(self):
         # define the most simple mcp server with one tool that echoes the input text
         mcp_server_script = dedent("""\
@@ -525,7 +599,65 @@ def echo_tool(text: str) -> str:
             args=["-c", mcp_server_script],
         )
 
-        with ToolCollection.from_mcp(mcp_server_params) as tool_collection:
+        with ToolCollection.from_mcp(mcp_server_params, trust_remote_code=True) as tool_collection:
             assert len(tool_collection.tools) == 1, "Expected 1 tool"
             assert tool_collection.tools[0].name == "echo_tool", "Expected tool name to be 'echo_tool'"
             assert tool_collection.tools[0](text="Hello") == "Hello", "Expected tool to echo the input text"
+
+    def test_integration_from_mcp_with_sse(self):
+        """Load tools from a local MCP server over SSE and call the echo tool end to end."""
+        import socket
+        import subprocess
+        import sys
+        import time
+
+        # define the most simple mcp server with one tool that echoes the input text
+        mcp_server_script = dedent("""\
+            from mcp.server.fastmcp import FastMCP
+
+            mcp = FastMCP("Echo Server", host="127.0.0.1", port=8000)
+
+            @mcp.tool()
+            def echo_tool(text: str) -> str:
+                return text
+
+            mcp.run("sse")
+        """).strip()
+
+        # start the SSE mcp server with the interpreter running the tests, so that the
+        # child process sees the same installed packages (a bare "python" may not)
+        server_process = subprocess.Popen([sys.executable, "-c", mcp_server_script])
+
+        try:
+            # wait until the server accepts connections instead of sleeping a fixed time,
+            # which is flaky on slow machines
+            deadline = time.monotonic() + 30
+            while time.monotonic() < deadline:
+                if server_process.poll() is not None:
+                    pytest.fail("MCP SSE server exited before accepting connections")
+                try:
+                    socket.create_connection(("127.0.0.1", 8000), timeout=0.5).close()
+                    break
+                except OSError:
+                    time.sleep(0.1)
+            else:
+                pytest.fail("MCP SSE server did not start listening within 30 seconds")
+
+            with ToolCollection.from_mcp(
+                {"url": "http://127.0.0.1:8000/sse"}, trust_remote_code=True
+            ) as tool_collection:
+                assert len(tool_collection.tools) == 1, "Expected 1 tool"
+                assert tool_collection.tools[0].name == "echo_tool", "Expected tool name to be 'echo_tool'"
+                assert tool_collection.tools[0](text="Hello") == "Hello", "Expected tool to echo the input text"
+        finally:
+            # clean up the process when test is done
+            server_process.kill()
+            server_process.wait()
+
+
+@pytest.mark.parametrize("tool_fixture_name", ["boolean_default_tool_class"])
+def test_launch_gradio_demo_does_not_raise(tool_fixture_name, request):
+    tool = request.getfixturevalue(tool_fixture_name)
+    with patch("gradio.Interface.launch") as mock_launch:
+        launch_gradio_demo(tool)
+    assert mock_launch.call_count == 1
diff --git a/tests/test_types.py b/tests/test_types.py
index 73465d0ed..e3050c9dd 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -16,17 +16,16 @@
 import tempfile
 import unittest
 import uuid
-from pathlib import Path
 
-from PIL import Image
+import PIL.Image
 from transformers.testing_utils import (
     require_soundfile,
-    require_torch,
-    require_vision,
 )
 
 from smolagents.agent_types import AgentAudio, AgentImage, AgentText
 
+from .utils.markers import require_torch
+
 
 def get_new_path(suffix="") -> str:
     directory = tempfile.mkdtemp()
@@ -70,9 +69,8 @@ def test_from_string(self):
         self.assertEqual(agent_type.to_string(), path)
 
 
-@require_vision
 @require_torch
-class AgentImageTests(unittest.TestCase):
+class TestAgentImage:
     def test_from_tensor(self):
         import torch
 
@@ -81,37 +79,37 @@ def test_from_tensor(self):
         path = str(agent_type.to_string())
 
         # Ensure that the tensor and the agent_type's tensor are the same
-        self.assertTrue(torch.allclose(tensor, agent_type._tensor, atol=1e-4))
+        assert torch.allclose(tensor, agent_type._tensor, atol=1e-4)
 
-        self.assertIsInstance(agent_type.to_raw(), Image.Image)
+        assert isinstance(agent_type.to_raw(), PIL.Image.Image)
 
         # Ensure the path remains even after the object deletion
         del agent_type
-        self.assertTrue(os.path.exists(path))
+        assert os.path.exists(path)
 
-    def test_from_string(self):
-        path = Path("tests/fixtures/000000039769.png")
-        image = Image.open(path)
+    def test_from_string(self, shared_datadir):
+        path = shared_datadir / "000000039769.png"
+        image = PIL.Image.open(path)
         agent_type = AgentImage(path)
 
-        self.assertTrue(path.samefile(agent_type.to_string()))
-        self.assertTrue(image == agent_type.to_raw())
+        assert path.samefile(agent_type.to_string())
+        assert image == agent_type.to_raw()
 
         # Ensure the path remains even after the object deletion
         del agent_type
-        self.assertTrue(os.path.exists(path))
+        assert os.path.exists(path)
 
-    def test_from_image(self):
-        path = Path("tests/fixtures/000000039769.png")
-        image = Image.open(path)
+    def test_from_image(self, shared_datadir):
+        path = shared_datadir / "000000039769.png"
+        image = PIL.Image.open(path)
         agent_type = AgentImage(image)
 
-        self.assertFalse(path.samefile(agent_type.to_string()))
-        self.assertTrue(image == agent_type.to_raw())
+        assert not path.samefile(agent_type.to_string())
+        assert image == agent_type.to_raw()
 
         # Ensure the path remains even after the object deletion
         del agent_type
-        self.assertTrue(os.path.exists(path))
+        assert os.path.exists(path)
 
 
 class AgentTextTests(unittest.TestCase):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 16ba39141..d4aa11970 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -14,8 +14,6 @@
 # limitations under the License.
 import inspect
 import os
-import pathlib
-import tempfile
 import textwrap
 import unittest
 
@@ -24,7 +22,75 @@
 
 from smolagents import Tool
 from smolagents.tools import tool
-from smolagents.utils import get_source, parse_code_blobs
+from smolagents.utils import get_source, instance_to_source, is_valid_name, parse_code_blobs, parse_json_blob
+
+
+class ValidTool(Tool):
+    name = "valid_tool"
+    description = "A valid tool"
+    inputs = {"input": {"type": "string", "description": "input"}}
+    output_type = "string"
+    simple_attr = "string"
+    dict_attr = {"key": "value"}
+
+    def __init__(self, optional_param="default"):
+        super().__init__()
+        self.param = optional_param
+
+    def forward(self, input: str) -> str:
+        return input.upper()
+
+
+@tool
+def valid_tool_function(input: str) -> str:
+    """A valid tool function.
+
+    Args:
+        input (str): Input string.
+    """
+    return input.upper()
+
+
+VALID_TOOL_SOURCE = """\
+from smolagents.tools import Tool
+
+class ValidTool(Tool):
+    name = "valid_tool"
+    description = "A valid tool"
+    inputs = {'input': {'type': 'string', 'description': 'input'}}
+    output_type = "string"
+    simple_attr = "string"
+    dict_attr = {'key': 'value'}
+
+    def __init__(self, optional_param="default"):
+        super().__init__()
+        self.param = optional_param
+
+    def forward(self, input: str) -> str:
+        return input.upper()
+"""
+
+VALID_TOOL_FUNCTION_SOURCE = '''\
+from smolagents.tools import Tool
+
+class SimpleTool(Tool):
+    name = "valid_tool_function"
+    description = "A valid tool function."
+    inputs = {'input': {'type': 'string', 'description': 'Input string.'}}
+    output_type = "string"
+
+    def __init__(self):
+        self.is_initialized = True
+
+    @tool
+    def valid_tool_function(input: str) -> str:
+        """A valid tool function.
+
+        Args:
+            input (str): Input string.
+        """
+        return input.upper()
+'''
 
 
 class AgentTextTests(unittest.TestCase):
@@ -47,25 +113,14 @@ def test_parse_code_blobs(self):
         output = parse_code_blobs(code_blob)
         assert output == code_blob
 
-    def test_multiple_code_blobs(self):
-        test_input = """Here's a function that adds numbers:
-```python
-def add(a, b):
-    return a + b
-```
-And here's a function that multiplies them:
-```py
-def multiply(a, b):
-    return a * b
-```"""
-
-        expected_output = """def add(a, b):
-    return a + b
+        # Allow whitespaces after header
+        output = parse_code_blobs("```py    \ncode_a\n````")
+        assert output == "code_a"
 
-def multiply(a, b):
-    return a * b"""
+    def test_multiple_code_blobs(self):
+        test_input = "```\nFoo\n```\n\n```py\ncode_a\n````\n\n```python\ncode_b\n```"
         result = parse_code_blobs(test_input)
-        assert result == expected_output
+        assert result == "Foo\n\ncode_a\n\ncode_b"
 
 
 @pytest.fixture(scope="function")
@@ -127,7 +182,15 @@ def test_get_source_ipython_errors_type_error():
         get_source(None)
 
 
-def test_e2e_class_tool_save():
+@pytest.mark.parametrize(
+    "tool, expected_tool_source", [(ValidTool(), VALID_TOOL_SOURCE), (valid_tool_function, VALID_TOOL_FUNCTION_SOURCE)]
+)
+def test_instance_to_source(tool, expected_tool_source):
+    tool_source = instance_to_source(tool, base_cls=Tool)
+    assert tool_source == expected_tool_source
+
+
+def test_e2e_class_tool_save(tmp_path):
     class TestTool(Tool):
         name = "test_tool"
         description = "Test tool description"
@@ -145,48 +208,46 @@ def forward(self, task: str):
             return task
 
     test_tool = TestTool()
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        test_tool.save(tmp_dir, make_gradio_app=True)
-        assert set(os.listdir(tmp_dir)) == {"requirements.txt", "app.py", "tool.py"}
-        assert (
-            pathlib.Path(tmp_dir, "tool.py").read_text()
-            == """from typing import Any, Optional
-from smolagents.tools import Tool
-import IPython
-
-class TestTool(Tool):
-    name = "test_tool"
-    description = "Test tool description"
-    inputs = {'task': {'type': 'string', 'description': 'tool input'}}
-    output_type = "string"
-
-    def forward(self, task: str):
-        import IPython  # noqa: F401
+    test_tool.save(tmp_path, make_gradio_app=True)
+    assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"}
+    assert (tmp_path / "tool.py").read_text() == textwrap.dedent(
+        """\
+        from typing import Any, Optional
+        from smolagents.tools import Tool
+        import IPython
 
-        return task
+        class TestTool(Tool):
+            name = "test_tool"
+            description = "Test tool description"
+            inputs = {'task': {'type': 'string', 'description': 'tool input'}}
+            output_type = "string"
 
-    def __init__(self, *args, **kwargs):
-        self.is_initialized = False
-"""
-        )
-        requirements = set(pathlib.Path(tmp_dir, "requirements.txt").read_text().split())
-        assert requirements == {"IPython", "smolagents"}
-        assert (
-            pathlib.Path(tmp_dir, "app.py").read_text()
-            == """from smolagents import launch_gradio_demo
-from tool import TestTool
+            def forward(self, task: str):
+                import IPython  # noqa: F401
 
-tool = TestTool()
+                return task
 
-launch_gradio_demo(tool)
-"""
-        )
+            def __init__(self, *args, **kwargs):
+                self.is_initialized = False
+        """
+    )
+    requirements = set((tmp_path / "requirements.txt").read_text().split())
+    assert requirements == {"IPython", "smolagents"}
+    assert (tmp_path / "app.py").read_text() == textwrap.dedent(
+        """\
+        from smolagents import launch_gradio_demo
+        from tool import TestTool
+
+        tool = TestTool()
+        launch_gradio_demo(tool)
+        """
+    )
 
 
-def test_e2e_ipython_class_tool_save():
+def test_e2e_ipython_class_tool_save(tmp_path):
     shell = InteractiveShell.instance()
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        code_blob = textwrap.dedent(f"""
+    code_blob = textwrap.dedent(
+        f"""\
         from smolagents.tools import Tool
         class TestTool(Tool):
             name = "test_tool"
@@ -201,46 +262,46 @@ def forward(self, task: str):
                 import IPython  # noqa: F401
 
                 return task
-        TestTool().save("{tmp_dir}", make_gradio_app=True)
-    """)
-        assert shell.run_cell(code_blob, store_history=True).success
-        assert set(os.listdir(tmp_dir)) == {"requirements.txt", "app.py", "tool.py"}
-        assert (
-            pathlib.Path(tmp_dir, "tool.py").read_text()
-            == """from typing import Any, Optional
-from smolagents.tools import Tool
-import IPython
-
-class TestTool(Tool):
-    name = "test_tool"
-    description = "Test tool description"
-    inputs = {'task': {'type': 'string', 'description': 'tool input'}}
-    output_type = "string"
-
-    def forward(self, task: str):
-        import IPython  # noqa: F401
+        TestTool().save("{tmp_path}", make_gradio_app=True)
+        """
+    )
+    assert shell.run_cell(code_blob, store_history=True).success
+    assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"}
+    assert (tmp_path / "tool.py").read_text() == textwrap.dedent(
+        """\
+        from typing import Any, Optional
+        from smolagents.tools import Tool
+        import IPython
 
-        return task
+        class TestTool(Tool):
+            name = "test_tool"
+            description = "Test tool description"
+            inputs = {'task': {'type': 'string', 'description': 'tool input'}}
+            output_type = "string"
 
-    def __init__(self, *args, **kwargs):
-        self.is_initialized = False
-"""
-        )
-        requirements = set(pathlib.Path(tmp_dir, "requirements.txt").read_text().split())
-        assert requirements == {"IPython", "smolagents"}
-        assert (
-            pathlib.Path(tmp_dir, "app.py").read_text()
-            == """from smolagents import launch_gradio_demo
-from tool import TestTool
+            def forward(self, task: str):
+                import IPython  # noqa: F401
 
-tool = TestTool()
+                return task
 
-launch_gradio_demo(tool)
-"""
-        )
+            def __init__(self, *args, **kwargs):
+                self.is_initialized = False
+        """
+    )
+    requirements = set((tmp_path / "requirements.txt").read_text().split())
+    assert requirements == {"IPython", "smolagents"}
+    assert (tmp_path / "app.py").read_text() == textwrap.dedent(
+        """\
+        from smolagents import launch_gradio_demo
+        from tool import TestTool
+
+        tool = TestTool()
+        launch_gradio_demo(tool)
+        """
+    )
 
 
-def test_e2e_function_tool_save():
+def test_e2e_function_tool_save(tmp_path):
     @tool
     def test_tool(task: str) -> str:
         """
@@ -253,49 +314,47 @@ def test_tool(task: str) -> str:
 
         return task
 
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        test_tool.save(tmp_dir, make_gradio_app=True)
-        assert set(os.listdir(tmp_dir)) == {"requirements.txt", "app.py", "tool.py"}
-        assert (
-            pathlib.Path(tmp_dir, "tool.py").read_text()
-            == """from smolagents import Tool
-from typing import Any, Optional
+    test_tool.save(tmp_path, make_gradio_app=True)
+    assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"}
+    assert (tmp_path / "tool.py").read_text() == textwrap.dedent(
+        """\
+        from smolagents import Tool
+        from typing import Any, Optional
 
-class SimpleTool(Tool):
-    name = "test_tool"
-    description = "Test tool description"
-    inputs = {"task":{"type":"string","description":"tool input"}}
-    output_type = "string"
-
-    def forward(self, task: str) -> str:
-        \"""
-        Test tool description
-
-        Args:
-            task: tool input
-        \"""
-        import IPython  # noqa: F401
+        class SimpleTool(Tool):
+            name = "test_tool"
+            description = "Test tool description"
+            inputs = {'task': {'type': 'string', 'description': 'tool input'}}
+            output_type = "string"
 
-        return task"""
-        )
-        requirements = set(pathlib.Path(tmp_dir, "requirements.txt").read_text().split())
-        assert requirements == {"smolagents"}  # FIXME: IPython should be in the requirements
-        assert (
-            pathlib.Path(tmp_dir, "app.py").read_text()
-            == """from smolagents import launch_gradio_demo
-from tool import SimpleTool
+            def forward(self, task: str) -> str:
+                \"""
+                Test tool description
 
-tool = SimpleTool()
+                Args:
+                    task: tool input
+                \"""
+                import IPython  # noqa: F401
 
-launch_gradio_demo(tool)
-"""
-        )
+                return task"""
+    )
+    requirements = set((tmp_path / "requirements.txt").read_text().split())
+    assert requirements == {"smolagents"}  # FIXME: IPython should be in the requirements
+    assert (tmp_path / "app.py").read_text() == textwrap.dedent(
+        """\
+        from smolagents import launch_gradio_demo
+        from tool import SimpleTool
+
+        tool = SimpleTool()
+        launch_gradio_demo(tool)
+        """
+    )
 
 
-def test_e2e_ipython_function_tool_save():
+def test_e2e_ipython_function_tool_save(tmp_path):
     shell = InteractiveShell.instance()
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        code_blob = textwrap.dedent(f"""
+    code_blob = textwrap.dedent(
+        f"""
         from smolagents import tool
 
         @tool
@@ -310,41 +369,129 @@ def test_tool(task: str) -> str:
 
             return task
 
-        test_tool.save("{tmp_dir}", make_gradio_app=True)
-        """)
-        assert shell.run_cell(code_blob, store_history=True).success
-        assert set(os.listdir(tmp_dir)) == {"requirements.txt", "app.py", "tool.py"}
-        assert (
-            pathlib.Path(tmp_dir, "tool.py").read_text()
-            == """from smolagents import Tool
-from typing import Any, Optional
+        test_tool.save("{tmp_path}", make_gradio_app=True)
+        """
+    )
+    assert shell.run_cell(code_blob, store_history=True).success
+    assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"}
+    assert (tmp_path / "tool.py").read_text() == textwrap.dedent(
+        """\
+        from smolagents import Tool
+        from typing import Any, Optional
+
+        class SimpleTool(Tool):
+            name = "test_tool"
+            description = "Test tool description"
+            inputs = {'task': {'type': 'string', 'description': 'tool input'}}
+            output_type = "string"
 
-class SimpleTool(Tool):
-    name = "test_tool"
-    description = "Test tool description"
-    inputs = {"task":{"type":"string","description":"tool input"}}
-    output_type = "string"
+            def forward(self, task: str) -> str:
+                \"""
+                Test tool description
 
-    def forward(self, task: str) -> str:
-        \"""
-        Test tool description
+                Args:
+                    task: tool input
+                \"""
+                import IPython  # noqa: F401
 
-        Args:
-            task: tool input
-        \"""
-        import IPython  # noqa: F401
+                return task"""
+    )
+    requirements = set((tmp_path / "requirements.txt").read_text().split())
+    assert requirements == {"smolagents"}  # FIXME: IPython should be in the requirements
+    assert (tmp_path / "app.py").read_text() == textwrap.dedent(
+        """\
+        from smolagents import launch_gradio_demo
+        from tool import SimpleTool
+
+        tool = SimpleTool()
+        launch_gradio_demo(tool)
+        """
+    )
+
+
+@pytest.mark.parametrize(
+    "raw_json, expected_data, expected_blob",  # input text, parsed JSON, returned text
+    [
+        (
+            """{}""",  # empty object, no surrounding text
+            {},
+            "",
+        ),
+        (
+            """Text{}""",  # empty object with leading text
+            {},
+            "Text",
+        ),
+        (
+            """{"simple": "json"}""",  # bare object
+            {"simple": "json"},
+            "",
+        ),
+        (
+            """With text here{"simple": "json"}""",  # leading text only
+            {"simple": "json"},
+            "With text here",
+        ),
+        (
+            """{"simple": "json"}With text after""",  # trailing text only
+            {"simple": "json"},
+            "",  # trailing text is not returned
+        ),
+        (
+            """With text before{"simple": "json"}And text after""",  # text on both sides
+            {"simple": "json"},
+            "With text before",  # only the leading text is returned
+        ),
+    ],
+)
+def test_parse_json_blob_with_valid_json(raw_json, expected_data, expected_blob):
+    data, blob = parse_json_blob(raw_json)  # blob: the text preceding the JSON object
 
-        return task"""
-        )
-        requirements = set(pathlib.Path(tmp_dir, "requirements.txt").read_text().split())
-        assert requirements == {"smolagents"}  # FIXME: IPython should be in the requirements
-        assert (
-            pathlib.Path(tmp_dir, "app.py").read_text()
-            == """from smolagents import launch_gradio_demo
-from tool import SimpleTool
+    assert data == expected_data  # parsed JSON matches the expected dict
+    assert blob == expected_blob  # leading text matches, "" when there is none
 
-tool = SimpleTool()
 
-launch_gradio_demo(tool)
-"""
-        )
+@pytest.mark.parametrize(
+    "raw_json",
+    [
+        """simple": "json"}""",  # no opening brace
+        """With text here"simple": "json"}""",  # leading text, no opening brace
+        """{"simple": ""json"}With text after""",  # doubled quote before the value
+        """{"simple": "json"With text after""",  # no closing brace
+        "}}",  # closing braces only
+    ],
+)
+def test_parse_json_blob_with_invalid_json(raw_json):
+    with pytest.raises(Exception):  # any exception type passes; the concrete class is not pinned
+        parse_json_blob(raw_json)
+
+
+@pytest.mark.parametrize(
+    "name,expected",
+    [
+        # Valid identifiers: snake_case, CamelCase, trailing digits, leading underscore
+        ("valid_name", True),
+        ("ValidName", True),
+        ("valid123", True),
+        ("_private", True),
+        # Invalid identifiers: empty string, leading digit, hyphen, space, dot
+        ("", False),
+        ("123invalid", False),
+        ("invalid-name", False),
+        ("invalid name", False),
+        ("invalid.name", False),
+        # Python keywords: expected to be rejected
+        ("if", False),
+        ("for", False),
+        ("class", False),
+        ("return", False),
+        # Non-string inputs: expected to yield False rather than raise
+        (123, False),
+        (None, False),
+        ([], False),
+        ({}, False),
+    ],
+)
+def test_is_valid_name(name, expected):
+    """Check that is_valid_name returns exactly the expected bool for each parametrized name."""
+    assert is_valid_name(name) is expected  # identity check: a merely truthy/falsy result fails
diff --git a/tests/utils/markers.py b/tests/utils/markers.py
index 8901f5f25..5240f9880 100644
--- a/tests/utils/markers.py
+++ b/tests/utils/markers.py
@@ -15,8 +15,10 @@
 """Markers for tests ."""
 
 import os
+from importlib.util import find_spec
 
 import pytest
 
 
 require_run_all = pytest.mark.skipif(not os.getenv("RUN_ALL"), reason="requires RUN_ALL environment variable")
+require_torch = pytest.mark.skipif(find_spec("torch") is None, reason="requires torch")  # does not import torch
diff --git a/utils/check_tests_in_ci.py b/utils/check_tests_in_ci.py
deleted file mode 100644
index b320e23e7..000000000
--- a/utils/check_tests_in_ci.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# coding=utf-8
-# Copyright 2025-present, the HuggingFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Check that all tests are called in CI."""
-
-from pathlib import Path
-
-
-ROOT = Path(__file__).parent.parent
-
-TESTS_FOLDER = ROOT / "tests"
-CI_WORKFLOW_FILE = ROOT / ".github" / "workflows" / "tests.yml"
-
-
-def check_tests_in_ci():
-    """List all test files in `./tests/` and check if they are listed in the CI workflow.
-
-    Since each test file is triggered separately in the CI workflow, it is easy to forget a new one when adding new
-    tests, hence this check.
-
-    NOTE: current implementation is quite naive but should work for now. Must be updated if one want to ignore some
-          tests or if file naming is updated (currently only files starting by `test_*` are checked)
-    """
-    test_files = [
-        path.relative_to(TESTS_FOLDER).as_posix()
-        for path in TESTS_FOLDER.glob("**/*.py")
-        if path.name.startswith("test_")
-    ]
-    ci_workflow_file_content = CI_WORKFLOW_FILE.read_text()
-    missing_test_files = [test_file for test_file in test_files if test_file not in ci_workflow_file_content]
-    if missing_test_files:
-        print(
-            "❌ Some test files seem to be ignored in the CI:\n"
-            + "\n".join(f"   - {test_file}" for test_file in missing_test_files)
-            + f"\n   Please add them manually in {CI_WORKFLOW_FILE}."
-        )
-        exit(1)
-    else:
-        print("✅ All good!")
-        exit(0)
-
-
-if __name__ == "__main__":
-    check_tests_in_ci()