joaopauloschuler · joaopauloschuler · Apr 27, 2025 · Feb 24, 2025 · Feb 24, 2025 · Feb 24, 2025
diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
@@ -2,6 +2,9 @@ name: Quality Check
 
 on: [pull_request]
 
+env:
+  UV_SYSTEM_PYTHON: 1
+
 jobs:
   check_code_quality:
     runs-on: ubuntu-latest
@@ -16,15 +19,13 @@ jobs:
           python-version: "3.12"
 
       # Setup venv
-      - name: Setup venv + uv
+      - name: Setup uv
         run: |
           pip install --upgrade uv
-          uv venv
 
       - name: Install dependencies
         run: uv pip install "smolagents[quality] @ ."
 
       # Equivalent of "make quality" but step by step
-      - run: uv run ruff check examples src tests utils # linter
-      - run: uv run ruff format --check examples src tests utils # formatter
-      - run: uv run python utils/check_tests_in_ci.py
+      - run: ruff check examples src tests  # linter
+      - run: ruff format --check examples src tests  # formatter
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,6 +1,13 @@
 name: Python tests
 
-on: [pull_request]
+on:
+  pull_request:
+  push:
+    branches:
+      - ci-*
+
+env:
+  UV_SYSTEM_PYTHON: 1
 
 jobs:
   build-ubuntu:
@@ -21,99 +28,16 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       # Setup venv
-      - name: Setup venv + uv
+      - name: Setup uv
         run: |
           pip install --upgrade uv
-          uv venv
 
       # Install dependencies
       - name: Install dependencies
         run: |
           uv pip install "smolagents[test] @ ."
 
-      # Run all tests separately for individual feedback
-      # Use 'if success() || failure()' so that all tests are run even if one failed
-      # See https://stackoverflow.com/a/62112985
-      - name: Import tests
-        run: |
-          uv run pytest ./tests/test_import.py
-        if: ${{ success() || failure() }}
-
-      - name: Agent tests
-        run: |
-          uv run pytest ./tests/test_agents.py
-        if: ${{ success() || failure() }}
-
-      - name: Default tools tests
-        run: |
-          uv run pytest ./tests/test_default_tools.py
-        if: ${{ success() || failure() }}
-
-      # - name: Docs tests # Disabled for now (slow test + requires API keys)
-      #   run: |
-      #     uv run pytest ./tests/test_all_docs.py
-
-      - name: Final answer tests
-        run: |
-          uv run pytest ./tests/test_final_answer.py
-        if: ${{ success() || failure() }}
-
-      - name: Models tests
-        run: |
-          uv run pytest ./tests/test_models.py
-        if: ${{ success() || failure() }}
-
-      - name: Memory tests
-        run: |
-          uv run pytest ./tests/test_memory.py
-        if: ${{ success() || failure() }}
-
-      - name: Monitoring tests
-        run: |
-          uv run pytest ./tests/test_monitoring.py
-        if: ${{ success() || failure() }}
-
-      - name: Local Python executor tests
-        run: |
-          uv run pytest ./tests/test_local_python_executor.py
-        if: ${{ success() || failure() }}
-
-      - name: E2B executor tests
-        run: |
-          uv run pytest ./tests/test_e2b_executor.py
-        if: ${{ success() || failure() }}
-
-      - name: Search tests
-        run: |
-          uv run pytest ./tests/test_search.py
-        if: ${{ success() || failure() }}
-
-      - name: Tools tests
-        run: |
-          uv run pytest ./tests/test_tools.py
-        if: ${{ success() || failure() }}
-
-      - name: Tool validation tests
-        run: |
-          uv run pytest ./tests/test_tool_validation.py
-        if: ${{ success() || failure() }}
-
-      - name: Types tests
-        run: |
-          uv run pytest ./tests/test_types.py
-        if: ${{ success() || failure() }}
-
-      - name: Utils tests
-        run: |
-          uv run pytest ./tests/test_utils.py
-        if: ${{ success() || failure() }}
-
-      - name: Gradio UI tests
-        run: |
-          uv run pytest ./tests/test_gradio_ui.py
-        if: ${{ success() || failure() }}
-
-      - name: Function type hints utils tests
+      # Run tests
+      - name: Test with pytest
         run: |
-          uv run pytest ./tests/test_function_type_hints_utils.py
-        if: ${{ success() || failure() }}
+          pytest ./tests/
diff --git a/.gitignore b/.gitignore
@@ -150,3 +150,6 @@ archive/
 savedir/
 output/
 tool_output/
+
+# Gradio runtime
+.gradio/
diff --git a/Dockerfile b/Dockerfile
diff --git a/Makefile b/Makefile
@@ -1,12 +1,11 @@
-.PHONY: quality style test docs utils
+.PHONY: quality style test docs
 
 check_dirs := examples src tests utils
 
 # Check code quality of the source code
 quality:
 	ruff check $(check_dirs)
 	ruff format --check $(check_dirs)
-	python utils/check_tests_in_ci.py
 
 # Format source code automatically
 style:

diff --git a/README.md b/README.md
@@ -26,23 +26,23 @@ limitations under the License.
 <h3 align="center">
   <div style="display:flex;flex-direction:row;">
     <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/smolagents.png" alt="Hugging Face mascot as James Bond" width=400px>
-    <p>A smol library to build great agents!</p>
+    <p>Agents that think in code!</p>
   </div>
 </h3>
 
 `smolagents` is a library that enables you to run powerful agents in a few lines of code. It offers:
 
 ✨ **Simplicity**: the logic for agents fits in ~1,000 lines of code (see [agents.py](https://github.com/huggingface/smolagents/blob/main/src/smolagents/agents.py)). We kept abstractions to their minimal shape above raw code!
 
-🧑‍💻 **First-class support for Code Agents**. Our [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) writes its actions in code (as opposed to "agents being used to write code"). To make it secure, we support executing in sandboxed environments via [E2B](https://e2b.dev/).
+🧑‍💻 **First-class support for Code Agents**. Our [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) writes its actions in code (as opposed to "agents being used to write code"). To make it secure, we support executing in sandboxed environments via [E2B](https://e2b.dev/) or via Docker.
 
-🤗 **Hub integrations**: you can [share/pull tools to/from the Hub](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_hub), and more is to come!
+🤗 **Hub integrations**: you can [share/pull tools or agents to/from the Hub](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_hub) for instant sharing of the most efficient agents!
 
 🌐 **Model-agnostic**: smolagents supports any LLM. It can be a local `transformers` or `ollama` model, one of [many providers on the Hub](https://huggingface.co/blog/inference-providers), or any model from OpenAI, Anthropic and many others via our [LiteLLM](https://www.litellm.ai/) integration.
 
 👁️ **Modality-agnostic**: Agents support text, vision, video, even audio inputs! Cf [this tutorial](https://huggingface.co/docs/smolagents/examples/web_browser) for vision.
 
-🛠️ **Tool-agnostic**: you can use tools from [LangChain](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_langchain), [Anthropic's MCP](https://huggingface.co/docs/smolagents/reference/tools#smolagents.ToolCollection.from_mcp), you can even use a [Hub Space](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_space) as a tool.
+🛠️ **Tool-agnostic**: you can use tools from [LangChain](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_langchain), [MCP](https://huggingface.co/docs/smolagents/reference/tools#smolagents.ToolCollection.from_mcp), you can even use a [Hub Space](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_space) as a tool.
 
 Full documentation can be found [here](https://huggingface.co/docs/smolagents/index).
 
@@ -57,17 +57,17 @@ pip install smolagents
 ```
 Then define your agent, give it the tools it needs and run it!
 ```py
-from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel
+from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel
 
-model = HfApiModel()
+model = InferenceClientModel()
 agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
 
 agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
 ```
 
 https://github.com/user-attachments/assets/cd0226e2-7479-4102-aea0-57c22ca47884
 
-You can even share your agent to hub:
+You can even share your agent to the Hub, as a Space repository:
 ```py
 agent.push_to_hub("m-ric/my_agent")
 
@@ -77,12 +77,12 @@ agent.push_to_hub("m-ric/my_agent")
 Our library is LLM-agnostic: you could switch the example above to any inference provider.
 
 <details>
-<summary> <b>HfApiModel, gateway for 4 inference providers</b></summary>
+<summary> <b>InferenceClientModel, gateway for all <a href="https://huggingface.co/docs/inference-providers/index">inference providers</a> supported on HF</b></summary>
 
 ```py
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel
 
-model = HfApiModel(
+model = InferenceClientModel(
     model_id="deepseek-ai/DeepSeek-R1",
     provider="together",
 )
@@ -95,7 +95,7 @@ model = HfApiModel(
 from smolagents import LiteLLMModel
 
 model = LiteLLMModel(
-    "anthropic/claude-3-5-sonnet-latest",
+    model_id="anthropic/claude-3-5-sonnet-latest",
     temperature=0.2,
     api_key=os.environ["ANTHROPIC_API_KEY"]
 )
@@ -143,6 +143,18 @@ model = AzureOpenAIServerModel(
 )
 ```
 </details>
+<details>
+<summary> <b>Amazon Bedrock models</b></summary>
+
+```py
+import os
+from smolagents import AmazonBedrockServerModel
+
+model = AmazonBedrockServerModel(
+    model_id = os.environ.get("AMAZON_BEDROCK_MODEL_ID") 
+)
+```
+</details>
 
 ## CLI
 
@@ -151,7 +163,7 @@ You can run agents from CLI using two commands: `smolagent` and `webagent`.
 `smolagent` is a generalist command to run a multi-step `CodeAgent` that can be equipped with various tools.
 
 ```bash
-smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7."  --model-type "HfApiModel" --model-id "Qwen/Qwen2.5-Coder-32B-Instruct" --imports "pandas numpy" --tools "web_search"
+smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7."  --model-type "InferenceClientModel" --model-id "Qwen/Qwen2.5-Coder-32B-Instruct" --imports "pandas numpy" --tools "web_search"
 ```
 
 Meanwhile `webagent` is a specific web-browsing agent using [helium](https://github.com/mherrmann/helium) (read more [here](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)).
@@ -201,7 +213,7 @@ Writing actions as code snippets is demonstrated to work better than the current
 
 Especially, since code execution can be a security concern (arbitrary code execution!), we provide options at runtime:
   - a secure python interpreter to run code more safely in your environment (more secure than raw code execution but still risky)
-  - a sandboxed environment using [E2B](https://e2b.dev/) (removes the risk to your own system).
+  - a sandboxed environment using [E2B](https://e2b.dev/) or Docker (removes the risk to your own system).
 
 On top of this [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) class, we still support the standard [`ToolCallingAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.ToolCallingAgent) that writes actions as JSON/text blobs. But we recommend always using `CodeAgent`.
 
@@ -216,14 +228,22 @@ By the way, why use a framework at all? Well, because a big part of this stuff i
 
 We've created [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) instances with some leading models, and compared them on [this benchmark](https://huggingface.co/datasets/m-ric/agents_medium_benchmark_2) that gathers questions from a few different benchmarks to propose a varied blend of challenges.
 
-[Find the benchmarking code here](https://github.com/huggingface/smolagents/blob/main/examples/benchmark.ipynb) for more detail on the agentic setup used, and see a comparison of using LLMs code agents compared to vanilla (spoilers: code agents works better).
+[Find the benchmarking code here](https://github.com/huggingface/smolagents/blob/main/examples/smolagents_benchmark/run.py) for more detail on the agentic setup used, and see a comparison of using LLMs as code agents compared to vanilla (spoilers: code agents work better).
 
 <p align="center">
     <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/benchmark_code_agents.jpeg" alt="benchmark of different models on agentic workflows. Open model DeepSeek-R1 beats closed-source models." width=60% max-width=500px>
 </p>
 
 This comparison shows that open-source models can now take on the best closed models!
 
+## Security
+
+Security is a critical consideration when working with code-executing agents. Our library provides:
+- Sandboxed execution options using [E2B](https://e2b.dev/) or Docker
+- Best practices for running agent code securely
+
+For security policies, vulnerability reporting, and more information on secure agent execution, please see our [Security Policy](SECURITY.md).
+
 ## Contribute
 
 Everyone is welcome to contribute, get started with our [contribution guide](https://github.com/huggingface/smolagents/blob/main/CONTRIBUTING.md).

diff --git a/SECURITY.md b/SECURITY.md
@@ -0,0 +1,9 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+To report a security vulnerability, please contact: security@huggingface.co
+
+## Learning More About Security
+
+To learn more about running agents more securely, please see the [Secure Code Execution tutorial](docs/source/en/tutorials/secure_code_execution.mdx), which covers sandboxing with E2B and Docker.
diff --git a/docs/README.md b/docs/README.md
@@ -121,10 +121,6 @@ Adding a new tutorial or section is done in two steps:
 
 Make sure to put your new file under the proper section. If you have a doubt, feel free to ask in a Github Issue or PR.
 
-### Translating
-
-When translating, refer to the guide at [./TRANSLATING.md](https://github.com/huggingface/smolagents/blob/main/docs/TRANSLATING.md).
-
 ### Writing source documentation
 
 Values that should be put in `code` should either be surrounded by backticks: \`like so\`. Note that argument names
@@ -271,4 +267,5 @@ is to be used in inference and also include the expected (ideally sensible)
 output.
 Often, readers will try out the example before even going through the function
 or class definitions. Therefore, it is of utmost importance that the example
-works as expected.
+works as expected.
+
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
@@ -13,7 +13,7 @@
   - local: tutorials/tools
     title: 🛠️ Tools - in-depth guide
   - local: tutorials/secure_code_execution
-    title: 🛡️ Secure your code execution with E2B
+    title: 🛡️ Secure code execution
   - local: tutorials/memory
     title: 📚 Manage your agent's memory
 - title: Conceptual guides
@@ -27,7 +27,7 @@
   - local: examples/text_to_sql
     title: Self-correcting Text-to-SQL
   - local: examples/rag
-    title: Master you knowledge base with agentic RAG
+    title: Master your knowledge base with agentic RAG
   - local: examples/multiagents
     title: Orchestrate a multi-agent system
   - local: examples/web_browser